aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rwxr-xr-xbuild-mac/mailcore2.xcodeproj/project.pbxproj324
-rw-r--r--src/core/basetypes/MCData.cpp2
-rw-r--r--src/core/basetypes/icu-ucsdet/cmemory.c183
-rw-r--r--src/core/basetypes/icu-ucsdet/csdetect.cpp485
-rw-r--r--src/core/basetypes/icu-ucsdet/csmatch.cpp73
-rw-r--r--src/core/basetypes/icu-ucsdet/csr2022.cpp190
-rw-r--r--src/core/basetypes/icu-ucsdet/csrecog.cpp28
-rw-r--r--src/core/basetypes/icu-ucsdet/csrmbcs.cpp529
-rw-r--r--src/core/basetypes/icu-ucsdet/csrsbcs.cpp1259
-rw-r--r--src/core/basetypes/icu-ucsdet/csrucode.cpp198
-rw-r--r--src/core/basetypes/icu-ucsdet/csrutf8.cpp109
-rw-r--r--src/core/basetypes/icu-ucsdet/cstring.c339
-rw-r--r--src/core/basetypes/icu-ucsdet/include/charstr.h130
-rw-r--r--src/core/basetypes/icu-ucsdet/include/cmemory.h607
-rw-r--r--src/core/basetypes/icu-ucsdet/include/csdetect.h65
-rw-r--r--src/core/basetypes/icu-ucsdet/include/csmatch.h69
-rw-r--r--src/core/basetypes/icu-ucsdet/include/csr2022.h91
-rw-r--r--src/core/basetypes/icu-ucsdet/include/csrecog.h55
-rw-r--r--src/core/basetypes/icu-ucsdet/include/csrmbcs.h205
-rw-r--r--src/core/basetypes/icu-ucsdet/include/csrsbcs.h287
-rw-r--r--src/core/basetypes/icu-ucsdet/include/csrucode.h106
-rw-r--r--src/core/basetypes/icu-ucsdet/include/csrutf8.h42
-rw-r--r--src/core/basetypes/icu-ucsdet/include/cstring.h140
-rw-r--r--src/core/basetypes/icu-ucsdet/include/cwchar.h56
-rw-r--r--src/core/basetypes/icu-ucsdet/include/inputext.h61
-rw-r--r--src/core/basetypes/icu-ucsdet/include/locmap.h37
-rw-r--r--src/core/basetypes/icu-ucsdet/include/mutex.h77
-rw-r--r--src/core/basetypes/icu-ucsdet/include/putilimp.h611
-rw-r--r--src/core/basetypes/icu-ucsdet/include/uarrsort.h101
-rw-r--r--src/core/basetypes/icu-ucsdet/include/uassert.h32
-rw-r--r--src/core/basetypes/icu-ucsdet/include/ucase.h409
-rw-r--r--src/core/basetypes/icu-ucsdet/include/ucln.h89
-rw-r--r--src/core/basetypes/icu-ucsdet/include/ucln_cmn.h72
-rw-r--r--src/core/basetypes/icu-ucsdet/include/ucln_imp.h178
-rw-r--r--src/core/basetypes/icu-ucsdet/include/ucln_in.h64
-rw-r--r--src/core/basetypes/icu-ucsdet/include/ucmndata.h111
-rw-r--r--src/core/basetypes/icu-ucsdet/include/udataswp.h351
-rw-r--r--src/core/basetypes/icu-ucsdet/include/uelement.h89
-rw-r--r--src/core/basetypes/icu-ucsdet/include/uenumimp.h153
-rw-r--r--src/core/basetypes/icu-ucsdet/include/uinvchar.h125
-rw-r--r--src/core/basetypes/icu-ucsdet/include/umapfile.h55
-rw-r--r--src/core/basetypes/icu-ucsdet/include/umutex.h424
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/appendable.h232
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/bytestream.h257
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/localpointer.h304
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/platform.h751
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/ptypes.h126
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/putil.h181
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/rep.h261
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/std_string.h37
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/strenum.h276
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/stringpiece.h224
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/ucasemap.h423
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/uchar.h3426
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/uclean.h258
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/ucnv.h2036
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/ucnv_err.h463
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/uconfig.h430
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/ucsdet.h413
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/udata.h430
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/uenum.h206
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/uiter.h707
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/umachine.h356
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/unistr.h4470
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/uobject.h320
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/urename.h1784
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/uset.h1126
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/ustring.h1700
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/utf.h223
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/utf16.h623
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/utf8.h824
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/utf_old.h1169
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/utrace.h359
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/utypes.h723
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/uvernum.h170
-rw-r--r--src/core/basetypes/icu-ucsdet/include/unicode/uversion.h193
-rw-r--r--src/core/basetypes/icu-ucsdet/include/uposixdefs.h73
-rw-r--r--src/core/basetypes/icu-ucsdet/include/uset_imp.h60
-rw-r--r--src/core/basetypes/icu-ucsdet/include/ustr_imp.h247
-rw-r--r--src/core/basetypes/icu-ucsdet/include/utracimp.h384
-rw-r--r--src/core/basetypes/icu-ucsdet/inputext.cpp164
-rw-r--r--src/core/basetypes/icu-ucsdet/uarrsort.c283
-rw-r--r--src/core/basetypes/icu-ucsdet/ucln_cmn.cpp111
-rw-r--r--src/core/basetypes/icu-ucsdet/ucln_in.cpp63
-rw-r--r--src/core/basetypes/icu-ucsdet/ucsdet.cpp207
-rw-r--r--src/core/basetypes/icu-ucsdet/udataswp.c471
-rw-r--r--src/core/basetypes/icu-ucsdet/uenum.c187
-rw-r--r--src/core/basetypes/icu-ucsdet/uinvchar.c611
-rw-r--r--src/core/basetypes/icu-ucsdet/umutex.cpp392
-rw-r--r--src/core/basetypes/icu-ucsdet/uobject.cpp103
-rw-r--r--src/core/basetypes/icu-ucsdet/ustring.cpp1516
-rw-r--r--src/core/basetypes/icu-ucsdet/utrace.c488
-rw-r--r--unittest/data/summary/input/1015-windows-1252.eml19
-rw-r--r--unittest/data/summary/input/1021-chinese.eml64
-rw-r--r--unittest/data/summary/output/1015-windows-1252.txt12
-rw-r--r--unittest/data/summary/output/1021-chinese.txt7
-rw-r--r--unittest/data/summary/output/2458-encoding.txt14
-rw-r--r--unittest/data/summary/output/2518-encoding.txt14
98 files changed, 39545 insertions, 37 deletions
diff --git a/build-mac/mailcore2.xcodeproj/project.pbxproj b/build-mac/mailcore2.xcodeproj/project.pbxproj
index fcc4830b..58525de5 100755
--- a/build-mac/mailcore2.xcodeproj/project.pbxproj
+++ b/build-mac/mailcore2.xcodeproj/project.pbxproj
@@ -168,6 +168,50 @@
BD63713B177DFF080094121B /* MCLibetpan.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BD637139177DFF080094121B /* MCLibetpan.cpp */; };
BDCD7C5B1A5B1C2C0001DCC3 /* MCErrorMessage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C591A5B1C2C0001DCC3 /* MCErrorMessage.cpp */; };
BDCD7C5C1A5B1C2C0001DCC3 /* MCErrorMessage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C591A5B1C2C0001DCC3 /* MCErrorMessage.cpp */; };
+ BDCD7CC31A70771B0001DCC3 /* csdetect.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6A1A70771B0001DCC3 /* csdetect.cpp */; };
+ BDCD7CC41A70771B0001DCC3 /* csdetect.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6A1A70771B0001DCC3 /* csdetect.cpp */; };
+ BDCD7CC51A70771B0001DCC3 /* csmatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6B1A70771B0001DCC3 /* csmatch.cpp */; };
+ BDCD7CC61A70771B0001DCC3 /* csmatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6B1A70771B0001DCC3 /* csmatch.cpp */; };
+ BDCD7CC71A70771B0001DCC3 /* csr2022.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6C1A70771B0001DCC3 /* csr2022.cpp */; };
+ BDCD7CC81A70771B0001DCC3 /* csr2022.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6C1A70771B0001DCC3 /* csr2022.cpp */; };
+ BDCD7CC91A70771B0001DCC3 /* csrecog.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6D1A70771B0001DCC3 /* csrecog.cpp */; };
+ BDCD7CCA1A70771B0001DCC3 /* csrecog.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6D1A70771B0001DCC3 /* csrecog.cpp */; };
+ BDCD7CCB1A70771B0001DCC3 /* csrmbcs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6E1A70771B0001DCC3 /* csrmbcs.cpp */; };
+ BDCD7CCC1A70771B0001DCC3 /* csrmbcs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6E1A70771B0001DCC3 /* csrmbcs.cpp */; };
+ BDCD7CCD1A70771B0001DCC3 /* csrsbcs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6F1A70771B0001DCC3 /* csrsbcs.cpp */; };
+ BDCD7CCE1A70771B0001DCC3 /* csrsbcs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6F1A70771B0001DCC3 /* csrsbcs.cpp */; };
+ BDCD7CCF1A70771B0001DCC3 /* csrucode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C701A70771B0001DCC3 /* csrucode.cpp */; };
+ BDCD7CD01A70771B0001DCC3 /* csrucode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C701A70771B0001DCC3 /* csrucode.cpp */; };
+ BDCD7CD11A70771B0001DCC3 /* csrutf8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C711A70771B0001DCC3 /* csrutf8.cpp */; };
+ BDCD7CD21A70771B0001DCC3 /* csrutf8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C711A70771B0001DCC3 /* csrutf8.cpp */; };
+ BDCD7CD31A70771B0001DCC3 /* cstring.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C721A70771B0001DCC3 /* cstring.c */; };
+ BDCD7CD41A70771B0001DCC3 /* cstring.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C721A70771B0001DCC3 /* cstring.c */; };
+ BDCD7CD51A70771B0001DCC3 /* inputext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CB91A70771B0001DCC3 /* inputext.cpp */; };
+ BDCD7CD61A70771B0001DCC3 /* inputext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CB91A70771B0001DCC3 /* inputext.cpp */; };
+ BDCD7CD71A70771B0001DCC3 /* uarrsort.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBA1A70771B0001DCC3 /* uarrsort.c */; };
+ BDCD7CD81A70771B0001DCC3 /* uarrsort.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBA1A70771B0001DCC3 /* uarrsort.c */; };
+ BDCD7CD91A70771B0001DCC3 /* ucln_cmn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBB1A70771B0001DCC3 /* ucln_cmn.cpp */; };
+ BDCD7CDA1A70771B0001DCC3 /* ucln_cmn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBB1A70771B0001DCC3 /* ucln_cmn.cpp */; };
+ BDCD7CDB1A70771B0001DCC3 /* ucln_in.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBC1A70771B0001DCC3 /* ucln_in.cpp */; };
+ BDCD7CDC1A70771B0001DCC3 /* ucln_in.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBC1A70771B0001DCC3 /* ucln_in.cpp */; };
+ BDCD7CDD1A70771B0001DCC3 /* udataswp.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBD1A70771B0001DCC3 /* udataswp.c */; };
+ BDCD7CDE1A70771B0001DCC3 /* udataswp.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBD1A70771B0001DCC3 /* udataswp.c */; };
+ BDCD7CDF1A70771B0001DCC3 /* uenum.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBE1A70771B0001DCC3 /* uenum.c */; };
+ BDCD7CE01A70771B0001DCC3 /* uenum.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBE1A70771B0001DCC3 /* uenum.c */; };
+ BDCD7CE11A70771B0001DCC3 /* uinvchar.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBF1A70771B0001DCC3 /* uinvchar.c */; };
+ BDCD7CE21A70771B0001DCC3 /* uinvchar.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBF1A70771B0001DCC3 /* uinvchar.c */; };
+ BDCD7CE31A70771B0001DCC3 /* uobject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC01A70771B0001DCC3 /* uobject.cpp */; };
+ BDCD7CE41A70771B0001DCC3 /* uobject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC01A70771B0001DCC3 /* uobject.cpp */; };
+ BDCD7CE51A70771B0001DCC3 /* ustring.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC11A70771B0001DCC3 /* ustring.cpp */; };
+ BDCD7CE61A70771B0001DCC3 /* ustring.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC11A70771B0001DCC3 /* ustring.cpp */; };
+ BDCD7CE71A70771B0001DCC3 /* utrace.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC21A70771B0001DCC3 /* utrace.c */; };
+ BDCD7CE81A70771B0001DCC3 /* utrace.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC21A70771B0001DCC3 /* utrace.c */; };
+ BDCD7CEA1A7077680001DCC3 /* ucsdet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CE91A7077680001DCC3 /* ucsdet.cpp */; };
+ BDCD7CEB1A7077680001DCC3 /* ucsdet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CE91A7077680001DCC3 /* ucsdet.cpp */; };
+ BDCD7CEE1A7079300001DCC3 /* cmemory.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CEC1A7079300001DCC3 /* cmemory.c */; };
+ BDCD7CEF1A7079300001DCC3 /* cmemory.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CEC1A7079300001DCC3 /* cmemory.c */; };
+ BDCD7CF01A7079300001DCC3 /* umutex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CED1A7079300001DCC3 /* umutex.cpp */; };
+ BDCD7CF11A7079300001DCC3 /* umutex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CED1A7079300001DCC3 /* umutex.cpp */; };
C07AD5D7FD82F8ACAB576231 /* NSError+MCO.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = C07AD44B013BB42A240B4F04 /* NSError+MCO.h */; };
C07AD99B2E2054C684DB8FF6 /* NSError+MCO.mm in Sources */ = {isa = PBXBuildFile; fileRef = C07ADFE43E22B38EFF23ADB5 /* NSError+MCO.mm */; };
C07ADC28B83E7959BF114D46 /* MCOIMAPSession.mm in Sources */ = {isa = PBXBuildFile; fileRef = C07AD057D3C8FBDC7AC95733 /* MCOIMAPSession.mm */; };
@@ -1493,6 +1537,96 @@
BD63713A177DFF080094121B /* MCLibetpan.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MCLibetpan.h; sourceTree = "<group>"; };
BDCD7C591A5B1C2C0001DCC3 /* MCErrorMessage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MCErrorMessage.cpp; sourceTree = "<group>"; };
BDCD7C5A1A5B1C2C0001DCC3 /* MCErrorMessage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MCErrorMessage.h; sourceTree = "<group>"; };
+ BDCD7C6A1A70771B0001DCC3 /* csdetect.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csdetect.cpp; sourceTree = "<group>"; };
+ BDCD7C6B1A70771B0001DCC3 /* csmatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csmatch.cpp; sourceTree = "<group>"; };
+ BDCD7C6C1A70771B0001DCC3 /* csr2022.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csr2022.cpp; sourceTree = "<group>"; };
+ BDCD7C6D1A70771B0001DCC3 /* csrecog.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csrecog.cpp; sourceTree = "<group>"; };
+ BDCD7C6E1A70771B0001DCC3 /* csrmbcs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csrmbcs.cpp; sourceTree = "<group>"; };
+ BDCD7C6F1A70771B0001DCC3 /* csrsbcs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csrsbcs.cpp; sourceTree = "<group>"; };
+ BDCD7C701A70771B0001DCC3 /* csrucode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csrucode.cpp; sourceTree = "<group>"; };
+ BDCD7C711A70771B0001DCC3 /* csrutf8.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csrutf8.cpp; sourceTree = "<group>"; };
+ BDCD7C721A70771B0001DCC3 /* cstring.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = cstring.c; sourceTree = "<group>"; };
+ BDCD7C741A70771B0001DCC3 /* charstr.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = charstr.h; sourceTree = "<group>"; };
+ BDCD7C751A70771B0001DCC3 /* cmemory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cmemory.h; sourceTree = "<group>"; };
+ BDCD7C761A70771B0001DCC3 /* csdetect.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csdetect.h; sourceTree = "<group>"; };
+ BDCD7C771A70771B0001DCC3 /* csmatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csmatch.h; sourceTree = "<group>"; };
+ BDCD7C781A70771B0001DCC3 /* csr2022.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csr2022.h; sourceTree = "<group>"; };
+ BDCD7C791A70771B0001DCC3 /* csrecog.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csrecog.h; sourceTree = "<group>"; };
+ BDCD7C7A1A70771B0001DCC3 /* csrmbcs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csrmbcs.h; sourceTree = "<group>"; };
+ BDCD7C7B1A70771B0001DCC3 /* csrsbcs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csrsbcs.h; sourceTree = "<group>"; };
+ BDCD7C7C1A70771B0001DCC3 /* csrucode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csrucode.h; sourceTree = "<group>"; };
+ BDCD7C7D1A70771B0001DCC3 /* csrutf8.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csrutf8.h; sourceTree = "<group>"; };
+ BDCD7C7E1A70771B0001DCC3 /* cstring.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cstring.h; sourceTree = "<group>"; };
+ BDCD7C7F1A70771B0001DCC3 /* cwchar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cwchar.h; sourceTree = "<group>"; };
+ BDCD7C801A70771B0001DCC3 /* inputext.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = inputext.h; sourceTree = "<group>"; };
+ BDCD7C811A70771B0001DCC3 /* locmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = locmap.h; sourceTree = "<group>"; };
+ BDCD7C821A70771B0001DCC3 /* mutex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mutex.h; sourceTree = "<group>"; };
+ BDCD7C831A70771B0001DCC3 /* putilimp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = putilimp.h; sourceTree = "<group>"; };
+ BDCD7C841A70771B0001DCC3 /* uarrsort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uarrsort.h; sourceTree = "<group>"; };
+ BDCD7C851A70771B0001DCC3 /* uassert.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uassert.h; sourceTree = "<group>"; };
+ BDCD7C861A70771B0001DCC3 /* ucase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucase.h; sourceTree = "<group>"; };
+ BDCD7C871A70771B0001DCC3 /* ucln.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucln.h; sourceTree = "<group>"; };
+ BDCD7C881A70771B0001DCC3 /* ucln_cmn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucln_cmn.h; sourceTree = "<group>"; };
+ BDCD7C891A70771B0001DCC3 /* ucln_imp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucln_imp.h; sourceTree = "<group>"; };
+ BDCD7C8A1A70771B0001DCC3 /* ucln_in.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucln_in.h; sourceTree = "<group>"; };
+ BDCD7C8B1A70771B0001DCC3 /* ucmndata.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucmndata.h; sourceTree = "<group>"; };
+ BDCD7C8C1A70771B0001DCC3 /* udataswp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = udataswp.h; sourceTree = "<group>"; };
+ BDCD7C8D1A70771B0001DCC3 /* uelement.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uelement.h; sourceTree = "<group>"; };
+ BDCD7C8E1A70771B0001DCC3 /* uenumimp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uenumimp.h; sourceTree = "<group>"; };
+ BDCD7C8F1A70771B0001DCC3 /* uinvchar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uinvchar.h; sourceTree = "<group>"; };
+ BDCD7C901A70771B0001DCC3 /* umapfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = umapfile.h; sourceTree = "<group>"; };
+ BDCD7C911A70771B0001DCC3 /* umutex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = umutex.h; sourceTree = "<group>"; };
+ BDCD7C931A70771B0001DCC3 /* appendable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = appendable.h; sourceTree = "<group>"; };
+ BDCD7C941A70771B0001DCC3 /* bytestream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bytestream.h; sourceTree = "<group>"; };
+ BDCD7C951A70771B0001DCC3 /* localpointer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = localpointer.h; sourceTree = "<group>"; };
+ BDCD7C961A70771B0001DCC3 /* platform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = platform.h; sourceTree = "<group>"; };
+ BDCD7C971A70771B0001DCC3 /* ptypes.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ptypes.h; sourceTree = "<group>"; };
+ BDCD7C981A70771B0001DCC3 /* putil.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = putil.h; sourceTree = "<group>"; };
+ BDCD7C991A70771B0001DCC3 /* rep.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rep.h; sourceTree = "<group>"; };
+ BDCD7C9A1A70771B0001DCC3 /* std_string.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = std_string.h; sourceTree = "<group>"; };
+ BDCD7C9B1A70771B0001DCC3 /* strenum.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = strenum.h; sourceTree = "<group>"; };
+ BDCD7C9C1A70771B0001DCC3 /* stringpiece.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stringpiece.h; sourceTree = "<group>"; };
+ BDCD7C9D1A70771B0001DCC3 /* ucasemap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucasemap.h; sourceTree = "<group>"; };
+ BDCD7C9E1A70771B0001DCC3 /* uchar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uchar.h; sourceTree = "<group>"; };
+ BDCD7C9F1A70771B0001DCC3 /* uclean.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uclean.h; sourceTree = "<group>"; };
+ BDCD7CA01A70771B0001DCC3 /* ucnv.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucnv.h; sourceTree = "<group>"; };
+ BDCD7CA11A70771B0001DCC3 /* ucnv_err.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucnv_err.h; sourceTree = "<group>"; };
+ BDCD7CA21A70771B0001DCC3 /* uconfig.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uconfig.h; sourceTree = "<group>"; };
+ BDCD7CA31A70771B0001DCC3 /* ucsdet.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucsdet.h; sourceTree = "<group>"; };
+ BDCD7CA41A70771B0001DCC3 /* udata.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = udata.h; sourceTree = "<group>"; };
+ BDCD7CA51A70771B0001DCC3 /* uenum.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uenum.h; sourceTree = "<group>"; };
+ BDCD7CA61A70771B0001DCC3 /* uiter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uiter.h; sourceTree = "<group>"; };
+ BDCD7CA71A70771B0001DCC3 /* umachine.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = umachine.h; sourceTree = "<group>"; };
+ BDCD7CA81A70771B0001DCC3 /* unistr.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = unistr.h; sourceTree = "<group>"; };
+ BDCD7CA91A70771B0001DCC3 /* uobject.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uobject.h; sourceTree = "<group>"; };
+ BDCD7CAA1A70771B0001DCC3 /* urename.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = urename.h; sourceTree = "<group>"; };
+ BDCD7CAB1A70771B0001DCC3 /* uset.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uset.h; sourceTree = "<group>"; };
+ BDCD7CAC1A70771B0001DCC3 /* ustring.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ustring.h; sourceTree = "<group>"; };
+ BDCD7CAD1A70771B0001DCC3 /* utf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utf.h; sourceTree = "<group>"; };
+ BDCD7CAE1A70771B0001DCC3 /* utf16.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utf16.h; sourceTree = "<group>"; };
+ BDCD7CAF1A70771B0001DCC3 /* utf8.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utf8.h; sourceTree = "<group>"; };
+ BDCD7CB01A70771B0001DCC3 /* utf_old.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utf_old.h; sourceTree = "<group>"; };
+ BDCD7CB11A70771B0001DCC3 /* utrace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utrace.h; sourceTree = "<group>"; };
+ BDCD7CB21A70771B0001DCC3 /* utypes.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utypes.h; sourceTree = "<group>"; };
+ BDCD7CB31A70771B0001DCC3 /* uvernum.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uvernum.h; sourceTree = "<group>"; };
+ BDCD7CB41A70771B0001DCC3 /* uversion.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uversion.h; sourceTree = "<group>"; };
+ BDCD7CB51A70771B0001DCC3 /* uposixdefs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uposixdefs.h; sourceTree = "<group>"; };
+ BDCD7CB61A70771B0001DCC3 /* uset_imp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uset_imp.h; sourceTree = "<group>"; };
+ BDCD7CB71A70771B0001DCC3 /* ustr_imp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ustr_imp.h; sourceTree = "<group>"; };
+ BDCD7CB81A70771B0001DCC3 /* utracimp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utracimp.h; sourceTree = "<group>"; };
+ BDCD7CB91A70771B0001DCC3 /* inputext.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = inputext.cpp; sourceTree = "<group>"; };
+ BDCD7CBA1A70771B0001DCC3 /* uarrsort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = uarrsort.c; sourceTree = "<group>"; };
+ BDCD7CBB1A70771B0001DCC3 /* ucln_cmn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ucln_cmn.cpp; sourceTree = "<group>"; };
+ BDCD7CBC1A70771B0001DCC3 /* ucln_in.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ucln_in.cpp; sourceTree = "<group>"; };
+ BDCD7CBD1A70771B0001DCC3 /* udataswp.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = udataswp.c; sourceTree = "<group>"; };
+ BDCD7CBE1A70771B0001DCC3 /* uenum.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = uenum.c; sourceTree = "<group>"; };
+ BDCD7CBF1A70771B0001DCC3 /* uinvchar.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = uinvchar.c; sourceTree = "<group>"; };
+ BDCD7CC01A70771B0001DCC3 /* uobject.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = uobject.cpp; sourceTree = "<group>"; };
+ BDCD7CC11A70771B0001DCC3 /* ustring.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ustring.cpp; sourceTree = "<group>"; };
+ BDCD7CC21A70771B0001DCC3 /* utrace.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = utrace.c; sourceTree = "<group>"; };
+ BDCD7CE91A7077680001DCC3 /* ucsdet.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ucsdet.cpp; sourceTree = "<group>"; };
+ BDCD7CEC1A7079300001DCC3 /* cmemory.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = cmemory.c; sourceTree = "<group>"; };
+ BDCD7CED1A7079300001DCC3 /* umutex.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = umutex.cpp; sourceTree = "<group>"; };
C07AD057D3C8FBDC7AC95733 /* MCOIMAPSession.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MCOIMAPSession.mm; sourceTree = "<group>"; };
C07AD44B013BB42A240B4F04 /* NSError+MCO.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSError+MCO.h"; sourceTree = "<group>"; };
C07ADFE43E22B38EFF23ADB5 /* NSError+MCO.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "NSError+MCO.mm"; sourceTree = "<group>"; };
@@ -2091,6 +2225,119 @@
path = nntp;
sourceTree = "<group>";
};
+ BDCD7C691A70771B0001DCC3 /* icu-ucsdet */ = {
+ isa = PBXGroup;
+ children = (
+ BDCD7CEC1A7079300001DCC3 /* cmemory.c */,
+ BDCD7C6A1A70771B0001DCC3 /* csdetect.cpp */,
+ BDCD7C6B1A70771B0001DCC3 /* csmatch.cpp */,
+ BDCD7C6C1A70771B0001DCC3 /* csr2022.cpp */,
+ BDCD7C6D1A70771B0001DCC3 /* csrecog.cpp */,
+ BDCD7C6E1A70771B0001DCC3 /* csrmbcs.cpp */,
+ BDCD7C6F1A70771B0001DCC3 /* csrsbcs.cpp */,
+ BDCD7C701A70771B0001DCC3 /* csrucode.cpp */,
+ BDCD7C711A70771B0001DCC3 /* csrutf8.cpp */,
+ BDCD7C721A70771B0001DCC3 /* cstring.c */,
+ BDCD7C731A70771B0001DCC3 /* include */,
+ BDCD7CB91A70771B0001DCC3 /* inputext.cpp */,
+ BDCD7CBA1A70771B0001DCC3 /* uarrsort.c */,
+ BDCD7CBB1A70771B0001DCC3 /* ucln_cmn.cpp */,
+ BDCD7CBC1A70771B0001DCC3 /* ucln_in.cpp */,
+ BDCD7CE91A7077680001DCC3 /* ucsdet.cpp */,
+ BDCD7CBD1A70771B0001DCC3 /* udataswp.c */,
+ BDCD7CBE1A70771B0001DCC3 /* uenum.c */,
+ BDCD7CBF1A70771B0001DCC3 /* uinvchar.c */,
+ BDCD7CED1A7079300001DCC3 /* umutex.cpp */,
+ BDCD7CC01A70771B0001DCC3 /* uobject.cpp */,
+ BDCD7CC11A70771B0001DCC3 /* ustring.cpp */,
+ BDCD7CC21A70771B0001DCC3 /* utrace.c */,
+ );
+ path = "icu-ucsdet";
+ sourceTree = "<group>";
+ };
+ BDCD7C731A70771B0001DCC3 /* include */ = {
+ isa = PBXGroup;
+ children = (
+ BDCD7C741A70771B0001DCC3 /* charstr.h */,
+ BDCD7C751A70771B0001DCC3 /* cmemory.h */,
+ BDCD7C761A70771B0001DCC3 /* csdetect.h */,
+ BDCD7C771A70771B0001DCC3 /* csmatch.h */,
+ BDCD7C781A70771B0001DCC3 /* csr2022.h */,
+ BDCD7C791A70771B0001DCC3 /* csrecog.h */,
+ BDCD7C7A1A70771B0001DCC3 /* csrmbcs.h */,
+ BDCD7C7B1A70771B0001DCC3 /* csrsbcs.h */,
+ BDCD7C7C1A70771B0001DCC3 /* csrucode.h */,
+ BDCD7C7D1A70771B0001DCC3 /* csrutf8.h */,
+ BDCD7C7E1A70771B0001DCC3 /* cstring.h */,
+ BDCD7C7F1A70771B0001DCC3 /* cwchar.h */,
+ BDCD7C801A70771B0001DCC3 /* inputext.h */,
+ BDCD7C811A70771B0001DCC3 /* locmap.h */,
+ BDCD7C821A70771B0001DCC3 /* mutex.h */,
+ BDCD7C831A70771B0001DCC3 /* putilimp.h */,
+ BDCD7C841A70771B0001DCC3 /* uarrsort.h */,
+ BDCD7C851A70771B0001DCC3 /* uassert.h */,
+ BDCD7C861A70771B0001DCC3 /* ucase.h */,
+ BDCD7C871A70771B0001DCC3 /* ucln.h */,
+ BDCD7C881A70771B0001DCC3 /* ucln_cmn.h */,
+ BDCD7C891A70771B0001DCC3 /* ucln_imp.h */,
+ BDCD7C8A1A70771B0001DCC3 /* ucln_in.h */,
+ BDCD7C8B1A70771B0001DCC3 /* ucmndata.h */,
+ BDCD7C8C1A70771B0001DCC3 /* udataswp.h */,
+ BDCD7C8D1A70771B0001DCC3 /* uelement.h */,
+ BDCD7C8E1A70771B0001DCC3 /* uenumimp.h */,
+ BDCD7C8F1A70771B0001DCC3 /* uinvchar.h */,
+ BDCD7C901A70771B0001DCC3 /* umapfile.h */,
+ BDCD7C911A70771B0001DCC3 /* umutex.h */,
+ BDCD7C921A70771B0001DCC3 /* unicode */,
+ BDCD7CB51A70771B0001DCC3 /* uposixdefs.h */,
+ BDCD7CB61A70771B0001DCC3 /* uset_imp.h */,
+ BDCD7CB71A70771B0001DCC3 /* ustr_imp.h */,
+ BDCD7CB81A70771B0001DCC3 /* utracimp.h */,
+ );
+ path = include;
+ sourceTree = "<group>";
+ };
+ BDCD7C921A70771B0001DCC3 /* unicode */ = {
+ isa = PBXGroup;
+ children = (
+ BDCD7C931A70771B0001DCC3 /* appendable.h */,
+ BDCD7C941A70771B0001DCC3 /* bytestream.h */,
+ BDCD7C951A70771B0001DCC3 /* localpointer.h */,
+ BDCD7C961A70771B0001DCC3 /* platform.h */,
+ BDCD7C971A70771B0001DCC3 /* ptypes.h */,
+ BDCD7C981A70771B0001DCC3 /* putil.h */,
+ BDCD7C991A70771B0001DCC3 /* rep.h */,
+ BDCD7C9A1A70771B0001DCC3 /* std_string.h */,
+ BDCD7C9B1A70771B0001DCC3 /* strenum.h */,
+ BDCD7C9C1A70771B0001DCC3 /* stringpiece.h */,
+ BDCD7C9D1A70771B0001DCC3 /* ucasemap.h */,
+ BDCD7C9E1A70771B0001DCC3 /* uchar.h */,
+ BDCD7C9F1A70771B0001DCC3 /* uclean.h */,
+ BDCD7CA01A70771B0001DCC3 /* ucnv.h */,
+ BDCD7CA11A70771B0001DCC3 /* ucnv_err.h */,
+ BDCD7CA21A70771B0001DCC3 /* uconfig.h */,
+ BDCD7CA31A70771B0001DCC3 /* ucsdet.h */,
+ BDCD7CA41A70771B0001DCC3 /* udata.h */,
+ BDCD7CA51A70771B0001DCC3 /* uenum.h */,
+ BDCD7CA61A70771B0001DCC3 /* uiter.h */,
+ BDCD7CA71A70771B0001DCC3 /* umachine.h */,
+ BDCD7CA81A70771B0001DCC3 /* unistr.h */,
+ BDCD7CA91A70771B0001DCC3 /* uobject.h */,
+ BDCD7CAA1A70771B0001DCC3 /* urename.h */,
+ BDCD7CAB1A70771B0001DCC3 /* uset.h */,
+ BDCD7CAC1A70771B0001DCC3 /* ustring.h */,
+ BDCD7CAD1A70771B0001DCC3 /* utf.h */,
+ BDCD7CAE1A70771B0001DCC3 /* utf16.h */,
+ BDCD7CAF1A70771B0001DCC3 /* utf8.h */,
+ BDCD7CB01A70771B0001DCC3 /* utf_old.h */,
+ BDCD7CB11A70771B0001DCC3 /* utrace.h */,
+ BDCD7CB21A70771B0001DCC3 /* utypes.h */,
+ BDCD7CB31A70771B0001DCC3 /* uvernum.h */,
+ BDCD7CB41A70771B0001DCC3 /* uversion.h */,
+ );
+ path = unicode;
+ sourceTree = "<group>";
+ };
C63CD67616BDCDD300DB18F1 /* renderer */ = {
isa = PBXGroup;
children = (
@@ -2376,6 +2623,7 @@
C64EA6A1169E847800778456 /* basetypes */ = {
isa = PBXGroup;
children = (
+ BDCD7C691A70771B0001DCC3 /* icu-ucsdet */,
BD62729B1A158DA900129AA8 /* MCWin32.cpp */,
BD62729C1A158DA900129AA8 /* MCWin32.h */,
BD62729A1A158DA000129AA8 /* MCDefines.h */,
@@ -3144,6 +3392,7 @@
84D7373D199BF83B005124E5 /* MCNNTPFetchArticleOperation.cpp in Sources */,
C64EA6F4169E847800778456 /* MCAbstractMessage.cpp in Sources */,
C64EA6F6169E847800778456 /* MCAbstractMessagePart.cpp in Sources */,
+ BDCD7CF01A7079300001DCC3 /* umutex.cpp in Sources */,
84B639ED17F280F3003B5BA2 /* MCIMAPNoopOperation.cpp in Sources */,
C64EA6F8169E847800778456 /* MCAbstractMultipart.cpp in Sources */,
C64EA6FA169E847800778456 /* MCAbstractPart.cpp in Sources */,
@@ -3178,7 +3427,9 @@
C64EA72B169E847800778456 /* MCIMAPNamespace.cpp in Sources */,
C64EA72D169E847800778456 /* MCIMAPNamespaceItem.cpp in Sources */,
C64EA72F169E847800778456 /* MCIMAPPart.cpp in Sources */,
+ BDCD7CCB1A70771B0001DCC3 /* csrmbcs.cpp in Sources */,
C64EA732169E847800778456 /* MCIMAPSearchExpression.cpp in Sources */,
+ BDCD7CE11A70771B0001DCC3 /* uinvchar.c in Sources */,
C64EA734169E847800778456 /* MCIMAPSession.cpp in Sources */,
C68B2AF717797389005E61EF /* MCConnectionLoggerUtils.cpp in Sources */,
C64EA737169E847800778456 /* MCPOPMessageInfo.cpp in Sources */,
@@ -3199,6 +3450,7 @@
C64EA7DA16A1386600778456 /* MCSMTPOperation.cpp in Sources */,
C64EA7EA16A154B300778456 /* MCSMTPCheckAccountOperation.cpp in Sources */,
C64EA7F116A15A4D00778456 /* MCIMAPOperation.cpp in Sources */,
+ BDCD7CDB1A70771B0001DCC3 /* ucln_in.cpp in Sources */,
C64EA7F816A15A7800778456 /* MCIMAPCheckAccountOperation.cpp in Sources */,
943F1A9A17D964F600F0C798 /* MCIMAPConnectOperation.cpp in Sources */,
C64EA7FC16A2959800778456 /* MCIMAPFetchFoldersOperation.cpp in Sources */,
@@ -3208,11 +3460,13 @@
C63D315C17C9155C00A4D993 /* MCIMAPIdentity.cpp in Sources */,
84D73742199BF963005124E5 /* MCNNTPFetchAllArticlesOperation.cpp in Sources */,
C64EA80816A2999A00778456 /* MCIMAPCreateFolderOperation.cpp in Sources */,
+ BDCD7CC51A70771B0001DCC3 /* csmatch.cpp in Sources */,
C64EA80B16A299B700778456 /* MCIMAPSubscribeFolderOperation.cpp in Sources */,
84D7372C199BF66C005124E5 /* MCNNTPAsyncSession.cpp in Sources */,
C64EA81116A299ED00778456 /* MCIMAPAppendMessageOperation.cpp in Sources */,
C64EA81416A29A2300778456 /* MCIMAPCopyMessagesOperation.cpp in Sources */,
C64EA81716A29A8700778456 /* MCIMAPExpungeOperation.cpp in Sources */,
+ BDCD7CE31A70771B0001DCC3 /* uobject.cpp in Sources */,
C69BA85B17DEFCCB00D601B7 /* NSIndexSet+MCO.m in Sources */,
C64EA81A16A29AF200778456 /* MCIMAPFetchMessagesOperation.cpp in Sources */,
C64EA81D16A29DC500778456 /* MCIMAPFetchContentOperation.cpp in Sources */,
@@ -3230,6 +3484,7 @@
C62C6ED416A2A0E600737497 /* MCIMAPIdentityOperation.cpp in Sources */,
C62C6ED816A398FA00737497 /* MCIMAPFolderInfoOperation.cpp in Sources */,
C62C6EDA16A3D60700737497 /* MCIMAPAsyncConnection.cpp in Sources */,
+ BDCD7CD11A70771B0001DCC3 /* csrutf8.cpp in Sources */,
C62C6EEF16A7B67600737497 /* MCPOPAsyncSession.cpp in Sources */,
C62C6EF216A7C6DE00737497 /* MCPOPFetchHeaderOperation.cpp in Sources */,
C62C6EF516A7C6EA00737497 /* MCPOPFetchMessageOperation.cpp in Sources */,
@@ -3237,13 +3492,18 @@
C62C6EFB16A7C94000737497 /* MCPOPOperation.cpp in Sources */,
C62C6F0616A7E54500737497 /* MCPOPFetchMessagesOperation.cpp in Sources */,
C62C6F0A16A8F58000737497 /* MCIMAPAsyncSession.cpp in Sources */,
+ BDCD7CDD1A70771B0001DCC3 /* udataswp.c in Sources */,
+ BDCD7CD31A70771B0001DCC3 /* cstring.c in Sources */,
C6D42C1D16AE03D6002BB4F9 /* NSData+MCO.mm in Sources */,
C6D42C1E16AE03D6002BB4F9 /* NSString+MCO.mm in Sources */,
C64FF39116B3C13000F8C162 /* MCOObjectWrapper.mm in Sources */,
84D73737199BF7F2005124E5 /* MCNNTPFetchHeaderOperation.cpp in Sources */,
84D7376D199C005A005124E5 /* MCONNTPFetchHeaderOperation.mm in Sources */,
C6E665B71796500B0063F2CF /* ioapi.c in Sources */,
+ BDCD7CD51A70771B0001DCC3 /* inputext.cpp in Sources */,
+ BDCD7CEA1A7077680001DCC3 /* ucsdet.cpp in Sources */,
C07ADC28B83E7959BF114D46 /* MCOIMAPSession.mm in Sources */,
+ BDCD7CE71A70771B0001DCC3 /* utrace.c in Sources */,
C07AD99B2E2054C684DB8FF6 /* NSError+MCO.mm in Sources */,
F87F190C16BB62B00012652F /* MCOIMAPFetchFoldersOperation.mm in Sources */,
84B639F117F282B4003B5BA2 /* MCOPOPNoopOperation.mm in Sources */,
@@ -3251,6 +3511,7 @@
8199FBF119FAF1270040BBC3 /* MCIMAPFetchParsedContentOperation.cpp in Sources */,
C6EB30FE16B8E50F0091F4F1 /* NSArray+MCO.mm in Sources */,
C6EB310116B8E6E60091F4F1 /* NSObject+MCO.mm in Sources */,
+ BDCD7CDF1A70771B0001DCC3 /* uenum.c in Sources */,
C63CD67F16BDCDD400DB18F1 /* MCAddressDisplay.cpp in Sources */,
C63CD68016BDCDD400DB18F1 /* MCDateFormatter.cpp in Sources */,
C63CD68116BDCDD400DB18F1 /* MCHTMLRenderer.cpp in Sources */,
@@ -3263,8 +3524,10 @@
C64BB22E16E5C1EE000DB34C /* MCIndexSet.cpp in Sources */,
84D73764199BFFC7005124E5 /* MCONNTPSession.mm in Sources */,
C64BB23516EDAA17000DB34C /* MCOAbstractMessage.mm in Sources */,
+ BDCD7CE51A70771B0001DCC3 /* ustring.cpp in Sources */,
C64BB23916EDAA3F000DB34C /* MCOAbstractMessagePart.mm in Sources */,
C64BB23C16EDAAC7000DB34C /* MCOAbstractMultipart.mm in Sources */,
+ BDCD7CD71A70771B0001DCC3 /* uarrsort.c in Sources */,
C6D4FD4319FB7DAA001F7E01 /* MCDataMac.mm in Sources */,
C64BB23F16EDAAE1000DB34C /* MCOAbstractPart.mm in Sources */,
C64BB24216EDAAF4000DB34C /* MCOAddress.mm in Sources */,
@@ -3274,6 +3537,7 @@
C64BB25E16FD4377000DB34C /* MCOAttachment.mm in Sources */,
C64BB26116FD4390000DB34C /* MCOMessageBuilder.mm in Sources */,
C64BB26416FD43A1000DB34C /* MCOMessageParser.mm in Sources */,
+ BDCD7CCD1A70771B0001DCC3 /* csrsbcs.cpp in Sources */,
C64BB26716FD43E2000DB34C /* MCOMessagePart.mm in Sources */,
C64BB26A16FD44C2000DB34C /* MCOMultipart.mm in Sources */,
C623C58F16FE6B45001BBEFC /* MCOIMAPOperation.mm in Sources */,
@@ -3291,6 +3555,8 @@
C6CCC5C716FFE5190077A5FC /* MCORange.mm in Sources */,
C63D316217C92D8300A4D993 /* MCOIMAPIdentity.mm in Sources */,
C6F61F7B170169EE0073032E /* MCOIMAPFolderInfoOperation.mm in Sources */,
+ BDCD7CC31A70771B0001DCC3 /* csdetect.cpp in Sources */,
+ BDCD7CCF1A70771B0001DCC3 /* csrucode.cpp in Sources */,
C6F61F7E170169FB0073032E /* MCOIMAPAppendMessageOperation.mm in Sources */,
C6F61F8117016A0D0073032E /* MCOIMAPCopyMessagesOperation.mm in Sources */,
DA0F1C7B177C07B300F0D3B4 /* MCIMAPMessageRenderingOperation.cpp in Sources */,
@@ -3308,6 +3574,7 @@
C6F61FB51702AB340073032E /* MCOIMAPBaseOperation.mm in Sources */,
C608167517759967001F1018 /* MCSMTPDisconnectOperation.cpp in Sources */,
C6A81BBF17068E5E00882C15 /* MCOSMTPSession.mm in Sources */,
+ BDCD7CC71A70771B0001DCC3 /* csr2022.cpp in Sources */,
C673EBED1A46B41000A53F7F /* MCIMAPFolderInfo.cpp in Sources */,
8B0095CC1A00DDE700F84BC0 /* MCOSMTPLoginOperation.mm in Sources */,
C6A81BC317068E9500882C15 /* MCOSMTPSendOperation.mm in Sources */,
@@ -3323,9 +3590,12 @@
C6A81BE61706906D00882C15 /* MCOPOPFetchMessagesOperation.mm in Sources */,
C6A81C001707CEE600882C15 /* MCOPOPMessageInfo.mm in Sources */,
C6AC110017114DAF00B715B7 /* MCPOPCheckAccountOperation.cpp in Sources */,
+ BDCD7CD91A70771B0001DCC3 /* ucln_cmn.cpp in Sources */,
C6D6F7F9171E595D006F5B28 /* MCJSON.cpp in Sources */,
C6D6F954171E5CB8006F5B28 /* MCMD5.cpp in Sources */,
C6D6F956171E5CB8006F5B28 /* MCNull.cpp in Sources */,
+ BDCD7CEE1A7079300001DCC3 /* cmemory.c in Sources */,
+ BDCD7CC91A70771B0001DCC3 /* csrecog.cpp in Sources */,
C6D6F967171FCF9F006F5B28 /* MCJSONParser.cpp in Sources */,
BDCD7C5B1A5B1C2C0001DCC3 /* MCErrorMessage.cpp in Sources */,
84CFA98719F7159700FE35D2 /* MCNNTPFetchOverviewOperation.cpp in Sources */,
@@ -3390,6 +3660,7 @@
84D7373E199BF83B005124E5 /* MCNNTPFetchArticleOperation.cpp in Sources */,
C6BA2B991705F4E6003F0E9E /* MCAbstractMessage.cpp in Sources */,
C6BA2B9A1705F4E6003F0E9E /* MCAbstractMessagePart.cpp in Sources */,
+ BDCD7CF11A7079300001DCC3 /* umutex.cpp in Sources */,
84B639EE17F280F3003B5BA2 /* MCIMAPNoopOperation.cpp in Sources */,
C6BA2B9B1705F4E6003F0E9E /* MCAbstractMultipart.cpp in Sources */,
C6BA2B9C1705F4E6003F0E9E /* MCAbstractPart.cpp in Sources */,
@@ -3424,7 +3695,9 @@
C6BA2BB11705F4E6003F0E9E /* MCIMAPMultipart.cpp in Sources */,
C6BA2BB21705F4E6003F0E9E /* MCIMAPNamespace.cpp in Sources */,
C6BA2BB31705F4E6003F0E9E /* MCIMAPNamespaceItem.cpp in Sources */,
+ BDCD7CCC1A70771B0001DCC3 /* csrmbcs.cpp in Sources */,
C6BA2BB41705F4E6003F0E9E /* MCIMAPPart.cpp in Sources */,
+ BDCD7CE21A70771B0001DCC3 /* uinvchar.c in Sources */,
C6BA2BB51705F4E6003F0E9E /* MCIMAPSearchExpression.cpp in Sources */,
C6BA2BB61705F4E6003F0E9E /* MCIMAPSession.cpp in Sources */,
C68B2AF817797389005E61EF /* MCConnectionLoggerUtils.cpp in Sources */,
@@ -3445,6 +3718,7 @@
C6BA2BC01705F4E6003F0E9E /* MCSMTPSendWithDataOperation.cpp in Sources */,
C6BA2BC11705F4E6003F0E9E /* MCSMTPOperation.cpp in Sources */,
C6BA2BC21705F4E6003F0E9E /* MCSMTPCheckAccountOperation.cpp in Sources */,
+ BDCD7CDC1A70771B0001DCC3 /* ucln_in.cpp in Sources */,
C6BA2BC31705F4E6003F0E9E /* MCIMAPOperation.cpp in Sources */,
C6BA2BC41705F4E6003F0E9E /* MCIMAPCheckAccountOperation.cpp in Sources */,
943F1A9E17D96C5500F0C798 /* MCIMAPConnectOperation.cpp in Sources */,
@@ -3454,11 +3728,13 @@
C6BA2BC71705F4E6003F0E9E /* MCIMAPDeleteFolderOperation.cpp in Sources */,
C63D315D17C9155C00A4D993 /* MCIMAPIdentity.cpp in Sources */,
84D73743199BF963005124E5 /* MCNNTPFetchAllArticlesOperation.cpp in Sources */,
+ BDCD7CC61A70771B0001DCC3 /* csmatch.cpp in Sources */,
C6BA2BC81705F4E6003F0E9E /* MCIMAPCreateFolderOperation.cpp in Sources */,
C6BA2BC91705F4E6003F0E9E /* MCIMAPSubscribeFolderOperation.cpp in Sources */,
84D7372D199BF66C005124E5 /* MCNNTPAsyncSession.cpp in Sources */,
C6BA2BCA1705F4E6003F0E9E /* MCIMAPAppendMessageOperation.cpp in Sources */,
C6BA2BCB1705F4E6003F0E9E /* MCIMAPCopyMessagesOperation.cpp in Sources */,
+ BDCD7CE41A70771B0001DCC3 /* uobject.cpp in Sources */,
C6BA2BCC1705F4E6003F0E9E /* MCIMAPExpungeOperation.cpp in Sources */,
C69BA85C17DEFCCB00D601B7 /* NSIndexSet+MCO.m in Sources */,
C6BA2BCD1705F4E6003F0E9E /* MCIMAPFetchMessagesOperation.cpp in Sources */,
@@ -3476,6 +3752,7 @@
849189A218C93FB7002063A3 /* MCNNTPSession.cpp in Sources */,
C6BA2BD41705F4E6003F0E9E /* MCIMAPIdentityOperation.cpp in Sources */,
C6BA2BD51705F4E6003F0E9E /* MCIMAPFolderInfoOperation.cpp in Sources */,
+ BDCD7CD21A70771B0001DCC3 /* csrutf8.cpp in Sources */,
C6BA2BD61705F4E6003F0E9E /* MCIMAPAsyncConnection.cpp in Sources */,
C6BA2BD71705F4E6003F0E9E /* MCPOPAsyncSession.cpp in Sources */,
C6BA2BD81705F4E6003F0E9E /* MCPOPFetchHeaderOperation.cpp in Sources */,
@@ -3483,13 +3760,18 @@
C6BA2BDA1705F4E6003F0E9E /* MCPOPDeleteMessagesOperation.cpp in Sources */,
C6BA2BDB1705F4E6003F0E9E /* MCPOPOperation.cpp in Sources */,
C6BA2BDC1705F4E6003F0E9E /* MCPOPFetchMessagesOperation.cpp in Sources */,
+ BDCD7CDE1A70771B0001DCC3 /* udataswp.c in Sources */,
+ BDCD7CD41A70771B0001DCC3 /* cstring.c in Sources */,
C6BA2BDD1705F4E6003F0E9E /* MCIMAPAsyncSession.cpp in Sources */,
C6BA2BDE1705F4E6003F0E9E /* NSData+MCO.mm in Sources */,
C6BA2BDF1705F4E6003F0E9E /* NSString+MCO.mm in Sources */,
C6BA2BE01705F4E6003F0E9E /* MCOObjectWrapper.mm in Sources */,
84D73738199BF7F2005124E5 /* MCNNTPFetchHeaderOperation.cpp in Sources */,
84D7376E199C005A005124E5 /* MCONNTPFetchHeaderOperation.mm in Sources */,
+ BDCD7CD61A70771B0001DCC3 /* inputext.cpp in Sources */,
+ BDCD7CEB1A7077680001DCC3 /* ucsdet.cpp in Sources */,
C6E665B81796500B0063F2CF /* ioapi.c in Sources */,
+ BDCD7CE81A70771B0001DCC3 /* utrace.c in Sources */,
C6BA2BE11705F4E6003F0E9E /* MCOIMAPSession.mm in Sources */,
C6BA2BE21705F4E6003F0E9E /* NSError+MCO.mm in Sources */,
C6BA2BE31705F4E6003F0E9E /* MCOIMAPFetchFoldersOperation.mm in Sources */,
@@ -3497,6 +3779,7 @@
8199FBF219FAF1270040BBC3 /* MCIMAPFetchParsedContentOperation.cpp in Sources */,
C6BA2BE41705F4E6003F0E9E /* NSDictionary+MCO.mm in Sources */,
C6BA2BE51705F4E6003F0E9E /* NSArray+MCO.mm in Sources */,
+ BDCD7CE01A70771B0001DCC3 /* uenum.c in Sources */,
C6BA2BE61705F4E6003F0E9E /* NSObject+MCO.mm in Sources */,
C6BA2BE71705F4E6003F0E9E /* MCAddressDisplay.cpp in Sources */,
C6BA2BE81705F4E6003F0E9E /* MCDateFormatter.cpp in Sources */,
@@ -3509,8 +3792,10 @@
C6BA2BEE1705F4E6003F0E9E /* MCIMAPCapabilityOperation.cpp in Sources */,
C6BA2BEF1705F4E6003F0E9E /* MCIndexSet.cpp in Sources */,
84D73765199BFFC7005124E5 /* MCONNTPSession.mm in Sources */,
+ BDCD7CE61A70771B0001DCC3 /* ustring.cpp in Sources */,
C6BA2BF01705F4E6003F0E9E /* MCOAbstractMessage.mm in Sources */,
C6BA2BF11705F4E6003F0E9E /* MCOAbstractMessagePart.mm in Sources */,
+ BDCD7CD81A70771B0001DCC3 /* uarrsort.c in Sources */,
C6D4FD4419FB7DB2001F7E01 /* MCDataMac.mm in Sources */,
C6BA2BF21705F4E6003F0E9E /* MCOAbstractMultipart.mm in Sources */,
C6BA2BF31705F4E6003F0E9E /* MCOAbstractPart.mm in Sources */,
@@ -3520,6 +3805,7 @@
C6BA2BF71705F4E6003F0E9E /* MCOOperation.mm in Sources */,
C6BA2BF81705F4E6003F0E9E /* MCOAttachment.mm in Sources */,
C6BA2BF91705F4E6003F0E9E /* MCOMessageBuilder.mm in Sources */,
+ BDCD7CCE1A70771B0001DCC3 /* csrsbcs.cpp in Sources */,
C6BA2BFA1705F4E6003F0E9E /* MCOMessageParser.mm in Sources */,
C6BA2BFB1705F4E6003F0E9E /* MCOMessagePart.mm in Sources */,
C6BA2BFC1705F4E6003F0E9E /* MCOMultipart.mm in Sources */,
@@ -3537,6 +3823,8 @@
C6BA2C061705F4E6003F0E9E /* MCOAbstractMessageRendererCallback.mm in Sources */,
C6BA2C071705F4E6003F0E9E /* MCORange.mm in Sources */,
C63D316317C92D8300A4D993 /* MCOIMAPIdentity.mm in Sources */,
+ BDCD7CC41A70771B0001DCC3 /* csdetect.cpp in Sources */,
+ BDCD7CD01A70771B0001DCC3 /* csrucode.cpp in Sources */,
C6BA2C081705F4E6003F0E9E /* MCOIMAPFolderInfoOperation.mm in Sources */,
C6BA2C091705F4E6003F0E9E /* MCOIMAPAppendMessageOperation.mm in Sources */,
C6BA2C0A1705F4E6003F0E9E /* MCOIMAPCopyMessagesOperation.mm in Sources */,
@@ -3554,6 +3842,7 @@
C6BA2C131705F4E6003F0E9E /* MCOIMAPFolderInfo.mm in Sources */,
C6BA2C141705F4E6003F0E9E /* MCOIMAPBaseOperation.mm in Sources */,
C608167617759968001F1018 /* MCSMTPDisconnectOperation.cpp in Sources */,
+ BDCD7CC81A70771B0001DCC3 /* csr2022.cpp in Sources */,
C673EBEE1A46B41000A53F7F /* MCIMAPFolderInfo.cpp in Sources */,
C6A81BC017068E5E00882C15 /* MCOSMTPSession.mm in Sources */,
8B0095CE1A00DE7700F84BC0 /* MCOSMTPLoginOperation.mm in Sources */,
@@ -3569,9 +3858,12 @@
C6A81BE31706905600882C15 /* MCOPOPOperation.mm in Sources */,
C6A81BE71706906D00882C15 /* MCOPOPFetchMessagesOperation.mm in Sources */,
C6A81C011707CEE600882C15 /* MCOPOPMessageInfo.mm in Sources */,
+ BDCD7CDA1A70771B0001DCC3 /* ucln_cmn.cpp in Sources */,
C6AC110117114DAF00B715B7 /* MCPOPCheckAccountOperation.cpp in Sources */,
C6D6F7FA171E595D006F5B28 /* MCJSON.cpp in Sources */,
C6D6F955171E5CB8006F5B28 /* MCMD5.cpp in Sources */,
+ BDCD7CEF1A7079300001DCC3 /* cmemory.c in Sources */,
+ BDCD7CCA1A70771B0001DCC3 /* csrecog.cpp in Sources */,
C6D6F957171E5CB8006F5B28 /* MCNull.cpp in Sources */,
BDCD7C5C1A5B1C2C0001DCC3 /* MCErrorMessage.cpp in Sources */,
84CFA98819F7159700FE35D2 /* MCNNTPFetchOverviewOperation.cpp in Sources */,
@@ -3656,7 +3948,6 @@
INFOPLIST_FILE = "framework-Info.plist";
INSTALL_PATH = "@executable_path/../Frameworks";
OTHER_LDFLAGS = (
- "-luchardet-ios",
"-lctemplate-ios",
"-letpan-ios",
"-lxml2",
@@ -3684,7 +3975,6 @@
INFOPLIST_FILE = "framework-Info.plist";
INSTALL_PATH = "@executable_path/../Frameworks";
OTHER_LDFLAGS = (
- "-luchardet-ios",
"-lctemplate-ios",
"-letpan-ios",
"-lxml2",
@@ -3724,7 +4014,6 @@
GCC_WARN_UNUSED_FUNCTION = YES;
MTL_ENABLE_DEBUG_INFO = YES;
OTHER_LDFLAGS = (
- "-luchardet",
"-lctemplate",
"-letpan",
"-lxml2",
@@ -3764,7 +4053,6 @@
GCC_WARN_UNUSED_FUNCTION = YES;
MTL_ENABLE_DEBUG_INFO = NO;
OTHER_LDFLAGS = (
- "-luchardet",
"-lctemplate",
"-letpan",
"-lxml2",
@@ -3799,8 +4087,7 @@
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
- "JSON_NDEBUG=1",
- "JSON_ISO_STRICT=1",
+ "U_COMMON_IMPLEMENTATION=1",
"$(inherited)",
);
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
@@ -3818,8 +4105,8 @@
"HEADER_SEARCH_PATHS[sdk=iphoneos*]" = "$(IOS_HEADERS_SEARCH_PATHS)";
"HEADER_SEARCH_PATHS[sdk=iphonesimulator*]" = "$(IOS_HEADERS_SEARCH_PATHS)";
"HEADER_SEARCH_PATHS[sdk=macosx*]" = "$(OSX_HEADERS_SEARCH_PATHS)";
- IOS_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/include\" \"$(SRCROOT)/../Externals/ctemplate-ios/include\" \"$(SRCROOT)/../Externals/tidy-html5-ios/include/tidy\" \"$(SRCROOT)/../Externals/uchardet-ios/include\" /usr/include/libxml2";
- IOS_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/lib\" \"$(SRCROOT)/../Externals/ctemplate-ios/lib\" \"$(SRCROOT)/../Externals/libsasl-ios/lib\" \"$(SRCROOT)/../Externals/tidy-html5-ios/lib\" \"$(SRCROOT)/../Externals/uchardet-ios/lib\"";
+ IOS_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../src/core/basetypes/icu-ucsdet/include\" \"$(SRCROOT)/../Externals/libetpan-ios/include\" \"$(SRCROOT)/../Externals/ctemplate-ios/include\" \"$(SRCROOT)/../Externals/tidy-html5-ios/include/tidy\" /usr/include/libxml2";
+ IOS_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/lib\" \"$(SRCROOT)/../Externals/ctemplate-ios/lib\" \"$(SRCROOT)/../Externals/libsasl-ios/lib\" \"$(SRCROOT)/../Externals/tidy-html5-ios/lib\"";
LIBRARY_SEARCH_PATHS = (
"\"$(SRCROOT)/../Externals/libetpan/lib\"",
"\"$(SRCROOT)/../Externals/icu4c/lib\"",
@@ -3830,8 +4117,8 @@
"LIBRARY_SEARCH_PATHS[sdk=macosx*]" = "$(OSX_LIBRARY_SEARCH_PATHS)";
MACOSX_DEPLOYMENT_TARGET = 10.8;
ONLY_ACTIVE_ARCH = YES;
- OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/include\" \"$(SRCROOT)/../Externals/ctemplate-osx/include\" \"$(SRCROOT)/../Externals/uchardet-osx/include\" /usr/include/tidy /usr/include/libxml2";
- OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/lib\" \"$(SRCROOT)/../Externals/uchardet-osx/lib\" \"$(SRCROOT)/../Externals/ctemplate-osx/lib\"";
+ OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../src/core/basetypes/icu-ucsdet/include\" \"$(SRCROOT)/../Externals/libetpan-osx/include\" \"$(SRCROOT)/../Externals/ctemplate-osx/include\" /usr/include/tidy /usr/include/libxml2";
+ OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/lib\" \"$(SRCROOT)/../Externals/ctemplate-osx/lib\"";
};
name = Debug;
};
@@ -3848,10 +4135,7 @@
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
- GCC_PREPROCESSOR_DEFINITIONS = (
- "JSON_NDEBUG=1",
- "JSON_ISO_STRICT=1",
- );
+ GCC_PREPROCESSOR_DEFINITIONS = "U_COMMON_IMPLEMENTATION=1";
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES;
@@ -3866,8 +4150,8 @@
"HEADER_SEARCH_PATHS[sdk=iphoneos*]" = "$(IOS_HEADERS_SEARCH_PATHS)";
"HEADER_SEARCH_PATHS[sdk=iphonesimulator*]" = "$(IOS_HEADERS_SEARCH_PATHS)";
"HEADER_SEARCH_PATHS[sdk=macosx*]" = "$(OSX_HEADERS_SEARCH_PATHS)";
- IOS_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/include\" \"$(SRCROOT)/../Externals/ctemplate-ios/include\" \"$(SRCROOT)/../Externals/tidy-html5-ios/include/tidy\" \"$(SRCROOT)/../Externals/uchardet-ios/include\" /usr/include/libxml2";
- IOS_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/lib\" \"$(SRCROOT)/../Externals/ctemplate-ios/lib\" \"$(SRCROOT)/../Externals/libsasl-ios/lib\" \"$(SRCROOT)/../Externals/tidy-html5-ios/lib\" \"$(SRCROOT)/../Externals/uchardet-ios/lib\"";
+ IOS_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../src/core/basetypes/icu-ucsdet/include\" \"$(SRCROOT)/../Externals/libetpan-ios/include\" \"$(SRCROOT)/../Externals/ctemplate-ios/include\" \"$(SRCROOT)/../Externals/tidy-html5-ios/include/tidy\" /usr/include/libxml2";
+ IOS_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/lib\" \"$(SRCROOT)/../Externals/ctemplate-ios/lib\" \"$(SRCROOT)/../Externals/libsasl-ios/lib\" \"$(SRCROOT)/../Externals/tidy-html5-ios/lib\"";
LIBRARY_SEARCH_PATHS = (
"\"$(SRCROOT)/../Externals/libetpan/lib\"",
"\"$(SRCROOT)/../Externals/icu4c/lib\"",
@@ -3877,8 +4161,8 @@
"LIBRARY_SEARCH_PATHS[sdk=iphonesimulator*]" = "$(IOS_LIBRARY_SEARCH_PATHS)";
"LIBRARY_SEARCH_PATHS[sdk=macosx*]" = "$(OSX_LIBRARY_SEARCH_PATHS)";
MACOSX_DEPLOYMENT_TARGET = 10.8;
- OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/include\" \"$(SRCROOT)/../Externals/ctemplate-osx/include\" \"$(SRCROOT)/../Externals/uchardet-osx/include\" /usr/include/tidy /usr/include/libxml2";
- OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/lib\" \"$(SRCROOT)/../Externals/uchardet-osx/lib\" \"$(SRCROOT)/../Externals/ctemplate-osx/lib\"";
+ OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../src/core/basetypes/icu-ucsdet/include\" \"$(SRCROOT)/../Externals/libetpan-osx/include\" \"$(SRCROOT)/../Externals/ctemplate-osx/include\" /usr/include/tidy /usr/include/libxml2";
+ OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/lib\" \"$(SRCROOT)/../Externals/ctemplate-osx/lib\"";
};
name = Release;
};
@@ -3932,7 +4216,6 @@
IPHONEOS_DEPLOYMENT_TARGET = 6.1;
ONLY_ACTIVE_ARCH = NO;
OTHER_LDFLAGS = (
- "-luchardet-ios",
"-lctemplate-ios",
"-letpan-ios",
"-lxml2",
@@ -3964,7 +4247,6 @@
ONLY_ACTIVE_ARCH = NO;
OTHER_CFLAGS = "-DNS_BLOCK_ASSERTIONS=1";
OTHER_LDFLAGS = (
- "-luchardet-ios",
"-lctemplate-ios",
"-letpan-ios",
"-lxml2",
@@ -4094,7 +4376,6 @@
"-all_load",
"-ltidy",
"-lz",
- "-luchardet",
);
PRODUCT_NAME = MailCore;
SDKROOT = macosx;
@@ -4123,7 +4404,6 @@
"-all_load",
"-ltidy",
"-lz",
- "-luchardet",
);
PRODUCT_NAME = MailCore;
SDKROOT = macosx;
diff --git a/src/core/basetypes/MCData.cpp b/src/core/basetypes/MCData.cpp
index 1fbca6b4..170ce927 100644
--- a/src/core/basetypes/MCData.cpp
+++ b/src/core/basetypes/MCData.cpp
@@ -2,7 +2,7 @@
#include "MCData.h"
-#define USE_UCHARDET 1
+#define USE_UCHARDET 0
#include <stdlib.h>
#include <string.h>
diff --git a/src/core/basetypes/icu-ucsdet/cmemory.c b/src/core/basetypes/icu-ucsdet/cmemory.c
new file mode 100644
index 00000000..cd3ccac6
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/cmemory.c
@@ -0,0 +1,183 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File cmemory.c ICU Heap allocation.
+* All ICU heap allocation, both for C and C++ new of ICU
+* class types, comes through these functions.
+*
+* If you have a need to replace ICU allocation, this is the
+* place to do it.
+*
+* Note that uprv_malloc(0) returns a non-NULL pointer, and
+* that a subsequent free of that pointer value is a NOP.
+*
+******************************************************************************
+*/
+#include "unicode/uclean.h"
+#include "cmemory.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include <stdlib.h>
+
+/* uprv_malloc(0) returns a pointer to this read-only data. */
+static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0};
+
+/* Function Pointers for user-supplied heap functions */
+static const void *pContext;
+static UMemAllocFn *pAlloc;
+static UMemReallocFn *pRealloc;
+static UMemFreeFn *pFree;
+
+/* Flag indicating whether any heap allocations have happened.
+ * Used to prevent changing out the heap functions after allocations have been made */
+static UBool gHeapInUse;
+
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#include <stdio.h>
+static int n=0;
+static long b=0;
+#endif
+
+#if U_DEBUG
+
+static char gValidMemorySink = 0;
+
+U_CAPI void uprv_checkValidMemory(const void *p, size_t n) {
+ /*
+ * Access the memory to ensure that it's all valid.
+ * Load and save a computed value to try to ensure that the compiler
+ * does not throw away the whole loop.
+ * A thread analyzer might complain about un-mutexed access to gValidMemorySink
+ * which is true but harmless because no one ever uses the value in gValidMemorySink.
+ */
+ const char *s = (const char *)p;
+ char c = gValidMemorySink;
+ size_t i;
+ U_ASSERT(p != NULL);
+ for(i = 0; i < n; ++i) {
+ c ^= s[i];
+ }
+ gValidMemorySink = c;
+}
+
+#endif /* U_DEBUG */
+
+U_CAPI void * U_EXPORT2
+uprv_malloc(size_t s) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#if 1
+ putchar('>');
+ fflush(stdout);
+#else
+ fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr);
+#endif
+#endif
+ if (s > 0) {
+ gHeapInUse = TRUE;
+ if (pAlloc) {
+ return (*pAlloc)(pContext, s);
+ } else {
+ return uprv_default_malloc(s);
+ }
+ } else {
+ return (void *)zeroMem;
+ }
+}
+
+U_CAPI void * U_EXPORT2
+uprv_realloc(void * buffer, size_t size) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+ putchar('~');
+ fflush(stdout);
+#endif
+ if (buffer == zeroMem) {
+ return uprv_malloc(size);
+ } else if (size == 0) {
+ if (pFree) {
+ (*pFree)(pContext, buffer);
+ } else {
+ uprv_default_free(buffer);
+ }
+ return (void *)zeroMem;
+ } else {
+ gHeapInUse = TRUE;
+ if (pRealloc) {
+ return (*pRealloc)(pContext, buffer, size);
+ } else {
+ return uprv_default_realloc(buffer, size);
+ }
+ }
+}
+
+U_CAPI void U_EXPORT2
+uprv_free(void *buffer) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+ putchar('<');
+ fflush(stdout);
+#endif
+ if (buffer != zeroMem) {
+ if (pFree) {
+ (*pFree)(pContext, buffer);
+ } else {
+ uprv_default_free(buffer);
+ }
+ }
+}
+
+U_CAPI void * U_EXPORT2
+uprv_calloc(size_t num, size_t size) {
+ void *mem = NULL;
+ size *= num;
+ mem = uprv_malloc(size);
+ if (mem) {
+ uprv_memset(mem, 0, size);
+ }
+ return mem;
+}
+
+U_CAPI void U_EXPORT2
+u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status)
+{
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ if (a==NULL || r==NULL || f==NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (gHeapInUse) {
+ *status = U_INVALID_STATE_ERROR;
+ return;
+ }
+ pContext = context;
+ pAlloc = a;
+ pRealloc = r;
+ pFree = f;
+}
+
+
+U_CFUNC UBool cmemory_cleanup(void) {
+ pContext = NULL;
+ pAlloc = NULL;
+ pRealloc = NULL;
+ pFree = NULL;
+ gHeapInUse = FALSE;
+ return TRUE;
+}
+
+
+/*
+ * gHeapInUse
+ * Return True if ICU has allocated any memory.
+ * Used by u_SetMutexFunctions() and similar to verify that ICU has not
+ * been used, that it is in a pristine initial state.
+ */
+U_CFUNC UBool cmemory_inUse() {
+ return gHeapInUse;
+}
+
diff --git a/src/core/basetypes/icu-ucsdet/csdetect.cpp b/src/core/basetypes/icu-ucsdet/csdetect.cpp
new file mode 100644
index 00000000..3efbd492
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csdetect.cpp
@@ -0,0 +1,485 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2013, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucsdet.h"
+
+#include "csdetect.h"
+#include "csmatch.h"
+#include "uenumimp.h"
+
+#include "cmemory.h"
+#include "cstring.h"
+#include "umutex.h"
+#include "ucln_in.h"
+#include "uarrsort.h"
+#include "inputext.h"
+#include "csrsbcs.h"
+#include "csrmbcs.h"
+#include "csrutf8.h"
+#include "csrucode.h"
+#include "csr2022.h"
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+
+U_NAMESPACE_BEGIN
+
+struct CSRecognizerInfo : public UMemory {
+ CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled)
+ : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {};
+
+ ~CSRecognizerInfo() {delete recognizer;};
+
+ CharsetRecognizer *recognizer;
+ UBool isDefaultEnabled;
+};
+
+U_NAMESPACE_END
+
+static icu::CSRecognizerInfo **fCSRecognizers = NULL;
+static icu::UInitOnce gCSRecognizersInitOnce;
+static int32_t fCSRecognizers_size = 0;
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV csdet_cleanup(void)
+{
+ U_NAMESPACE_USE
+ if (fCSRecognizers != NULL) {
+ for(int32_t r = 0; r < fCSRecognizers_size; r += 1) {
+ delete fCSRecognizers[r];
+ fCSRecognizers[r] = NULL;
+ }
+
+ DELETE_ARRAY(fCSRecognizers);
+ fCSRecognizers = NULL;
+ fCSRecognizers_size = 0;
+ }
+ gCSRecognizersInitOnce.reset();
+
+ return TRUE;
+}
+
+static int32_t U_CALLCONV
+charsetMatchComparator(const void * /*context*/, const void *left, const void *right)
+{
+ U_NAMESPACE_USE
+
+ const CharsetMatch **csm_l = (const CharsetMatch **) left;
+ const CharsetMatch **csm_r = (const CharsetMatch **) right;
+
+ // NOTE: compare is backwards to sort from highest to lowest.
+ return (*csm_r)->getConfidence() - (*csm_l)->getConfidence();
+}
+
+static void U_CALLCONV initRecognizers(UErrorCode &status) {
+ U_NAMESPACE_USE
+ ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup);
+ CSRecognizerInfo *tempArray[] = {
+ new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE),
+
+ new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE),
+
+ new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_8859_7_el(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_8859_8_he(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_windows_1251(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_windows_1256(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_KOI8_R(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_sjis(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_gb_18030(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_euc_jp(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_euc_kr(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
+
+ new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
+ new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
+
+ new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), FALSE),
+ new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
+ new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
+ new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
+ };
+ int32_t rCount = ARRAY_SIZE(tempArray);
+
+ fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount);
+
+ if (fCSRecognizers == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ else {
+ fCSRecognizers_size = rCount;
+ for (int32_t r = 0; r < rCount; r += 1) {
+ fCSRecognizers[r] = tempArray[r];
+ if (fCSRecognizers[r] == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+ }
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+void CharsetDetector::setRecognizers(UErrorCode &status)
+{
+ umtx_initOnce(gCSRecognizersInitOnce, &initRecognizers, status);
+}
+
+CharsetDetector::CharsetDetector(UErrorCode &status)
+ : textIn(new InputText(status)), resultArray(NULL),
+ resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE),
+ fEnabledRecognizers(NULL)
+{
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ setRecognizers(status);
+
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ resultArray = (CharsetMatch **)uprv_malloc(sizeof(CharsetMatch *)*fCSRecognizers_size);
+
+ if (resultArray == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
+ resultArray[i] = new CharsetMatch();
+
+ if (resultArray[i] == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ break;
+ }
+ }
+}
+
+CharsetDetector::~CharsetDetector()
+{
+ delete textIn;
+
+ for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
+ delete resultArray[i];
+ }
+
+ uprv_free(resultArray);
+
+ if (fEnabledRecognizers) {
+ uprv_free(fEnabledRecognizers);
+ }
+}
+
+void CharsetDetector::setText(const char *in, int32_t len)
+{
+ textIn->setText(in, len);
+ fFreshTextSet = TRUE;
+}
+
+UBool CharsetDetector::setStripTagsFlag(UBool flag)
+{
+ UBool temp = fStripTags;
+ fStripTags = flag;
+ fFreshTextSet = TRUE;
+ return temp;
+}
+
+UBool CharsetDetector::getStripTagsFlag() const
+{
+ return fStripTags;
+}
+
+void CharsetDetector::setDeclaredEncoding(const char *encoding, int32_t len) const
+{
+ textIn->setDeclaredEncoding(encoding,len);
+}
+
+int32_t CharsetDetector::getDetectableCount()
+{
+ UErrorCode status = U_ZERO_ERROR;
+
+ setRecognizers(status);
+
+ return fCSRecognizers_size;
+}
+
+const CharsetMatch *CharsetDetector::detect(UErrorCode &status)
+{
+ int32_t maxMatchesFound = 0;
+
+ detectAll(maxMatchesFound, status);
+
+ if(maxMatchesFound > 0) {
+ return resultArray[0];
+ } else {
+ return NULL;
+ }
+}
+
+const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, UErrorCode &status)
+{
+ if(!textIn->isSet()) {
+ status = U_MISSING_RESOURCE_ERROR;// TODO: Need to set proper status code for input text not set
+
+ return NULL;
+ } else if (fFreshTextSet) {
+ CharsetRecognizer *csr;
+ int32_t i;
+
+ textIn->MungeInput(fStripTags);
+
+ // Iterate over all possible charsets, remember all that
+ // give a match quality > 0.
+ resultCount = 0;
+ for (i = 0; i < fCSRecognizers_size; i += 1) {
+ csr = fCSRecognizers[i]->recognizer;
+ if (csr->match(textIn, resultArray[resultCount])) {
+ resultCount++;
+ }
+ }
+
+ if (resultCount > 1) {
+ uprv_sortArray(resultArray, resultCount, sizeof resultArray[0], charsetMatchComparator, NULL, TRUE, &status);
+ }
+ fFreshTextSet = FALSE;
+ }
+
+ maxMatchesFound = resultCount;
+
+ return resultArray;
+}
+
+void CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status)
+{
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ int32_t modIdx = -1;
+ UBool isDefaultVal = FALSE;
+ for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+ CSRecognizerInfo *csrinfo = fCSRecognizers[i];
+ if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) {
+ modIdx = i;
+ isDefaultVal = (csrinfo->isDefaultEnabled == enabled);
+ break;
+ }
+ }
+ if (modIdx < 0) {
+ // No matching encoding found
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ if (fEnabledRecognizers == NULL && !isDefaultVal) {
+ // Create an array storing the non default setting
+ fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size);
+ if (fEnabledRecognizers == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ // Initialize the array with default info
+ for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+ fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled;
+ }
+ }
+
+ if (fEnabledRecognizers != NULL) {
+ fEnabledRecognizers[modIdx] = enabled;
+ }
+}
+
+/*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const
+{
+ if( index > fCSRecognizers_size-1 || index < 0) {
+ status = U_INDEX_OUTOFBOUNDS_ERROR;
+
+ return 0;
+ } else {
+ return fCSRecognizers[index]->getName();
+ }
+}*/
+
+U_NAMESPACE_END
+
+U_CDECL_BEGIN
+typedef struct {
+ int32_t currIndex;
+ UBool all;
+ UBool *enabledRecognizers;
+} Context;
+
+
+
+static void U_CALLCONV
+enumClose(UEnumeration *en) {
+ if(en->context != NULL) {
+ DELETE_ARRAY(en->context);
+ }
+
+ DELETE_ARRAY(en);
+}
+
+static int32_t U_CALLCONV
+enumCount(UEnumeration *en, UErrorCode *) {
+ if (((Context *)en->context)->all) {
+ // ucsdet_getAllDetectableCharsets, all charset detector names
+ return fCSRecognizers_size;
+ }
+
+ // Otherwise, ucsdet_getDetectableCharsets - only enabled ones
+ int32_t count = 0;
+ UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
+ if (enabledArray != NULL) {
+ // custom set
+ for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+ if (enabledArray[i]) {
+ count++;
+ }
+ }
+ } else {
+ // default set
+ for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+ if (fCSRecognizers[i]->isDefaultEnabled) {
+ count++;
+ }
+ }
+ }
+ return count;
+}
+
+static const char* U_CALLCONV
+enumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) {
+ const char *currName = NULL;
+
+ if (((Context *)en->context)->currIndex < fCSRecognizers_size) {
+ if (((Context *)en->context)->all) {
+ // ucsdet_getAllDetectableCharsets, all charset detector names
+ currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+ ((Context *)en->context)->currIndex++;
+ } else {
+ // ucsdet_getDetectableCharsets
+ UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
+ if (enabledArray != NULL) {
+ // custome set
+ while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
+ if (enabledArray[((Context *)en->context)->currIndex]) {
+ currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+ }
+ ((Context *)en->context)->currIndex++;
+ }
+ } else {
+ // default set
+ while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
+ if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) {
+ currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+ }
+ ((Context *)en->context)->currIndex++;
+ }
+ }
+ }
+ }
+
+ if(resultLength != NULL) {
+ *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName);
+ }
+
+ return currName;
+}
+
+
+static void U_CALLCONV
+enumReset(UEnumeration *en, UErrorCode *) {
+ ((Context *)en->context)->currIndex = 0;
+}
+
+static const UEnumeration gCSDetEnumeration = {
+ NULL,
+ NULL,
+ enumClose,
+ enumCount,
+ uenum_unextDefault,
+ enumNext,
+ enumReset
+};
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+UEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status)
+{
+
+ /* Initialize recognized charsets. */
+ setRecognizers(status);
+
+ if(U_FAILURE(status)) {
+ return 0;
+ }
+
+ UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
+ if (en == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
+ en->context = (void*)NEW_ARRAY(Context, 1);
+ if (en->context == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ DELETE_ARRAY(en);
+ return 0;
+ }
+ uprv_memset(en->context, 0, sizeof(Context));
+ ((Context*)en->context)->all = TRUE;
+ return en;
+}
+
+UEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const
+{
+ if(U_FAILURE(status)) {
+ return 0;
+ }
+
+ UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
+ if (en == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
+ en->context = (void*)NEW_ARRAY(Context, 1);
+ if (en->context == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ DELETE_ARRAY(en);
+ return 0;
+ }
+ uprv_memset(en->context, 0, sizeof(Context));
+ ((Context*)en->context)->all = FALSE;
+ ((Context*)en->context)->enabledRecognizers = fEnabledRecognizers;
+ return en;
+}
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/csmatch.cpp b/src/core/basetypes/icu-ucsdet/csmatch.cpp
new file mode 100644
index 00000000..7233e3c2
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csmatch.cpp
@@ -0,0 +1,73 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2012, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+#include "unicode/unistr.h"
+#include "unicode/ucnv.h"
+
+#include "csmatch.h"
+
+#include "csrecog.h"
+#include "inputext.h"
+
+U_NAMESPACE_BEGIN
+
+CharsetMatch::CharsetMatch()
+ : textIn(NULL), confidence(0), fCharsetName(NULL), fLang(NULL)
+{
+ // nothing else to do.
+}
+
+void CharsetMatch::set(InputText *input, const CharsetRecognizer *cr, int32_t conf,
+ const char *csName, const char *lang)
+{
+ textIn = input;
+ confidence = conf;
+ fCharsetName = csName;
+ fLang = lang;
+ if (cr != NULL) {
+ if (fCharsetName == NULL) {
+ fCharsetName = cr->getName();
+ }
+ if (fLang == NULL) {
+ fLang = cr->getLanguage();
+ }
+ }
+}
+
+const char* CharsetMatch::getName()const
+{
+ return fCharsetName;
+}
+
+const char* CharsetMatch::getLanguage()const
+{
+ return fLang;
+}
+
+int32_t CharsetMatch::getConfidence()const
+{
+ return confidence;
+}
+
+/*
+int32_t CharsetMatch::getUChars(UChar *buf, int32_t cap, UErrorCode *status) const
+{
+ UConverter *conv = ucnv_open(getName(), status);
+ int32_t result = ucnv_toUChars(conv, buf, cap, (const char *) textIn->fRawInput, textIn->fRawLength, status);
+
+ ucnv_close(conv);
+
+ return result;
+}
+*/
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/csr2022.cpp b/src/core/basetypes/icu-ucsdet/csr2022.cpp
new file mode 100644
index 00000000..3db0bc9f
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csr2022.cpp
@@ -0,0 +1,190 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2012, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "cstring.h"
+
+#include "csr2022.h"
+#include "csmatch.h"
+
+U_NAMESPACE_BEGIN
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+/**
+ * Matching function shared among the 2022 detectors JP, CN and KR
+ * Counts up the number of legal and unrecognized escape sequences in
+ * the sample of text, and computes a score based on the total number &
+ * the proportion that fit the encoding.
+ *
+ *
+ * @param text the byte buffer containing text to analyse
+ * @param textLen the size of the text in the byte.
+ * @param escapeSequences the byte escape sequences to test for.
+ * @return match quality, in the range of 0-100.
+ */
+int32_t CharsetRecog_2022::match_2022(const uint8_t *text, int32_t textLen, const uint8_t escapeSequences[][5], int32_t escapeSequences_length) const
+{
+ int32_t i, j;
+ int32_t escN;
+ int32_t hits = 0;
+ int32_t misses = 0;
+ int32_t shifts = 0;
+ int32_t quality;
+
+ i = 0;
+ while(i < textLen) {
+ if(text[i] == 0x1B) {
+ escN = 0;
+ while(escN < escapeSequences_length) {
+ const uint8_t *seq = escapeSequences[escN];
+ int32_t seq_length = (int32_t)uprv_strlen((const char *) seq);
+
+ if (textLen-i >= seq_length) {
+ j = 1;
+ while(j < seq_length) {
+ if(seq[j] != text[i+j]) {
+ goto checkEscapes;
+ }
+
+ j += 1;
+ }
+
+ hits += 1;
+ i += seq_length-1;
+ goto scanInput;
+ }
+ // else we ran out of string to compare this time.
+checkEscapes:
+ escN += 1;
+ }
+
+ misses += 1;
+ }
+
+ if( text[i]== 0x0e || text[i] == 0x0f){
+ shifts += 1;
+ }
+
+scanInput:
+ i += 1;
+ }
+
+ if (hits == 0) {
+ return 0;
+ }
+
+ //
+ // Initial quality is based on relative proportion of recongized vs.
+ // unrecognized escape sequences.
+ // All good: quality = 100;
+ // half or less good: quality = 0;
+ // linear inbetween.
+ quality = (100*hits - 100*misses) / (hits + misses);
+
+ // Back off quality if there were too few escape sequences seen.
+ // Include shifts in this computation, so that KR does not get penalized
+ // for having only a single Escape sequence, but many shifts.
+ if (hits+shifts < 5) {
+ quality -= (5-(hits+shifts))*10;
+ }
+
+ if (quality < 0) {
+ quality = 0;
+ }
+
+ return quality;
+}
+
+
+static const uint8_t escapeSequences_2022JP[][5] = {
+ {0x1b, 0x24, 0x28, 0x43, 0x00}, // KS X 1001:1992
+ {0x1b, 0x24, 0x28, 0x44, 0x00}, // JIS X 212-1990
+ {0x1b, 0x24, 0x40, 0x00, 0x00}, // JIS C 6226-1978
+ {0x1b, 0x24, 0x41, 0x00, 0x00}, // GB 2312-80
+ {0x1b, 0x24, 0x42, 0x00, 0x00}, // JIS X 208-1983
+ {0x1b, 0x26, 0x40, 0x00, 0x00}, // JIS X 208 1990, 1997
+ {0x1b, 0x28, 0x42, 0x00, 0x00}, // ASCII
+ {0x1b, 0x28, 0x48, 0x00, 0x00}, // JIS-Roman
+ {0x1b, 0x28, 0x49, 0x00, 0x00}, // Half-width katakana
+ {0x1b, 0x28, 0x4a, 0x00, 0x00}, // JIS-Roman
+ {0x1b, 0x2e, 0x41, 0x00, 0x00}, // ISO 8859-1
+ {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7
+};
+
+static const uint8_t escapeSequences_2022KR[][5] = {
+ {0x1b, 0x24, 0x29, 0x43, 0x00}
+};
+
+static const uint8_t escapeSequences_2022CN[][5] = {
+ {0x1b, 0x24, 0x29, 0x41, 0x00}, // GB 2312-80
+ {0x1b, 0x24, 0x29, 0x47, 0x00}, // CNS 11643-1992 Plane 1
+ {0x1b, 0x24, 0x2A, 0x48, 0x00}, // CNS 11643-1992 Plane 2
+ {0x1b, 0x24, 0x29, 0x45, 0x00}, // ISO-IR-165
+ {0x1b, 0x24, 0x2B, 0x49, 0x00}, // CNS 11643-1992 Plane 3
+ {0x1b, 0x24, 0x2B, 0x4A, 0x00}, // CNS 11643-1992 Plane 4
+ {0x1b, 0x24, 0x2B, 0x4B, 0x00}, // CNS 11643-1992 Plane 5
+ {0x1b, 0x24, 0x2B, 0x4C, 0x00}, // CNS 11643-1992 Plane 6
+ {0x1b, 0x24, 0x2B, 0x4D, 0x00}, // CNS 11643-1992 Plane 7
+ {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2
+ {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3
+};
+
+CharsetRecog_2022JP::~CharsetRecog_2022JP() {}
+
+const char *CharsetRecog_2022JP::getName() const {
+ return "ISO-2022-JP";
+}
+
+UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const {
+ int32_t confidence = match_2022(textIn->fInputBytes,
+ textIn->fInputLen,
+ escapeSequences_2022JP,
+ ARRAY_SIZE(escapeSequences_2022JP));
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_2022KR::~CharsetRecog_2022KR() {}
+
+const char *CharsetRecog_2022KR::getName() const {
+ return "ISO-2022-KR";
+}
+
+UBool CharsetRecog_2022KR::match(InputText *textIn, CharsetMatch *results) const {
+ int32_t confidence = match_2022(textIn->fInputBytes,
+ textIn->fInputLen,
+ escapeSequences_2022KR,
+ ARRAY_SIZE(escapeSequences_2022KR));
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_2022CN::~CharsetRecog_2022CN() {}
+
+const char *CharsetRecog_2022CN::getName() const {
+ return "ISO-2022-CN";
+}
+
+UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const {
+ int32_t confidence = match_2022(textIn->fInputBytes,
+ textIn->fInputLen,
+ escapeSequences_2022CN,
+ ARRAY_SIZE(escapeSequences_2022CN));
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_2022::~CharsetRecog_2022() {
+ // nothing to do
+}
+
+U_NAMESPACE_END
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/csrecog.cpp b/src/core/basetypes/icu-ucsdet/csrecog.cpp
new file mode 100644
index 00000000..ba70b154
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csrecog.cpp
@@ -0,0 +1,28 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2006, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+CharsetRecognizer::~CharsetRecognizer()
+{
+ // nothing to do.
+}
+
+const char *CharsetRecognizer::getLanguage() const
+{
+ return "";
+}
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/csrmbcs.cpp b/src/core/basetypes/icu-ucsdet/csrmbcs.cpp
new file mode 100644
index 00000000..fef2e869
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csrmbcs.cpp
@@ -0,0 +1,529 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2012, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csmatch.h"
+#include "csrmbcs.h"
+
+#include <math.h>
+
+U_NAMESPACE_BEGIN
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+#define min(x,y) (((x)<(y))?(x):(y))
+
+static const uint16_t commonChars_sjis [] = {
+// TODO: This set of data comes from the character frequency-
+// of-occurence analysis tool. The data needs to be moved
+// into a resource and loaded from there.
+0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
+0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
+0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
+0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
+0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
+0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa};
+
+static const uint16_t commonChars_euc_jp[] = {
+// TODO: This set of data comes from the character frequency-
+// of-occurence analysis tool. The data needs to be moved
+// into a resource and loaded from there.
+0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
+0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
+0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
+0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
+0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
+0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
+0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
+0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
+0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
+0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1};
+
+static const uint16_t commonChars_euc_kr[] = {
+// TODO: This set of data comes from the character frequency-
+// of-occurence analysis tool. The data needs to be moved
+// into a resource and loaded from there.
+0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
+0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
+0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
+0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
+0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
+0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
+0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
+0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
+0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
+0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad};
+
+static const uint16_t commonChars_big5[] = {
+// TODO: This set of data comes from the character frequency-
+// of-occurence analysis tool. The data needs to be moved
+// into a resource and loaded from there.
+0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
+0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
+0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
+0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
+0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
+0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
+0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
+0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
+0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
+0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f};
+
+static const uint16_t commonChars_gb_18030[] = {
+// TODO: This set of data comes from the character frequency-
+// of-occurence analysis tool. The data needs to be moved
+// into a resource and loaded from there.
+0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
+0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
+0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
+0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
+0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
+0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
+0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
+0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
+0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
+0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0};
+
+static int32_t binarySearch(const uint16_t *array, int32_t len, uint16_t value)
+{
+ int32_t start = 0, end = len-1;
+ int32_t mid = (start+end)/2;
+
+ while(start <= end) {
+ if(array[mid] == value) {
+ return mid;
+ }
+
+ if(array[mid] < value){
+ start = mid+1;
+ } else {
+ end = mid-1;
+ }
+
+ mid = (start+end)/2;
+ }
+
+ return -1;
+}
+
+IteratedChar::IteratedChar() :
+charValue(0), index(-1), nextIndex(0), error(FALSE), done(FALSE)
+{
+ // nothing else to do.
+}
+
+/*void IteratedChar::reset()
+{
+ charValue = 0;
+ index = -1;
+ nextIndex = 0;
+ error = FALSE;
+ done = FALSE;
+}*/
+
+int32_t IteratedChar::nextByte(InputText *det)
+{
+ if (nextIndex >= det->fRawLength) {
+ done = TRUE;
+
+ return -1;
+ }
+
+ return det->fRawInput[nextIndex++];
+}
+
+CharsetRecog_mbcs::~CharsetRecog_mbcs()
+{
+ // nothing to do.
+}
+
+int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars[], int32_t commonCharsLen) const {
+ int32_t singleByteCharCount = 0;
+ int32_t doubleByteCharCount = 0;
+ int32_t commonCharCount = 0;
+ int32_t badCharCount = 0;
+ int32_t totalCharCount = 0;
+ int32_t confidence = 0;
+ IteratedChar iter;
+
+ while (nextChar(&iter, det)) {
+ totalCharCount++;
+
+ if (iter.error) {
+ badCharCount++;
+ } else {
+ if (iter.charValue <= 0xFF) {
+ singleByteCharCount++;
+ } else {
+ doubleByteCharCount++;
+
+ if (commonChars != 0) {
+ if (binarySearch(commonChars, commonCharsLen, iter.charValue) >= 0){
+ commonCharCount += 1;
+ }
+ }
+ }
+ }
+
+
+ if (badCharCount >= 2 && badCharCount*5 >= doubleByteCharCount) {
+ // Bail out early if the byte data is not matching the encoding scheme.
+ // break detectBlock;
+ return confidence;
+ }
+ }
+
+ if (doubleByteCharCount <= 10 && badCharCount == 0) {
+ // Not many multi-byte chars.
+ if (doubleByteCharCount == 0 && totalCharCount < 10) {
+ // There weren't any multibyte sequences, and there was a low density of non-ASCII single bytes.
+ // We don't have enough data to have any confidence.
+ // Statistical analysis of single byte non-ASCII charcters would probably help here.
+ confidence = 0;
+ }
+ else {
+ // ASCII or ISO file? It's probably not our encoding,
+ // but is not incompatible with our encoding, so don't give it a zero.
+ confidence = 10;
+ }
+
+ return confidence;
+ }
+
+ //
+ // No match if there are too many characters that don't fit the encoding scheme.
+ // (should we have zero tolerance for these?)
+ //
+ if (doubleByteCharCount < 20*badCharCount) {
+ confidence = 0;
+
+ return confidence;
+ }
+
+ if (commonChars == 0) {
+ // We have no statistics on frequently occuring characters.
+ // Assess confidence purely on having a reasonable number of
+ // multi-byte characters (the more the better)
+ confidence = 30 + doubleByteCharCount - 20*badCharCount;
+
+ if (confidence > 100) {
+ confidence = 100;
+ }
+ } else {
+ //
+ // Frequency of occurence statistics exist.
+ //
+
+ double maxVal = log((double)doubleByteCharCount / 4); /*(float)?*/
+ double scaleFactor = 90.0 / maxVal;
+ confidence = (int32_t)(log((double)commonCharCount+1) * scaleFactor + 10.0);
+
+ confidence = min(confidence, 100);
+ }
+
+ if (confidence < 0) {
+ confidence = 0;
+ }
+
+ return confidence;
+}
+
+CharsetRecog_sjis::~CharsetRecog_sjis()
+{
+ // nothing to do
+}
+
+UBool CharsetRecog_sjis::nextChar(IteratedChar* it, InputText* det) const {
+ it->index = it->nextIndex;
+ it->error = FALSE;
+
+ int32_t firstByte = it->charValue = it->nextByte(det);
+
+ if (firstByte < 0) {
+ return FALSE;
+ }
+
+ if (firstByte <= 0x7F || (firstByte > 0xA0 && firstByte <= 0xDF)) {
+ return TRUE;
+ }
+
+ int32_t secondByte = it->nextByte(det);
+ if (secondByte >= 0) {
+ it->charValue = (firstByte << 8) | secondByte;
+ }
+ // else we'll handle the error later.
+
+ if (! ((secondByte >= 0x40 && secondByte <= 0x7F) || (secondByte >= 0x80 && secondByte <= 0xFE))) {
+ // Illegal second byte value.
+ it->error = TRUE;
+ }
+
+ return TRUE;
+}
+
+UBool CharsetRecog_sjis::match(InputText* det, CharsetMatch *results) const {
+ int32_t confidence = match_mbcs(det, commonChars_sjis, ARRAY_SIZE(commonChars_sjis));
+ results->set(det, this, confidence);
+ return (confidence > 0);
+}
+
+const char *CharsetRecog_sjis::getName() const
+{
+ return "Shift_JIS";
+}
+
+const char *CharsetRecog_sjis::getLanguage() const
+{
+ return "ja";
+}
+
+CharsetRecog_euc::~CharsetRecog_euc()
+{
+ // nothing to do
+}
+
+UBool CharsetRecog_euc::nextChar(IteratedChar* it, InputText* det) const {
+ int32_t firstByte = 0;
+ int32_t secondByte = 0;
+ int32_t thirdByte = 0;
+
+ it->index = it->nextIndex;
+ it->error = FALSE;
+ firstByte = it->charValue = it->nextByte(det);
+
+ if (firstByte < 0) {
+ // Ran off the end of the input data
+ return FALSE;
+ }
+
+ if (firstByte <= 0x8D) {
+ // single byte char
+ return TRUE;
+ }
+
+ secondByte = it->nextByte(det);
+ if (secondByte >= 0) {
+ it->charValue = (it->charValue << 8) | secondByte;
+ }
+ // else we'll handle the error later.
+
+ if (firstByte >= 0xA1 && firstByte <= 0xFE) {
+ // Two byte Char
+ if (secondByte < 0xA1) {
+ it->error = TRUE;
+ }
+
+ return TRUE;
+ }
+
+ if (firstByte == 0x8E) {
+ // Code Set 2.
+ // In EUC-JP, total char size is 2 bytes, only one byte of actual char value.
+ // In EUC-TW, total char size is 4 bytes, three bytes contribute to char value.
+ // We don't know which we've got.
+ // Treat it like EUC-JP. If the data really was EUC-TW, the following two
+ // bytes will look like a well formed 2 byte char.
+ if (secondByte < 0xA1) {
+ it->error = TRUE;
+ }
+
+ return TRUE;
+ }
+
+ if (firstByte == 0x8F) {
+ // Code set 3.
+ // Three byte total char size, two bytes of actual char value.
+ thirdByte = it->nextByte(det);
+ it->charValue = (it->charValue << 8) | thirdByte;
+
+ if (thirdByte < 0xa1) {
+ // Bad second byte or ran off the end of the input data with a non-ASCII first byte.
+ it->error = TRUE;
+ }
+ }
+
+ return TRUE;
+
+}
+
+CharsetRecog_euc_jp::~CharsetRecog_euc_jp()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_euc_jp::getName() const
+{
+ return "EUC-JP";
+}
+
+const char *CharsetRecog_euc_jp::getLanguage() const
+{
+ return "ja";
+}
+
+UBool CharsetRecog_euc_jp::match(InputText *det, CharsetMatch *results) const
+{
+ int32_t confidence = match_mbcs(det, commonChars_euc_jp, ARRAY_SIZE(commonChars_euc_jp));
+ results->set(det, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_euc_kr::~CharsetRecog_euc_kr()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_euc_kr::getName() const
+{
+ return "EUC-KR";
+}
+
+const char *CharsetRecog_euc_kr::getLanguage() const
+{
+ return "ko";
+}
+
+UBool CharsetRecog_euc_kr::match(InputText *det, CharsetMatch *results) const
+{
+ int32_t confidence = match_mbcs(det, commonChars_euc_kr, ARRAY_SIZE(commonChars_euc_kr));
+ results->set(det, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_big5::~CharsetRecog_big5()
+{
+ // nothing to do
+}
+
+UBool CharsetRecog_big5::nextChar(IteratedChar* it, InputText* det) const
+{
+ int32_t firstByte;
+
+ it->index = it->nextIndex;
+ it->error = FALSE;
+ firstByte = it->charValue = it->nextByte(det);
+
+ if (firstByte < 0) {
+ return FALSE;
+ }
+
+ if (firstByte <= 0x7F || firstByte == 0xFF) {
+ // single byte character.
+ return TRUE;
+ }
+
+ int32_t secondByte = it->nextByte(det);
+ if (secondByte >= 0) {
+ it->charValue = (it->charValue << 8) | secondByte;
+ }
+ // else we'll handle the error later.
+
+ if (secondByte < 0x40 || secondByte == 0x7F || secondByte == 0xFF) {
+ it->error = TRUE;
+ }
+
+ return TRUE;
+}
+
+const char *CharsetRecog_big5::getName() const
+{
+ return "Big5";
+}
+
+const char *CharsetRecog_big5::getLanguage() const
+{
+ return "zh";
+}
+
+UBool CharsetRecog_big5::match(InputText *det, CharsetMatch *results) const
+{
+ int32_t confidence = match_mbcs(det, commonChars_big5, ARRAY_SIZE(commonChars_big5));
+ results->set(det, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_gb_18030::~CharsetRecog_gb_18030()
+{
+ // nothing to do
+}
+
+UBool CharsetRecog_gb_18030::nextChar(IteratedChar* it, InputText* det) const {
+ int32_t firstByte = 0;
+ int32_t secondByte = 0;
+ int32_t thirdByte = 0;
+ int32_t fourthByte = 0;
+
+ it->index = it->nextIndex;
+ it->error = FALSE;
+ firstByte = it->charValue = it->nextByte(det);
+
+ if (firstByte < 0) {
+ // Ran off the end of the input data
+ return FALSE;
+ }
+
+ if (firstByte <= 0x80) {
+ // single byte char
+ return TRUE;
+ }
+
+ secondByte = it->nextByte(det);
+ if (secondByte >= 0) {
+ it->charValue = (it->charValue << 8) | secondByte;
+ }
+ // else we'll handle the error later.
+
+ if (firstByte >= 0x81 && firstByte <= 0xFE) {
+ // Two byte Char
+ if ((secondByte >= 0x40 && secondByte <= 0x7E) || (secondByte >=80 && secondByte <= 0xFE)) {
+ return TRUE;
+ }
+
+ // Four byte char
+ if (secondByte >= 0x30 && secondByte <= 0x39) {
+ thirdByte = it->nextByte(det);
+
+ if (thirdByte >= 0x81 && thirdByte <= 0xFE) {
+ fourthByte = it->nextByte(det);
+
+ if (fourthByte >= 0x30 && fourthByte <= 0x39) {
+ it->charValue = (it->charValue << 16) | (thirdByte << 8) | fourthByte;
+
+ return TRUE;
+ }
+ }
+ }
+
+ // Something wasn't valid, or we ran out of data (-1).
+ it->error = TRUE;
+ }
+
+ return TRUE;
+}
+
+const char *CharsetRecog_gb_18030::getName() const
+{
+ return "GB18030";
+}
+
+const char *CharsetRecog_gb_18030::getLanguage() const
+{
+ return "zh";
+}
+
+UBool CharsetRecog_gb_18030::match(InputText *det, CharsetMatch *results) const
+{
+ int32_t confidence = match_mbcs(det, commonChars_gb_18030, ARRAY_SIZE(commonChars_gb_18030));
+ results->set(det, this, confidence);
+ return (confidence > 0);
+}
+
+U_NAMESPACE_END
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/csrsbcs.cpp b/src/core/basetypes/icu-ucsdet/csrsbcs.cpp
new file mode 100644
index 00000000..d03367cc
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csrsbcs.cpp
@@ -0,0 +1,1259 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2013, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#include "cmemory.h"
+
+#if !UCONFIG_NO_CONVERSION
+#include "csrsbcs.h"
+#include "csmatch.h"
+
+#define N_GRAM_SIZE 3
+#define N_GRAM_MASK 0xFFFFFF
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+U_NAMESPACE_BEGIN
+
+NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
+ : ngram(0), byteIndex(0)
+{
+ ngramList = theNgramList;
+ charMap = theCharMap;
+
+ ngramCount = hitCount = 0;
+}
+
+/*
+ * Binary search for value in table, which must have exactly 64 entries.
+ */
+
+int32_t NGramParser::search(const int32_t *table, int32_t value)
+{
+ int32_t index = 0;
+
+ if (table[index + 32] <= value) {
+ index += 32;
+ }
+
+ if (table[index + 16] <= value) {
+ index += 16;
+ }
+
+ if (table[index + 8] <= value) {
+ index += 8;
+ }
+
+ if (table[index + 4] <= value) {
+ index += 4;
+ }
+
+ if (table[index + 2] <= value) {
+ index += 2;
+ }
+
+ if (table[index + 1] <= value) {
+ index += 1;
+ }
+
+ if (table[index] > value) {
+ index -= 1;
+ }
+
+ if (index < 0 || table[index] != value) {
+ return -1;
+ }
+
+ return index;
+}
+
+void NGramParser::lookup(int32_t thisNgram)
+{
+ ngramCount += 1;
+
+ if (search(ngramList, thisNgram) >= 0) {
+ hitCount += 1;
+ }
+
+}
+
+void NGramParser::addByte(int32_t b)
+{
+ ngram = ((ngram << 8) + b) & N_GRAM_MASK;
+ lookup(ngram);
+}
+
+int32_t NGramParser::nextByte(InputText *det)
+{
+ if (byteIndex >= det->fInputLen) {
+ return -1;
+ }
+
+ return det->fInputBytes[byteIndex++];
+}
+
+void NGramParser::parseCharacters(InputText *det)
+{
+ int32_t b;
+ bool ignoreSpace = FALSE;
+
+ while ((b = nextByte(det)) >= 0) {
+ uint8_t mb = charMap[b];
+
+ // TODO: 0x20 might not be a space in all character sets...
+ if (mb != 0) {
+ if (!(mb == 0x20 && ignoreSpace)) {
+ addByte(mb);
+ }
+
+ ignoreSpace = (mb == 0x20);
+ }
+ }
+}
+
+int32_t NGramParser::parse(InputText *det)
+{
+ parseCharacters(det);
+
+ // TODO: Is this OK? The buffer could have ended in the middle of a word...
+ addByte(0x20);
+
+ double rawPercent = (double) hitCount / (double) ngramCount;
+
+ // if (rawPercent <= 2.0) {
+ // return 0;
+ // }
+
+ // TODO - This is a bit of a hack to take care of a case
+ // were we were getting a confidence of 135...
+ if (rawPercent > 0.33) {
+ return 98;
+ }
+
+ return (int32_t) (rawPercent * 300.0);
+}
+
+static const uint8_t unshapeMap_IBM420[] = {
+/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
+/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 4- */ 0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+/* 5- */ 0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+/* 6- */ 0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+/* 7- */ 0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+/* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F,
+/* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E,
+/* A- */ 0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF,
+/* B- */ 0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF,
+/* C- */ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF,
+/* D- */ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF,
+/* E- */ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+/* F- */ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
+};
+
+NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap):NGramParser(theNgramList, theCharMap)
+{
+ alef = 0x00;
+}
+
+
+int32_t NGramParser_IBM420::isLamAlef(int32_t b)
+{
+ if(b == 0xB2 || b == 0xB3){
+ return 0x47;
+ }else if(b == 0xB4 || b == 0xB5){
+ return 0x49;
+ }else if(b == 0xB8 || b == 0xB9){
+ return 0x56;
+ }else
+ return 0x00;
+}
+
+/*
+* Arabic shaping needs to be done manually. Cannot call ArabicShaping class
+* because CharsetDetector is dealing with bytes not Unicode code points. We could
+* convert the bytes to Unicode code points but that would leave us dependent
+* on CharsetICU which we try to avoid. IBM420 converter amongst different versions
+* of JDK can produce different results and therefore is also avoided.
+*/
+int32_t NGramParser_IBM420::nextByte(InputText *det)
+{
+
+ if (byteIndex >= det->fInputLen || det->fInputBytes[byteIndex] == 0) {
+ return -1;
+ }
+ int next;
+
+ alef = isLamAlef(det->fInputBytes[byteIndex]);
+ if(alef != 0x00)
+ next = 0xB1 & 0xFF;
+ else
+ next = unshapeMap_IBM420[det->fInputBytes[byteIndex]& 0xFF] & 0xFF;
+
+ byteIndex++;
+
+ return next;
+}
+
+void NGramParser_IBM420::parseCharacters(InputText *det)
+{
+ int32_t b;
+ bool ignoreSpace = FALSE;
+
+ while ((b = nextByte(det)) >= 0) {
+ uint8_t mb = charMap[b];
+
+ // TODO: 0x20 might not be a space in all character sets...
+ if (mb != 0) {
+ if (!(mb == 0x20 && ignoreSpace)) {
+ addByte(mb);
+ }
+ ignoreSpace = (mb == 0x20);
+ }
+
+ if(alef != 0x00){
+ mb = charMap[alef & 0xFF];
+
+ // TODO: 0x20 might not be a space in all character sets...
+ if (mb != 0) {
+ if (!(mb == 0x20 && ignoreSpace)) {
+ addByte(mb);
+ }
+
+ ignoreSpace = (mb == 0x20);
+ }
+
+ }
+ }
+}
+
+CharsetRecog_sbcs::CharsetRecog_sbcs()
+{
+ // nothing else to do
+}
+
+CharsetRecog_sbcs::~CharsetRecog_sbcs()
+{
+ // nothing to do
+}
+
+int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t byteMap[]) const
+{
+ NGramParser parser(ngrams, byteMap);
+ int32_t result;
+
+ result = parser.parse(det);
+
+ return result;
+}
+
+static const uint8_t charMap_8859_1[] = {
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
+ 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
+};
+
+static const uint8_t charMap_8859_2[] = {
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
+ 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
+ 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
+ 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
+};
+
+static const uint8_t charMap_8859_5[] = {
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
+ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+ 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+ 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
+};
+
+static const uint8_t charMap_8859_6[] = {
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+ 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+};
+
+static const uint8_t charMap_8859_7[] = {
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
+ 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
+ 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
+};
+
+static const uint8_t charMap_8859_8[] = {
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
+};
+
+static const uint8_t charMap_8859_9[] = {
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
+ 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
+};
+
+static const int32_t ngrams_windows_1251[] = {
+ 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
+ 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
+ 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
+ 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
+};
+
+static const uint8_t charMap_windows_1251[] = {
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
+ 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
+ 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
+ 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
+ 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
+ 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
+};
+
+static const int32_t ngrams_windows_1256[] = {
+ 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
+ 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
+ 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
+ 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
+};
+
+static const uint8_t charMap_windows_1256[] = {
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
+ 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
+ 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
+ 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
+ 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
+};
+
+static const int32_t ngrams_KOI8_R[] = {
+ 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
+ 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
+ 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
+ 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
+};
+
+static const uint8_t charMap_KOI8_R[] = {
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+ 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
+ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+ 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
+};
+
+static const int32_t ngrams_IBM424_he_rtl[] = {
+ 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
+ 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
+ 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
+ 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
+};
+
+static const int32_t ngrams_IBM424_he_ltr[] = {
+ 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
+ 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
+ 0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
+ 0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
+};
+
+static const uint8_t charMap_IBM424_he[] = {
+/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
+/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 4- */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 5- */ 0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 7- */ 0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40,
+/* 8- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 9- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* B- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+};
+
+static const int32_t ngrams_IBM420_ar_rtl[] = {
+ 0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
+ 0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
+ 0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
+ 0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
+};
+
+static const int32_t ngrams_IBM420_ar_ltr[] = {
+ 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
+ 0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
+ 0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
+ 0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
+};
+
+static const uint8_t charMap_IBM420_ar[]= {
+/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
+/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 4- */ 0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 5- */ 0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+/* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
+/* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
+/* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
+/* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
+/* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF,
+/* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
+/* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
+/* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
+};
+
+//ISO-8859-1,2,5,6,7,8,9 Ngrams
+
+struct NGramsPlusLang {
+ const int32_t ngrams[64];
+ const char * lang;
+};
+
+static const NGramsPlusLang ngrams_8859_1[] = {
+ {
+ {
+ 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
+ 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
+ 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
+ 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
+ },
+ "en"
+ },
+ {
+ {
+ 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
+ 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
+ 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
+ 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
+ },
+ "da"
+ },
+ {
+ {
+ 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
+ 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
+ 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
+ 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
+ },
+ "de"
+ },
+ {
+ {
+ 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
+ 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
+ 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
+ 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
+ },
+ "es"
+ },
+ {
+ {
+ 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
+ 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
+ 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
+ 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
+ },
+ "fr"
+ },
+ {
+ {
+ 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
+ 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
+ 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
+ 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
+ },
+ "it"
+ },
+ {
+ {
+ 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
+ 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
+ 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
+ 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
+ },
+ "nl"
+ },
+ {
+ {
+ 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
+ 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
+ 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
+ 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
+ },
+ "no"
+ },
+ {
+ {
+ 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
+ 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
+ 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
+ 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
+ },
+ "pt"
+ },
+ {
+ {
+ 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
+ 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
+ 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
+ 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
+ },
+ "sv"
+ }
+};
+
+
+static const NGramsPlusLang ngrams_8859_2[] = {
+ {
+ {
+ 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
+ 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
+ 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
+ 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
+ },
+ "cs"
+ },
+ {
+ {
+ 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
+ 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
+ 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
+ 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
+ },
+ "hu"
+ },
+ {
+ {
+ 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
+ 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
+ 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
+ 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
+ },
+ "pl"
+ },
+ {
+ {
+ 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
+ 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
+ 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
+ 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
+ },
+ "ro"
+ }
+};
+
+static const int32_t ngrams_8859_5_ru[] = {
+ 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
+ 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
+ 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
+ 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
+};
+
+static const int32_t ngrams_8859_6_ar[] = {
+ 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
+ 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
+ 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
+ 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
+};
+
+static const int32_t ngrams_8859_7_el[] = {
+ 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
+ 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
+ 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
+ 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
+};
+
+static const int32_t ngrams_8859_8_I_he[] = {
+ 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
+ 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
+ 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
+ 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
+};
+
+static const int32_t ngrams_8859_8_he[] = {
+ 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
+ 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
+ 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
+ 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
+};
+
+static const int32_t ngrams_8859_9_tr[] = {
+ 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
+ 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
+ 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
+ 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
+};
+
+CharsetRecog_8859_1::~CharsetRecog_8859_1()
+{
+ // nothing to do
+}
+
+UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const {
+ const char *name = textIn->fC1Bytes? "windows-1252" : "ISO-8859-1";
+ uint32_t i;
+ int32_t bestConfidenceSoFar = -1;
+ for (i=0; i < ARRAY_SIZE(ngrams_8859_1) ; i++) {
+ const int32_t *ngrams = ngrams_8859_1[i].ngrams;
+ const char *lang = ngrams_8859_1[i].lang;
+ int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_1);
+ if (confidence > bestConfidenceSoFar) {
+ results->set(textIn, this, confidence, name, lang);
+ bestConfidenceSoFar = confidence;
+ }
+ }
+ return (bestConfidenceSoFar > 0);
+}
+
+const char *CharsetRecog_8859_1::getName() const
+{
+ return "ISO-8859-1";
+}
+
+
+CharsetRecog_8859_2::~CharsetRecog_8859_2()
+{
+ // nothing to do
+}
+
+UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const {
+ const char *name = textIn->fC1Bytes? "windows-1250" : "ISO-8859-2";
+ uint32_t i;
+ int32_t bestConfidenceSoFar = -1;
+ for (i=0; i < ARRAY_SIZE(ngrams_8859_2) ; i++) {
+ const int32_t *ngrams = ngrams_8859_2[i].ngrams;
+ const char *lang = ngrams_8859_2[i].lang;
+ int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_2);
+ if (confidence > bestConfidenceSoFar) {
+ results->set(textIn, this, confidence, name, lang);
+ bestConfidenceSoFar = confidence;
+ }
+ }
+ return (bestConfidenceSoFar > 0);
+}
+
+const char *CharsetRecog_8859_2::getName() const
+{
+ return "ISO-8859-2";
+}
+
+
+CharsetRecog_8859_5::~CharsetRecog_8859_5()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_8859_5::getName() const
+{
+ return "ISO-8859-5";
+}
+
+CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_8859_5_ru::getLanguage() const
+{
+ return "ru";
+}
+
+UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const
+{
+ int32_t confidence = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_8859_6::~CharsetRecog_8859_6()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_8859_6::getName() const
+{
+ return "ISO-8859-6";
+}
+
+CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_8859_6_ar::getLanguage() const
+{
+ return "ar";
+}
+
+UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const
+{
+ int32_t confidence = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_8859_7::~CharsetRecog_8859_7()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_8859_7::getName() const
+{
+ return "ISO-8859-7";
+}
+
+CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_8859_7_el::getLanguage() const
+{
+ return "el";
+}
+
+UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const
+{
+ const char *name = textIn->fC1Bytes? "windows-1253" : "ISO-8859-7";
+ int32_t confidence = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
+ results->set(textIn, this, confidence, name, "el");
+ return (confidence > 0);
+}
+
+CharsetRecog_8859_8::~CharsetRecog_8859_8()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_8859_8::getName() const
+{
+ return "ISO-8859-8";
+}
+
+CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_8859_8_I_he::getName() const
+{
+ return "ISO-8859-8-I";
+}
+
+const char *CharsetRecog_8859_8_I_he::getLanguage() const
+{
+ return "he";
+}
+
+UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const
+{
+ const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8-I";
+ int32_t confidence = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
+ results->set(textIn, this, confidence, name, "he");
+ return (confidence > 0);
+}
+
+CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
+{
+ // od ot gnihton
+}
+
+const char *CharsetRecog_8859_8_he::getLanguage() const
+{
+ return "he";
+}
+
+UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const
+{
+ const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8";
+ int32_t confidence = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
+ results->set(textIn, this, confidence, name, "he");
+ return (confidence > 0);
+}
+
+CharsetRecog_8859_9::~CharsetRecog_8859_9()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_8859_9::getName() const
+{
+ return "ISO-8859-9";
+}
+
+CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_8859_9_tr::getLanguage() const
+{
+ return "tr";
+}
+
+UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const
+{
+ const char *name = textIn->fC1Bytes? "windows-1254" : "ISO-8859-9";
+ int32_t confidence = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
+ results->set(textIn, this, confidence, name, "tr");
+ return (confidence > 0);
+}
+
+CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_windows_1256::getName() const
+{
+ return "windows-1256";
+}
+
+const char *CharsetRecog_windows_1256::getLanguage() const
+{
+ return "ar";
+}
+
+UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const
+{
+ int32_t confidence = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_windows_1251::getName() const
+{
+ return "windows-1251";
+}
+
+const char *CharsetRecog_windows_1251::getLanguage() const
+{
+ return "ru";
+}
+
+UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const
+{
+ int32_t confidence = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_KOI8_R::getName() const
+{
+ return "KOI8-R";
+}
+
+const char *CharsetRecog_KOI8_R::getLanguage() const
+{
+ return "ru";
+}
+
+UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
+{
+ int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_IBM424_he::getLanguage() const
+{
+ return "he";
+}
+
+CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_IBM424_he_rtl::getName() const
+{
+ return "IBM424_rtl";
+}
+
+UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const
+{
+ int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_IBM424_he_ltr::getName() const
+{
+ return "IBM424_ltr";
+}
+
+UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const
+{
+ int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_IBM420_ar::getLanguage() const
+{
+ return "ar";
+}
+
+
+int32_t CharsetRecog_IBM420_ar::match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t byteMap[]) const
+{
+ NGramParser_IBM420 parser(ngrams, byteMap);
+ int32_t result;
+
+ result = parser.parse(det);
+
+ return result;
+}
+
+CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_IBM420_ar_rtl::getName() const
+{
+ return "IBM420_rtl";
+}
+
+UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const
+{
+ int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_IBM420_ar_ltr::getName() const
+{
+ return "IBM420_ltr";
+}
+
+UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const
+{
+ int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+U_NAMESPACE_END
+#endif
+
diff --git a/src/core/basetypes/icu-ucsdet/csrucode.cpp b/src/core/basetypes/icu-ucsdet/csrucode.cpp
new file mode 100644
index 00000000..f0983430
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csrucode.cpp
@@ -0,0 +1,198 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2013, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrucode.h"
+#include "csmatch.h"
+
+U_NAMESPACE_BEGIN
+
+CharsetRecog_Unicode::~CharsetRecog_Unicode()
+{
+ // nothing to do
+}
+
+CharsetRecog_UTF_16_BE::~CharsetRecog_UTF_16_BE()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_UTF_16_BE::getName() const
+{
+ return "UTF-16BE";
+}
+
+// UTF-16 confidence calculation. Very simple minded, but better than nothing.
+// Any 8 bit non-control characters bump the confidence up. These have a zero high byte,
+// and are very likely to be UTF-16, although they could also be part of a UTF-32 code.
+// NULs are a contra-indication, they will appear commonly if the actual encoding is UTF-32.
+// NULs should be rare in actual text.
+
+static int32_t adjustConfidence(UChar codeUnit, int32_t confidence) {
+ if (codeUnit == 0) {
+ confidence -= 10;
+ } else if ((codeUnit >= 0x20 && codeUnit <= 0xff) || codeUnit == 0x0a) {
+ confidence += 10;
+ }
+ if (confidence < 0) {
+ confidence = 0;
+ } else if (confidence > 100) {
+ confidence = 100;
+ }
+ return confidence;
+}
+
+
+UBool CharsetRecog_UTF_16_BE::match(InputText* textIn, CharsetMatch *results) const
+{
+ const uint8_t *input = textIn->fRawInput;
+ int32_t confidence = 10;
+ int32_t length = textIn->fRawLength;
+
+ int32_t bytesToCheck = (length > 30) ? 30 : length;
+ for (int32_t charIndex=0; charIndex<bytesToCheck-1; charIndex+=2) {
+ UChar codeUnit = (input[charIndex] << 8) | input[charIndex + 1];
+ if (charIndex == 0 && codeUnit == 0xFEFF) {
+ confidence = 100;
+ break;
+ }
+ confidence = adjustConfidence(codeUnit, confidence);
+ if (confidence == 0 || confidence == 100) {
+ break;
+ }
+ }
+ if (bytesToCheck < 4 && confidence < 100) {
+ confidence = 0;
+ }
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_UTF_16_LE::~CharsetRecog_UTF_16_LE()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_UTF_16_LE::getName() const
+{
+ return "UTF-16LE";
+}
+
+UBool CharsetRecog_UTF_16_LE::match(InputText* textIn, CharsetMatch *results) const
+{
+ const uint8_t *input = textIn->fRawInput;
+ int32_t confidence = 10;
+ int32_t length = textIn->fRawLength;
+
+ int32_t bytesToCheck = (length > 30) ? 30 : length;
+ for (int32_t charIndex=0; charIndex<bytesToCheck-1; charIndex+=2) {
+ UChar codeUnit = input[charIndex] | (input[charIndex + 1] << 8);
+ if (charIndex == 0 && codeUnit == 0xFEFF) {
+ confidence = 100; // UTF-16 BOM
+ if (length >= 4 && input[2] == 0 && input[3] == 0) {
+ confidence = 0; // UTF-32 BOM
+ }
+ break;
+ }
+ confidence = adjustConfidence(codeUnit, confidence);
+ if (confidence == 0 || confidence == 100) {
+ break;
+ }
+ }
+ if (bytesToCheck < 4 && confidence < 100) {
+ confidence = 0;
+ }
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_UTF_32::~CharsetRecog_UTF_32()
+{
+ // nothing to do
+}
+
+UBool CharsetRecog_UTF_32::match(InputText* textIn, CharsetMatch *results) const
+{
+ const uint8_t *input = textIn->fRawInput;
+ int32_t limit = (textIn->fRawLength / 4) * 4;
+ int32_t numValid = 0;
+ int32_t numInvalid = 0;
+ bool hasBOM = FALSE;
+ int32_t confidence = 0;
+
+ if (limit > 0 && getChar(input, 0) == 0x0000FEFFUL) {
+ hasBOM = TRUE;
+ }
+
+ for(int32_t i = 0; i < limit; i += 4) {
+ int32_t ch = getChar(input, i);
+
+ if (ch < 0 || ch >= 0x10FFFF || (ch >= 0xD800 && ch <= 0xDFFF)) {
+ numInvalid += 1;
+ } else {
+ numValid += 1;
+ }
+ }
+
+
+ // Cook up some sort of confidence score, based on presense of a BOM
+ // and the existence of valid and/or invalid multi-byte sequences.
+ if (hasBOM && numInvalid==0) {
+ confidence = 100;
+ } else if (hasBOM && numValid > numInvalid*10) {
+ confidence = 80;
+ } else if (numValid > 3 && numInvalid == 0) {
+ confidence = 100;
+ } else if (numValid > 0 && numInvalid == 0) {
+ confidence = 80;
+ } else if (numValid > numInvalid*10) {
+ // Probably corruput UTF-32BE data. Valid sequences aren't likely by chance.
+ confidence = 25;
+ }
+
+ results->set(textIn, this, confidence);
+ return (confidence > 0);
+}
+
+CharsetRecog_UTF_32_BE::~CharsetRecog_UTF_32_BE()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_UTF_32_BE::getName() const
+{
+ return "UTF-32BE";
+}
+
+int32_t CharsetRecog_UTF_32_BE::getChar(const uint8_t *input, int32_t index) const
+{
+ return input[index + 0] << 24 | input[index + 1] << 16 |
+ input[index + 2] << 8 | input[index + 3];
+}
+
+CharsetRecog_UTF_32_LE::~CharsetRecog_UTF_32_LE()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_UTF_32_LE::getName() const
+{
+ return "UTF-32LE";
+}
+
+int32_t CharsetRecog_UTF_32_LE::getChar(const uint8_t *input, int32_t index) const
+{
+ return input[index + 3] << 24 | input[index + 2] << 16 |
+ input[index + 1] << 8 | input[index + 0];
+}
+
+U_NAMESPACE_END
+#endif
+
diff --git a/src/core/basetypes/icu-ucsdet/csrutf8.cpp b/src/core/basetypes/icu-ucsdet/csrutf8.cpp
new file mode 100644
index 00000000..b18aa77e
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csrutf8.cpp
@@ -0,0 +1,109 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2014, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrutf8.h"
+#include "csmatch.h"
+
+U_NAMESPACE_BEGIN
+
+CharsetRecog_UTF8::~CharsetRecog_UTF8()
+{
+ // nothing to do
+}
+
+const char *CharsetRecog_UTF8::getName() const
+{
+ return "UTF-8";
+}
+
+UBool CharsetRecog_UTF8::match(InputText* input, CharsetMatch *results) const {
+ bool hasBOM = FALSE;
+ int32_t numValid = 0;
+ int32_t numInvalid = 0;
+ const uint8_t *inputBytes = input->fRawInput;
+ int32_t i;
+ int32_t trailBytes = 0;
+ int32_t confidence;
+
+ if (input->fRawLength >= 3 &&
+ inputBytes[0] == 0xEF && inputBytes[1] == 0xBB && inputBytes[2] == 0xBF) {
+ hasBOM = TRUE;
+ }
+
+ // Scan for multi-byte sequences
+ for (i=0; i < input->fRawLength; i += 1) {
+ int32_t b = inputBytes[i];
+
+ if ((b & 0x80) == 0) {
+ continue; // ASCII
+ }
+
+ // Hi bit on char found. Figure out how long the sequence should be
+ if ((b & 0x0E0) == 0x0C0) {
+ trailBytes = 1;
+ } else if ((b & 0x0F0) == 0x0E0) {
+ trailBytes = 2;
+ } else if ((b & 0x0F8) == 0xF0) {
+ trailBytes = 3;
+ } else {
+ numInvalid += 1;
+ continue;
+ }
+
+ // Verify that we've got the right number of trail bytes in the sequence
+ for (;;) {
+ i += 1;
+
+ if (i >= input->fRawLength) {
+ break;
+ }
+
+ b = inputBytes[i];
+
+ if ((b & 0xC0) != 0x080) {
+ numInvalid += 1;
+ break;
+ }
+
+ if (--trailBytes == 0) {
+ numValid += 1;
+ break;
+ }
+ }
+
+ }
+
+ // Cook up some sort of confidence score, based on presence of a BOM
+ // and the existence of valid and/or invalid multi-byte sequences.
+ confidence = 0;
+ if (hasBOM && numInvalid == 0) {
+ confidence = 100;
+ } else if (hasBOM && numValid > numInvalid*10) {
+ confidence = 80;
+ } else if (numValid > 3 && numInvalid == 0) {
+ confidence = 100;
+ } else if (numValid > 0 && numInvalid == 0) {
+ confidence = 80;
+ } else if (numValid == 0 && numInvalid == 0) {
+ // Plain ASCII. Confidence must be > 10, it's more likely than UTF-16, which
+ // accepts ASCII with confidence = 10.
+ confidence = 15;
+ } else if (numValid > numInvalid*10) {
+ // Probably corruput utf-8 data. Valid sequences aren't likely by chance.
+ confidence = 25;
+ }
+
+ results->set(input, this, confidence);
+ return (confidence > 0);
+}
+
+U_NAMESPACE_END
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/cstring.c b/src/core/basetypes/icu-ucsdet/cstring.c
new file mode 100644
index 00000000..3af959eb
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/cstring.c
@@ -0,0 +1,339 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File CSTRING.C
+*
+* @author Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+* 6/18/98 hshih Created
+* 09/08/98 stephen Added include for ctype, for Mac Port
+* 11/15/99 helena Integrated S/390 IEEE changes.
+******************************************************************************
+*/
+
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+
+/*
+ * We hardcode case conversion for invariant characters to match our expectation
+ * and the compiler execution charset.
+ * This prevents problems on systems
+ * - with non-default casing behavior, like Turkish system locales where
+ * tolower('I') maps to dotless i and toupper('i') maps to dotted I
+ * - where there are no lowercase Latin characters at all, or using different
+ * codes (some old EBCDIC codepages)
+ *
+ * This works because the compiler usually runs on a platform where the execution
+ * charset includes all of the invariant characters at their expected
+ * code positions, so that the char * string literals in ICU code match
+ * the char literals here.
+ *
+ * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
+ * and the set of uppercase Latin letters is discontiguous as well.
+ */
+
+U_CAPI UBool U_EXPORT2
+uprv_isASCIILetter(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ return
+ ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
+ ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
+#else
+ return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
+#endif
+}
+
+U_CAPI char U_EXPORT2
+uprv_toupper(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
+ c=(char)(c+('A'-'a'));
+ }
+#else
+ if('a'<=c && c<='z') {
+ c=(char)(c+('A'-'a'));
+ }
+#endif
+ return c;
+}
+
+
+#if 0
+/*
+ * Commented out because cstring.h defines uprv_tolower() to be
+ * the same as either uprv_asciitolower() or uprv_ebcdictolower()
+ * to reduce the amount of code to cover with tests.
+ *
+ * Note that this uprv_tolower() definition is likely to work for most
+ * charset families, not just ASCII and EBCDIC, because its #else branch
+ * is written generically.
+ */
+U_CAPI char U_EXPORT2
+uprv_tolower(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
+ c=(char)(c+('a'-'A'));
+ }
+#else
+ if('A'<=c && c<='Z') {
+ c=(char)(c+('a'-'A'));
+ }
+#endif
+ return c;
+}
+#endif
+
+U_CAPI char U_EXPORT2
+uprv_asciitolower(char c) {
+ if(0x41<=c && c<=0x5a) {
+ c=(char)(c+0x20);
+ }
+ return c;
+}
+
+U_CAPI char U_EXPORT2
+uprv_ebcdictolower(char c) {
+ if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
+ (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
+ (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
+ ) {
+ c=(char)(c-0x40);
+ }
+ return c;
+}
+
+
+U_CAPI char* U_EXPORT2
+T_CString_toLowerCase(char* str)
+{
+ char* origPtr = str;
+
+ if (str) {
+ do
+ *str = (char)uprv_tolower(*str);
+ while (*(str++));
+ }
+
+ return origPtr;
+}
+
+U_CAPI char* U_EXPORT2
+T_CString_toUpperCase(char* str)
+{
+ char* origPtr = str;
+
+ if (str) {
+ do
+ *str = (char)uprv_toupper(*str);
+ while (*(str++));
+ }
+
+ return origPtr;
+}
+
+/*
+ * Takes a int32_t and fills in a char* string with that number "radix"-based.
+ * Does not handle negative values (makes an empty string for them).
+ * Writes at most 12 chars ("-2147483647" plus NUL).
+ * Returns the length of the string (not including the NUL).
+ */
+U_CAPI int32_t U_EXPORT2
+T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
+{
+ char tbuf[30];
+ int32_t tbx = sizeof(tbuf);
+ uint8_t digit;
+ int32_t length = 0;
+ uint32_t uval;
+
+ U_ASSERT(radix>=2 && radix<=16);
+ uval = (uint32_t) v;
+ if(v<0 && radix == 10) {
+ /* Only in base 10 do we conside numbers to be signed. */
+ uval = (uint32_t)(-v);
+ buffer[length++] = '-';
+ }
+
+ tbx = sizeof(tbuf)-1;
+ tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
+ do {
+ digit = (uint8_t)(uval % radix);
+ tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
+ uval = uval / radix;
+ } while (uval != 0);
+
+ /* copy converted number into user buffer */
+ uprv_strcpy(buffer+length, tbuf+tbx);
+ length += sizeof(tbuf) - tbx -1;
+ return length;
+}
+
+
+
+/*
+ * Takes a int64_t and fills in a char* string with that number "radix"-based.
+ * Writes at most 21: chars ("-9223372036854775807" plus NUL).
+ * Returns the length of the string, not including the terminating NULL.
+ */
+U_CAPI int32_t U_EXPORT2
+T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
+{
+ char tbuf[30];
+ int32_t tbx = sizeof(tbuf);
+ uint8_t digit;
+ int32_t length = 0;
+ uint64_t uval;
+
+ U_ASSERT(radix>=2 && radix<=16);
+ uval = (uint64_t) v;
+ if(v<0 && radix == 10) {
+ /* Only in base 10 do we conside numbers to be signed. */
+ uval = (uint64_t)(-v);
+ buffer[length++] = '-';
+ }
+
+ tbx = sizeof(tbuf)-1;
+ tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
+ do {
+ digit = (uint8_t)(uval % radix);
+ tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
+ uval = uval / radix;
+ } while (uval != 0);
+
+ /* copy converted number into user buffer */
+ uprv_strcpy(buffer+length, tbuf+tbx);
+ length += sizeof(tbuf) - tbx -1;
+ return length;
+}
+
+
+U_CAPI int32_t U_EXPORT2
+T_CString_stringToInteger(const char *integerString, int32_t radix)
+{
+ char *end;
+ return uprv_strtoul(integerString, &end, radix);
+
+}
+
+U_CAPI int U_EXPORT2
+uprv_stricmp(const char *str1, const char *str2) {
+ if(str1==NULL) {
+ if(str2==NULL) {
+ return 0;
+ } else {
+ return -1;
+ }
+ } else if(str2==NULL) {
+ return 1;
+ } else {
+ /* compare non-NULL strings lexically with lowercase */
+ int rc;
+ unsigned char c1, c2;
+
+ for(;;) {
+ c1=(unsigned char)*str1;
+ c2=(unsigned char)*str2;
+ if(c1==0) {
+ if(c2==0) {
+ return 0;
+ } else {
+ return -1;
+ }
+ } else if(c2==0) {
+ return 1;
+ } else {
+ /* compare non-zero characters with lowercase */
+ rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
+ if(rc!=0) {
+ return rc;
+ }
+ }
+ ++str1;
+ ++str2;
+ }
+ }
+}
+
+U_CAPI int U_EXPORT2
+uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
+ if(str1==NULL) {
+ if(str2==NULL) {
+ return 0;
+ } else {
+ return -1;
+ }
+ } else if(str2==NULL) {
+ return 1;
+ } else {
+ /* compare non-NULL strings lexically with lowercase */
+ int rc;
+ unsigned char c1, c2;
+
+ for(; n--;) {
+ c1=(unsigned char)*str1;
+ c2=(unsigned char)*str2;
+ if(c1==0) {
+ if(c2==0) {
+ return 0;
+ } else {
+ return -1;
+ }
+ } else if(c2==0) {
+ return 1;
+ } else {
+ /* compare non-zero characters with lowercase */
+ rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
+ if(rc!=0) {
+ return rc;
+ }
+ }
+ ++str1;
+ ++str2;
+ }
+ }
+
+ return 0;
+}
+
+U_CAPI char* U_EXPORT2
+uprv_strdup(const char *src) {
+ size_t len = uprv_strlen(src) + 1;
+ char *dup = (char *) uprv_malloc(len);
+
+ if (dup) {
+ uprv_memcpy(dup, src, len);
+ }
+
+ return dup;
+}
+
+U_CAPI char* U_EXPORT2
+uprv_strndup(const char *src, int32_t n) {
+ char *dup;
+
+ if(n < 0) {
+ dup = uprv_strdup(src);
+ } else {
+ dup = (char*)uprv_malloc(n+1);
+ if (dup) {
+ uprv_memcpy(dup, src, n);
+ dup[n] = 0;
+ }
+ }
+
+ return dup;
+}
diff --git a/src/core/basetypes/icu-ucsdet/include/charstr.h b/src/core/basetypes/icu-ucsdet/include/charstr.h
new file mode 100644
index 00000000..4b86c835
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/charstr.h
@@ -0,0 +1,130 @@
+/*
+**********************************************************************
+* Copyright (c) 2001-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 11/19/2001 aliu Creation.
+* 05/19/2010 markus Rewritten from scratch
+**********************************************************************
+*/
+
+#ifndef CHARSTRING_H
+#define CHARSTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+// Windows needs us to DLL-export the MaybeStackArray template specialization,
+// but MacOS X cannot handle it. Same as in digitlst.h.
+#if !U_PLATFORM_IS_DARWIN_BASED
+template class U_COMMON_API MaybeStackArray<char, 40>;
+#endif
+
+/**
+ * ICU-internal char * string class.
+ * This class does not assume or enforce any particular character encoding.
+ * Raw bytes can be stored. The string object owns its characters.
+ * A terminating NUL is stored, but the class does not prevent embedded NUL characters.
+ *
+ * This class wants to be convenient but is also deliberately minimalist.
+ * Please do not add methods if they only add minor convenience.
+ * For example:
+ * cs.data()[5]='a'; // no need for setCharAt(5, 'a')
+ */
+class U_COMMON_API CharString : public UMemory {
+public:
+ CharString() : len(0) { buffer[0]=0; }
+ CharString(const StringPiece &s, UErrorCode &errorCode) : len(0) {
+ buffer[0]=0;
+ append(s, errorCode);
+ }
+ CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
+ buffer[0]=0;
+ append(s, errorCode);
+ }
+ CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
+ buffer[0]=0;
+ append(s, sLength, errorCode);
+ }
+ ~CharString() {}
+
+ /**
+ * Replaces this string's contents with the other string's contents.
+ * CharString does not support the standard copy constructor nor
+ * the assignment operator, to make copies explicit and to
+ * use a UErrorCode where memory allocations might be needed.
+ */
+ CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
+
+ UBool isEmpty() const { return len==0; }
+ int32_t length() const { return len; }
+ char operator[](int32_t index) const { return buffer[index]; }
+ StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
+
+ const char *data() const { return buffer.getAlias(); }
+ char *data() { return buffer.getAlias(); }
+
+ CharString &clear() { len=0; buffer[0]=0; return *this; }
+ CharString &truncate(int32_t newLength);
+
+ CharString &append(char c, UErrorCode &errorCode);
+ CharString &append(const StringPiece &s, UErrorCode &errorCode) {
+ return append(s.data(), s.length(), errorCode);
+ }
+ CharString &append(const CharString &s, UErrorCode &errorCode) {
+ return append(s.data(), s.length(), errorCode);
+ }
+ CharString &append(const char *s, int32_t sLength, UErrorCode &status);
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
+ * There will additionally be space for a terminating NUL right at resultCapacity.
+ * (This function is similar to ByteSink.GetAppendBuffer().)
+ *
+ * The returned buffer is only valid until the next write operation
+ * on this string.
+ *
+ * After writing at most resultCapacity bytes, call append() with the
+ * pointer returned from this function and the number of bytes written.
+ *
+ * @param minCapacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desiredCapacityHint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param resultCapacity will be set to the capacity of the returned buffer
+ * @param errorCode in/out error code
+ * @return a buffer with resultCapacity>=min_capacity
+ */
+ char *getAppendBuffer(int32_t minCapacity,
+ int32_t desiredCapacityHint,
+ int32_t &resultCapacity,
+ UErrorCode &errorCode);
+
+ CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
+
+ /**
+ * Appends a filename/path part, e.g., a directory name.
+ * First appends a U_FILE_SEP_CHAR if necessary.
+ * Does nothing if s is empty.
+ */
+ CharString &appendPathPart(const StringPiece &s, UErrorCode &errorCode);
+
+private:
+ MaybeStackArray<char, 40> buffer;
+ int32_t len;
+
+ UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);
+
+ CharString(const CharString &other); // forbid copying of this class
+ CharString &operator=(const CharString &other); // forbid copying of this class
+};
+
+U_NAMESPACE_END
+
+#endif
+//eof
diff --git a/src/core/basetypes/icu-ucsdet/include/cmemory.h b/src/core/basetypes/icu-ucsdet/include/cmemory.h
new file mode 100644
index 00000000..ed29b63e
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/cmemory.h
@@ -0,0 +1,607 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File CMEMORY.H
+*
+* Contains stdlib.h/string.h memory functions
+*
+* @author Bertrand A. Damiba
+*
+* Modification History:
+*
+* Date Name Description
+* 6/20/98 Bertrand Created.
+* 05/03/99 stephen Changed from functions to macros.
+*
+******************************************************************************
+*/
+
+#ifndef CMEMORY_H
+#define CMEMORY_H
+
+#include "unicode/utypes.h"
+
+#include <stddef.h>
+#include <string.h>
+#include "unicode/localpointer.h"
+
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#include <stdio.h>
+#endif
+
+#if U_DEBUG
+
+/*
+ * The C++ standard requires that the source pointer for memcpy() & memmove()
+ * is valid, not NULL, and not at the end of an allocated memory block.
+ * In debug mode, we read one byte from the source point to verify that it's
+ * a valid, readable pointer.
+ */
+
+U_CAPI void uprv_checkValidMemory(const void *p, size_t n);
+
+#define uprv_memcpy(dst, src, size) ( \
+ uprv_checkValidMemory(src, 1), \
+ U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size))
+#define uprv_memmove(dst, src, size) ( \
+ uprv_checkValidMemory(src, 1), \
+ U_STANDARD_CPP_NAMESPACE memmove(dst, src, size))
+
+#else
+
+#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
+#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
+
+#endif /* U_DEBUG */
+
+/**
+ * \def UPRV_LENGTHOF
+ * Convenience macro to determine the length of a fixed array at compile-time.
+ * @param array A fixed length array
+ * @return The length of the array, in elements
+ * @internal
+ */
+#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
+#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
+
+U_CAPI void * U_EXPORT2
+uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1);
+
+U_CAPI void * U_EXPORT2
+uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2);
+
+U_CAPI void U_EXPORT2
+uprv_free(void *mem);
+
+U_CAPI void * U_EXPORT2
+uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2);
+
+/**
+ * This should align the memory properly on any machine.
+ * This is very useful for the safeClone functions.
+ */
+typedef union {
+ long t1;
+ double t2;
+ void *t3;
+} UAlignedMemory;
+
+/**
+ * Get the least significant bits of a pointer (a memory address).
+ * For example, with a mask of 3, the macro gets the 2 least significant bits,
+ * which will be 0 if the pointer is 32-bit (4-byte) aligned.
+ *
+ * ptrdiff_t is the most appropriate integer type to cast to.
+ * size_t should work too, since on most (or all?) platforms it has the same
+ * width as ptrdiff_t.
+ */
+#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
+
+/**
+ * Get the amount of bytes that a pointer is off by from
+ * the previous UAlignedMemory-aligned pointer.
+ */
+#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
+
+/**
+ * Get the amount of bytes to add to a pointer
+ * in order to get the next UAlignedMemory-aligned address.
+ */
+#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
+
+/**
+ * Indicate whether the ICU allocation functions have been used.
+ * This is used to determine whether ICU is in an initial, unused state.
+ */
+U_CFUNC UBool
+cmemory_inUse(void);
+
+/**
+ * Heap clean up function, called from u_cleanup()
+ * Clears any user heap functions from u_setMemoryFunctions()
+ * Does NOT deallocate any remaining allocated memory.
+ */
+U_CFUNC UBool
+cmemory_cleanup(void);
+
+/**
+ * A function called by <TT>uhash_remove</TT>,
+ * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
+ * an existing key or value.
+ * @param obj A key or value stored in a hashtable
+ * @see uprv_deleteUObject
+ */
+typedef void U_CALLCONV UObjectDeleter(void* obj);
+
+/**
+ * Deleter for UObject instances.
+ * Works for all subclasses of UObject because it has a virtual destructor.
+ */
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj);
+
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+/**
+ * "Smart pointer" class, deletes memory via uprv_free().
+ * For most methods see the LocalPointerBase base class.
+ * Adds operator[] for array item access.
+ *
+ * @see LocalPointerBase
+ */
+template<typename T>
+class LocalMemory : public LocalPointerBase<T> {
+public:
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an array of T items that is adopted
+ */
+ explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
+ /**
+ * Destructor deletes the memory it owns.
+ */
+ ~LocalMemory() {
+ uprv_free(LocalPointerBase<T>::ptr);
+ }
+ /**
+ * Deletes the array it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * @param p simple pointer to an array of T items that is adopted
+ */
+ void adoptInstead(T *p) {
+ uprv_free(LocalPointerBase<T>::ptr);
+ LocalPointerBase<T>::ptr=p;
+ }
+ /**
+ * Deletes the array it owns, allocates a new one and reset its bytes to 0.
+ * Returns the new array pointer.
+ * If the allocation fails, then the current array is unchanged and
+ * this method returns NULL.
+ * @param newCapacity must be >0
+ * @return the allocated array pointer, or NULL if the allocation failed
+ */
+ inline T *allocateInsteadAndReset(int32_t newCapacity=1);
+ /**
+ * Deletes the array it owns and allocates a new one, copying length T items.
+ * Returns the new array pointer.
+ * If the allocation fails, then the current array is unchanged and
+ * this method returns NULL.
+ * @param newCapacity must be >0
+ * @param length number of T items to be copied from the old array to the new one;
+ * must be no more than the capacity of the old array,
+ * which the caller must track because the LocalMemory does not track it
+ * @return the allocated array pointer, or NULL if the allocation failed
+ */
+ inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0);
+ /**
+ * Array item access (writable).
+ * No index bounds check.
+ * @param i array index
+ * @return reference to the array item
+ */
+ T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
+};
+
+template<typename T>
+inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) {
+ if(newCapacity>0) {
+ T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
+ if(p!=NULL) {
+ uprv_memset(p, 0, newCapacity*sizeof(T));
+ uprv_free(LocalPointerBase<T>::ptr);
+ LocalPointerBase<T>::ptr=p;
+ }
+ return p;
+ } else {
+ return NULL;
+ }
+}
+
+
+template<typename T>
+inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) {
+ if(newCapacity>0) {
+ T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
+ if(p!=NULL) {
+ if(length>0) {
+ if(length>newCapacity) {
+ length=newCapacity;
+ }
+ uprv_memcpy(p, LocalPointerBase<T>::ptr, length*sizeof(T));
+ }
+ uprv_free(LocalPointerBase<T>::ptr);
+ LocalPointerBase<T>::ptr=p;
+ }
+ return p;
+ } else {
+ return NULL;
+ }
+}
+
+/**
+ * Simple array/buffer management class using uprv_malloc() and uprv_free().
+ * Provides an internal array with fixed capacity. Can alias another array
+ * or allocate one.
+ *
+ * The array address is properly aligned for type T. It might not be properly
+ * aligned for types larger than T (or larger than the largest subtype of T).
+ *
+ * Unlike LocalMemory and LocalArray, this class never adopts
+ * (takes ownership of) another array.
+ */
+template<typename T, int32_t stackCapacity>
+class MaybeStackArray {
+public:
+ /**
+ * Default constructor initializes with internal T[stackCapacity] buffer.
+ */
+ MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
+ /**
+ * Destructor deletes the array (if owned).
+ */
+ ~MaybeStackArray() { releaseArray(); }
+ /**
+ * Returns the array capacity (number of T items).
+ * @return array capacity
+ */
+ int32_t getCapacity() const { return capacity; }
+ /**
+ * Access without ownership change.
+ * @return the array pointer
+ */
+ T *getAlias() const { return ptr; }
+ /**
+ * Returns the array limit. Simple convenience method.
+ * @return getAlias()+getCapacity()
+ */
+ T *getArrayLimit() const { return getAlias()+capacity; }
+ // No "operator T *() const" because that can make
+ // expressions like mbs[index] ambiguous for some compilers.
+ /**
+ * Array item access (const).
+ * No index bounds check.
+ * @param i array index
+ * @return reference to the array item
+ */
+ const T &operator[](ptrdiff_t i) const { return ptr[i]; }
+ /**
+ * Array item access (writable).
+ * No index bounds check.
+ * @param i array index
+ * @return reference to the array item
+ */
+ T &operator[](ptrdiff_t i) { return ptr[i]; }
+ /**
+ * Deletes the array (if owned) and aliases another one, no transfer of ownership.
+ * If the arguments are illegal, then the current array is unchanged.
+ * @param otherArray must not be NULL
+ * @param otherCapacity must be >0
+ */
+ void aliasInstead(T *otherArray, int32_t otherCapacity) {
+ if(otherArray!=NULL && otherCapacity>0) {
+ releaseArray();
+ ptr=otherArray;
+ capacity=otherCapacity;
+ needToRelease=FALSE;
+ }
+ }
+ /**
+ * Deletes the array (if owned) and allocates a new one, copying length T items.
+ * Returns the new array pointer.
+ * If the allocation fails, then the current array is unchanged and
+ * this method returns NULL.
+ * @param newCapacity can be less than or greater than the current capacity;
+ * must be >0
+ * @param length number of T items to be copied from the old array to the new one
+ * @return the allocated array pointer, or NULL if the allocation failed
+ */
+ inline T *resize(int32_t newCapacity, int32_t length=0);
+ /**
+ * Gives up ownership of the array if owned, or else clones it,
+ * copying length T items; resets itself to the internal stack array.
+ * Returns NULL if the allocation failed.
+ * @param length number of T items to copy when cloning,
+ * and capacity of the clone when cloning
+ * @param resultCapacity will be set to the returned array's capacity (output-only)
+ * @return the array pointer;
+ * caller becomes responsible for deleting the array
+ */
+ inline T *orphanOrClone(int32_t length, int32_t &resultCapacity);
+private:
+ T *ptr;
+ int32_t capacity;
+ UBool needToRelease;
+ T stackArray[stackCapacity];
+ void releaseArray() {
+ if(needToRelease) {
+ uprv_free(ptr);
+ }
+ }
+ /* No comparison operators with other MaybeStackArray's. */
+ bool operator==(const MaybeStackArray & /*other*/) {return FALSE;}
+ bool operator!=(const MaybeStackArray & /*other*/) {return TRUE;}
+ /* No ownership transfer: No copy constructor, no assignment operator. */
+ MaybeStackArray(const MaybeStackArray & /*other*/) {}
+ void operator=(const MaybeStackArray & /*other*/) {}
+
+ // No heap allocation. Use only on the stack.
+ // (Declaring these functions private triggers a cascade of problems:
+ // MSVC insists on exporting an instantiation of MaybeStackArray, which
+ // requires that all functions be defined.
+ // An empty implementation of new() is rejected, it must return a value.
+ // Returning NULL is rejected by gcc for operator new.
+ // The expedient thing is just not to override operator new.
+ // While relatively pointless, heap allocated instances will function.
+ // static void * U_EXPORT2 operator new(size_t size);
+ // static void * U_EXPORT2 operator new[](size_t size);
+#if U_HAVE_PLACEMENT_NEW
+ // static void * U_EXPORT2 operator new(size_t, void *ptr);
+#endif
+};
+
+template<typename T, int32_t stackCapacity>
+inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
+ if(newCapacity>0) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+ ::fprintf(::stderr,"MaybeStacArray (resize) alloc %d * %lu\n", newCapacity,sizeof(T));
+#endif
+ T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
+ if(p!=NULL) {
+ if(length>0) {
+ if(length>capacity) {
+ length=capacity;
+ }
+ if(length>newCapacity) {
+ length=newCapacity;
+ }
+ uprv_memcpy(p, ptr, length*sizeof(T));
+ }
+ releaseArray();
+ ptr=p;
+ capacity=newCapacity;
+ needToRelease=TRUE;
+ }
+ return p;
+ } else {
+ return NULL;
+ }
+}
+
+template<typename T, int32_t stackCapacity>
+inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32_t &resultCapacity) {
+ T *p;
+ if(needToRelease) {
+ p=ptr;
+ } else if(length<=0) {
+ return NULL;
+ } else {
+ if(length>capacity) {
+ length=capacity;
+ }
+ p=(T *)uprv_malloc(length*sizeof(T));
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+ ::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T));
+#endif
+ if(p==NULL) {
+ return NULL;
+ }
+ uprv_memcpy(p, ptr, length*sizeof(T));
+ }
+ resultCapacity=length;
+ ptr=stackArray;
+ capacity=stackCapacity;
+ needToRelease=FALSE;
+ return p;
+}
+
+/**
+ * Variant of MaybeStackArray that allocates a header struct and an array
+ * in one contiguous memory block, using uprv_malloc() and uprv_free().
+ * Provides internal memory with fixed array capacity. Can alias another memory
+ * block or allocate one.
+ * The stackCapacity is the number of T items in the internal memory,
+ * not counting the H header.
+ * Unlike LocalMemory and LocalArray, this class never adopts
+ * (takes ownership of) another memory block.
+ */
+template<typename H, typename T, int32_t stackCapacity>
+class MaybeStackHeaderAndArray {
+public:
+ /**
+ * Default constructor initializes with internal H+T[stackCapacity] buffer.
+ */
+ MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(FALSE) {}
+ /**
+ * Destructor deletes the memory (if owned).
+ */
+ ~MaybeStackHeaderAndArray() { releaseMemory(); }
+ /**
+ * Returns the array capacity (number of T items).
+ * @return array capacity
+ */
+ int32_t getCapacity() const { return capacity; }
+ /**
+ * Access without ownership change.
+ * @return the header pointer
+ */
+ H *getAlias() const { return ptr; }
+ /**
+ * Returns the array start.
+ * @return array start, same address as getAlias()+1
+ */
+ T *getArrayStart() const { return reinterpret_cast<T *>(getAlias()+1); }
+ /**
+ * Returns the array limit.
+ * @return array limit
+ */
+ T *getArrayLimit() const { return getArrayStart()+capacity; }
+ /**
+ * Access without ownership change. Same as getAlias().
+ * A class instance can be used directly in expressions that take a T *.
+ * @return the header pointer
+ */
+ operator H *() const { return ptr; }
+ /**
+ * Array item access (writable).
+ * No index bounds check.
+ * @param i array index
+ * @return reference to the array item
+ */
+ T &operator[](ptrdiff_t i) { return getArrayStart()[i]; }
+ /**
+ * Deletes the memory block (if owned) and aliases another one, no transfer of ownership.
+ * If the arguments are illegal, then the current memory is unchanged.
+ * @param otherArray must not be NULL
+ * @param otherCapacity must be >0
+ */
+ void aliasInstead(H *otherMemory, int32_t otherCapacity) {
+ if(otherMemory!=NULL && otherCapacity>0) {
+ releaseMemory();
+ ptr=otherMemory;
+ capacity=otherCapacity;
+ needToRelease=FALSE;
+ }
+ }
+ /**
+ * Deletes the memory block (if owned) and allocates a new one,
+ * copying the header and length T array items.
+ * Returns the new header pointer.
+ * If the allocation fails, then the current memory is unchanged and
+ * this method returns NULL.
+ * @param newCapacity can be less than or greater than the current capacity;
+ * must be >0
+ * @param length number of T items to be copied from the old array to the new one
+ * @return the allocated pointer, or NULL if the allocation failed
+ */
+ inline H *resize(int32_t newCapacity, int32_t length=0);
+ /**
+ * Gives up ownership of the memory if owned, or else clones it,
+ * copying the header and length T array items; resets itself to the internal memory.
+ * Returns NULL if the allocation failed.
+ * @param length number of T items to copy when cloning,
+ * and array capacity of the clone when cloning
+ * @param resultCapacity will be set to the returned array's capacity (output-only)
+ * @return the header pointer;
+ * caller becomes responsible for deleting the array
+ */
+ inline H *orphanOrClone(int32_t length, int32_t &resultCapacity);
+private:
+ H *ptr;
+ int32_t capacity;
+ UBool needToRelease;
+ // stackHeader must precede stackArray immediately.
+ H stackHeader;
+ T stackArray[stackCapacity];
+ void releaseMemory() {
+ if(needToRelease) {
+ uprv_free(ptr);
+ }
+ }
+ /* No comparison operators with other MaybeStackHeaderAndArray's. */
+ bool operator==(const MaybeStackHeaderAndArray & /*other*/) {return FALSE;}
+ bool operator!=(const MaybeStackHeaderAndArray & /*other*/) {return TRUE;}
+ /* No ownership transfer: No copy constructor, no assignment operator. */
+ MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) {}
+ void operator=(const MaybeStackHeaderAndArray & /*other*/) {}
+
+ // No heap allocation. Use only on the stack.
+ // (Declaring these functions private triggers a cascade of problems;
+ // see the MaybeStackArray class for details.)
+ // static void * U_EXPORT2 operator new(size_t size);
+ // static void * U_EXPORT2 operator new[](size_t size);
+#if U_HAVE_PLACEMENT_NEW
+ // static void * U_EXPORT2 operator new(size_t, void *ptr);
+#endif
+};
+
+template<typename H, typename T, int32_t stackCapacity>
+inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapacity,
+ int32_t length) {
+ if(newCapacity>=0) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+ ::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %d + %d * %ul\n", sizeof(H),newCapacity,sizeof(T));
+#endif
+ H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T));
+ if(p!=NULL) {
+ if(length<0) {
+ length=0;
+ } else if(length>0) {
+ if(length>capacity) {
+ length=capacity;
+ }
+ if(length>newCapacity) {
+ length=newCapacity;
+ }
+ }
+ uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T));
+ releaseMemory();
+ ptr=p;
+ capacity=newCapacity;
+ needToRelease=TRUE;
+ }
+ return p;
+ } else {
+ return NULL;
+ }
+}
+
+template<typename H, typename T, int32_t stackCapacity>
+inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t length,
+ int32_t &resultCapacity) {
+ H *p;
+ if(needToRelease) {
+ p=ptr;
+ } else {
+ if(length<0) {
+ length=0;
+ } else if(length>capacity) {
+ length=capacity;
+ }
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+ ::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %ul + %d * %lu\n", sizeof(H),length,sizeof(T));
+#endif
+ p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T));
+ if(p==NULL) {
+ return NULL;
+ }
+ uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T));
+ }
+ resultCapacity=length;
+ ptr=&stackHeader;
+ capacity=stackCapacity;
+ needToRelease=FALSE;
+ return p;
+}
+
+U_NAMESPACE_END
+
+#endif /* __cplusplus */
+#endif /* CMEMORY_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csdetect.h b/src/core/basetypes/icu-ucsdet/include/csdetect.h
new file mode 100644
index 00000000..15910453
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csdetect.h
@@ -0,0 +1,65 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2013, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSDETECT_H
+#define __CSDETECT_H
+
+#include "unicode/uobject.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+U_NAMESPACE_BEGIN
+
+class InputText;
+class CharsetRecognizer;
+class CharsetMatch;
+
+class CharsetDetector : public UMemory
+{
+private:
+ InputText *textIn;
+ CharsetMatch **resultArray;
+ int32_t resultCount;
+ UBool fStripTags; // If true, setText() will strip tags from input text.
+ UBool fFreshTextSet;
+ static void setRecognizers(UErrorCode &status);
+
+ UBool *fEnabledRecognizers; // If not null, active set of charset recognizers had
+ // been changed from the default. The array index is
+ // corresponding to fCSRecognizers. See setDetectableCharset().
+
+public:
+ CharsetDetector(UErrorCode &status);
+
+ ~CharsetDetector();
+
+ void setText(const char *in, int32_t len);
+
+ const CharsetMatch * const *detectAll(int32_t &maxMatchesFound, UErrorCode &status);
+
+ const CharsetMatch *detect(UErrorCode& status);
+
+ void setDeclaredEncoding(const char *encoding, int32_t len) const;
+
+ UBool setStripTagsFlag(UBool flag);
+
+ UBool getStripTagsFlag() const;
+
+// const char *getCharsetName(int32_t index, UErrorCode& status) const;
+
+ static int32_t getDetectableCount();
+
+
+ static UEnumeration * getAllDetectableCharsets(UErrorCode &status);
+ UEnumeration * getDetectableCharsets(UErrorCode &status) const;
+ void setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status);
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSDETECT_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csmatch.h b/src/core/basetypes/icu-ucsdet/include/csmatch.h
new file mode 100644
index 00000000..6a3671c4
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csmatch.h
@@ -0,0 +1,69 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2012, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSMATCH_H
+#define __CSMATCH_H
+
+#include "unicode/uobject.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+U_NAMESPACE_BEGIN
+
+class InputText;
+class CharsetRecognizer;
+
+/*
+ * CharsetMatch represents the results produced by one Charset Recognizer for one input text
+ * Any confidence > 0 indicates a possible match, meaning that the input bytes
+ * are at least legal.
+ *
+ * The full results of a detect are represented by an array of these
+ * CharsetMatch objects, each representing a possible matching charset.
+ *
+ * Note that a single charset recognizer may detect multiple closely related
+ * charsets, and set different names depending on the exact input bytes seen.
+ */
+class CharsetMatch : public UMemory
+{
+ private:
+ InputText *textIn;
+ int32_t confidence;
+ const char *fCharsetName;
+ const char *fLang;
+
+ public:
+ CharsetMatch();
+
+ /**
+ * fully set the state of this CharsetMatch.
+ * Called by the CharsetRecognizers to record match results.
+ * Default (NULL) parameters for names will be filled by calling the
+ * corresponding getters on the recognizer.
+ */
+ void set(InputText *input,
+ const CharsetRecognizer *cr,
+ int32_t conf,
+ const char *csName=NULL,
+ const char *lang=NULL);
+
+ /**
+ * Return the name of the charset for this Match
+ */
+ const char *getName() const;
+
+ const char *getLanguage()const;
+
+ int32_t getConfidence()const;
+
+ int32_t getUChars(UChar *buf, int32_t cap, UErrorCode *status) const;
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSMATCH_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csr2022.h b/src/core/basetypes/icu-ucsdet/include/csr2022.h
new file mode 100644
index 00000000..2ac2b87d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csr2022.h
@@ -0,0 +1,91 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2012, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSR2022_H
+#define __CSR2022_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+class CharsetMatch;
+
+/**
+ * class CharsetRecog_2022 part of the ICU charset detection imlementation.
+ * This is a superclass for the individual detectors for
+ * each of the detectable members of the ISO 2022 family
+ * of encodings.
+ *
+ * The separate classes are nested within this class.
+ *
+ * @internal
+ */
+class CharsetRecog_2022 : public CharsetRecognizer
+{
+
+public:
+ virtual ~CharsetRecog_2022() = 0;
+
+protected:
+
+ /**
+ * Matching function shared among the 2022 detectors JP, CN and KR
+ * Counts up the number of legal an unrecognized escape sequences in
+ * the sample of text, and computes a score based on the total number &
+ * the proportion that fit the encoding.
+ *
+ *
+ * @param text the byte buffer containing text to analyse
+ * @param textLen the size of the text in the byte.
+ * @param escapeSequences the byte escape sequences to test for.
+ * @return match quality, in the range of 0-100.
+ */
+ int32_t match_2022(const uint8_t *text,
+ int32_t textLen,
+ const uint8_t escapeSequences[][5],
+ int32_t escapeSequences_length) const;
+
+};
+
+class CharsetRecog_2022JP :public CharsetRecog_2022
+{
+public:
+ virtual ~CharsetRecog_2022JP();
+
+ const char *getName() const;
+
+ UBool match(InputText *textIn, CharsetMatch *results) const;
+};
+
+class CharsetRecog_2022KR :public CharsetRecog_2022 {
+public:
+ virtual ~CharsetRecog_2022KR();
+
+ const char *getName() const;
+
+ UBool match(InputText *textIn, CharsetMatch *results) const;
+
+};
+
+class CharsetRecog_2022CN :public CharsetRecog_2022
+{
+public:
+ virtual ~CharsetRecog_2022CN();
+
+ const char* getName() const;
+
+ UBool match(InputText *textIn, CharsetMatch *results) const;
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSR2022_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csrecog.h b/src/core/basetypes/icu-ucsdet/include/csrecog.h
new file mode 100644
index 00000000..6b7573a1
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csrecog.h
@@ -0,0 +1,55 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2012, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSRECOG_H
+#define __CSRECOG_H
+
+#include "unicode/uobject.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "inputext.h"
+
+U_NAMESPACE_BEGIN
+
+class CharsetMatch;
+
+class CharsetRecognizer : public UMemory
+{
+ public:
+ /**
+ * Get the IANA name of this charset.
+ * Note that some recognizers can recognize more than one charset, but that this API
+ * assumes just one name per recognizer.
+ * TODO: need to account for multiple names in public API that enumerates over the
+ * known detectable charsets.
+ * @return the charset name.
+ */
+ virtual const char *getName() const = 0;
+
+ /**
+ * Get the ISO language code for this charset.
+ * @return the language code, or <code>null</code> if the language cannot be determined.
+ */
+ virtual const char *getLanguage() const;
+
+ /*
+ * Try the given input text against this Charset, and fill in the results object
+ * with the quality of the match plus other information related to the match.
+ *
+ * Return TRUE if the the input bytes are a potential match, and
+ * FALSE if the input data is not compatible with, or illegal in this charset.
+ */
+ virtual UBool match(InputText *textIn, CharsetMatch *results) const = 0;
+
+ virtual ~CharsetRecognizer();
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSRECOG_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csrmbcs.h b/src/core/basetypes/icu-ucsdet/include/csrmbcs.h
new file mode 100644
index 00000000..9ea9d8f8
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csrmbcs.h
@@ -0,0 +1,205 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2012, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSRMBCS_H
+#define __CSRMBCS_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+// "Character" iterated character class.
+// Recognizers for specific mbcs encodings make their "characters" available
+// by providing a nextChar() function that fills in an instance of IteratedChar
+// with the next char from the input.
+// The returned characters are not converted to Unicode, but remain as the raw
+// bytes (concatenated into an int) from the codepage data.
+//
+// For Asian charsets, use the raw input rather than the input that has been
+// stripped of markup. Detection only considers multi-byte chars, effectively
+// stripping markup anyway, and double byte chars do occur in markup too.
+//
+class IteratedChar : public UMemory
+{
+public:
+ uint32_t charValue; // 1-4 bytes from the raw input data
+ int32_t index;
+ int32_t nextIndex;
+ UBool error;
+ UBool done;
+
+public:
+ IteratedChar();
+ //void reset();
+ int32_t nextByte(InputText* det);
+};
+
+
+class CharsetRecog_mbcs : public CharsetRecognizer {
+
+protected:
+ /**
+ * Test the match of this charset with the input text data
+ * which is obtained via the CharsetDetector object.
+ *
+ * @param det The CharsetDetector, which contains the input text
+ * to be checked for being in this charset.
+ * @return Two values packed into one int (Damn java, anyhow)
+ * <br/>
+ * bits 0-7: the match confidence, ranging from 0-100
+ * <br/>
+ * bits 8-15: The match reason, an enum-like value.
+ */
+ int32_t match_mbcs(InputText* det, const uint16_t commonChars[], int32_t commonCharsLen) const;
+
+public:
+
+ virtual ~CharsetRecog_mbcs();
+
+ /**
+ * Get the IANA name of this charset.
+ * @return the charset name.
+ */
+
+ const char *getName() const = 0;
+ const char *getLanguage() const = 0;
+ UBool match(InputText* input, CharsetMatch *results) const = 0;
+
+ /**
+ * Get the next character (however many bytes it is) from the input data
+ * Subclasses for specific charset encodings must implement this function
+ * to get characters according to the rules of their encoding scheme.
+ *
+ * This function is not a method of class IteratedChar only because
+ * that would require a lot of extra derived classes, which is awkward.
+ * @param it The IteratedChar "struct" into which the returned char is placed.
+ * @param det The charset detector, which is needed to get at the input byte data
+ * being iterated over.
+ * @return True if a character was returned, false at end of input.
+ */
+ virtual UBool nextChar(IteratedChar *it, InputText *textIn) const = 0;
+
+};
+
+
+/**
+ * Shift-JIS charset recognizer.
+ *
+ */
+class CharsetRecog_sjis : public CharsetRecog_mbcs {
+public:
+ virtual ~CharsetRecog_sjis();
+
+ UBool nextChar(IteratedChar *it, InputText *det) const;
+
+ UBool match(InputText* input, CharsetMatch *results) const;
+
+ const char *getName() const;
+ const char *getLanguage() const;
+
+};
+
+
+/**
+ * EUC charset recognizers. One abstract class that provides the common function
+ * for getting the next character according to the EUC encoding scheme,
+ * and nested derived classes for EUC_KR, EUC_JP, EUC_CN.
+ *
+ */
+class CharsetRecog_euc : public CharsetRecog_mbcs
+{
+public:
+ virtual ~CharsetRecog_euc();
+
+ const char *getName() const = 0;
+ const char *getLanguage() const = 0;
+
+ UBool match(InputText* input, CharsetMatch *results) const = 0;
+ /*
+ * (non-Javadoc)
+ * Get the next character value for EUC based encodings.
+ * Character "value" is simply the raw bytes that make up the character
+ * packed into an int.
+ */
+ UBool nextChar(IteratedChar *it, InputText *det) const;
+};
+
+/**
+ * The charset recognize for EUC-JP. A singleton instance of this class
+ * is created and kept by the public CharsetDetector class
+ */
+class CharsetRecog_euc_jp : public CharsetRecog_euc
+{
+public:
+ virtual ~CharsetRecog_euc_jp();
+
+ const char *getName() const;
+ const char *getLanguage() const;
+
+ UBool match(InputText* input, CharsetMatch *results) const;
+};
+
+/**
+ * The charset recognize for EUC-KR. A singleton instance of this class
+ * is created and kept by the public CharsetDetector class
+ */
+class CharsetRecog_euc_kr : public CharsetRecog_euc
+{
+public:
+ virtual ~CharsetRecog_euc_kr();
+
+ const char *getName() const;
+ const char *getLanguage() const;
+
+ UBool match(InputText* input, CharsetMatch *results) const;
+};
+
+/**
+ *
+ * Big5 charset recognizer.
+ *
+ */
+class CharsetRecog_big5 : public CharsetRecog_mbcs
+{
+public:
+ virtual ~CharsetRecog_big5();
+
+ UBool nextChar(IteratedChar* it, InputText* det) const;
+
+ const char *getName() const;
+ const char *getLanguage() const;
+
+ UBool match(InputText* input, CharsetMatch *results) const;
+};
+
+
+/**
+ *
+ * GB-18030 recognizer. Uses simplified Chinese statistics.
+ *
+ */
+class CharsetRecog_gb_18030 : public CharsetRecog_mbcs
+{
+public:
+ virtual ~CharsetRecog_gb_18030();
+
+ UBool nextChar(IteratedChar* it, InputText* det) const;
+
+ const char *getName() const;
+ const char *getLanguage() const;
+
+ UBool match(InputText* input, CharsetMatch *results) const;
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSRMBCS_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csrsbcs.h b/src/core/basetypes/icu-ucsdet/include/csrsbcs.h
new file mode 100644
index 00000000..2579c029
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csrsbcs.h
@@ -0,0 +1,287 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2013, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSRSBCS_H
+#define __CSRSBCS_H
+
+#include "unicode/uobject.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+class NGramParser : public UMemory
+{
+private:
+ int32_t ngram;
+ const int32_t *ngramList;
+
+ int32_t ngramCount;
+ int32_t hitCount;
+
+protected:
+ int32_t byteIndex;
+ const uint8_t *charMap;
+
+ void addByte(int32_t b);
+
+public:
+ NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
+
+private:
+ /*
+ * Binary search for value in table, which must have exactly 64 entries.
+ */
+ int32_t search(const int32_t *table, int32_t value);
+
+ void lookup(int32_t thisNgram);
+
+ virtual int32_t nextByte(InputText *det);
+ virtual void parseCharacters(InputText *det);
+
+public:
+ int32_t parse(InputText *det);
+
+};
+
+class NGramParser_IBM420 : public NGramParser
+{
+private:
+ int32_t alef;
+ int32_t isLamAlef(int32_t b);
+ int32_t nextByte(InputText *det);
+ void parseCharacters(InputText *det);
+
+public:
+ NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
+};
+
+
+class CharsetRecog_sbcs : public CharsetRecognizer
+{
+public:
+ CharsetRecog_sbcs();
+ virtual ~CharsetRecog_sbcs();
+ virtual const char *getName() const = 0;
+ virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
+ virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
+};
+
+class CharsetRecog_8859_1 : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_8859_1();
+ const char *getName() const;
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_2 : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_8859_2();
+ const char *getName() const;
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_5 : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_8859_5();
+ const char *getName() const;
+};
+
+class CharsetRecog_8859_6 : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_8859_6();
+
+ const char *getName() const;
+};
+
+class CharsetRecog_8859_7 : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_8859_7();
+
+ const char *getName() const;
+};
+
+class CharsetRecog_8859_8 : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_8859_8();
+
+ virtual const char *getName() const;
+};
+
+class CharsetRecog_8859_9 : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_8859_9();
+
+ const char *getName() const;
+};
+
+
+
+class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
+{
+public:
+ virtual ~CharsetRecog_8859_5_ru();
+
+ const char *getLanguage() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
+{
+public:
+ virtual ~CharsetRecog_8859_6_ar();
+
+ const char *getLanguage() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
+{
+public:
+ virtual ~CharsetRecog_8859_7_el();
+
+ const char *getLanguage() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
+{
+public:
+ virtual ~CharsetRecog_8859_8_I_he();
+
+ const char *getName() const;
+
+ const char *getLanguage() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
+{
+public:
+ virtual ~CharsetRecog_8859_8_he ();
+
+ const char *getLanguage() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
+{
+public:
+ virtual ~CharsetRecog_8859_9_tr ();
+
+ const char *getLanguage() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_windows_1256();
+
+ const char *getName() const;
+
+ const char *getLanguage() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_windows_1251();
+
+ const char *getName() const;
+
+ const char *getLanguage() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+
+class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_KOI8_R();
+
+ const char *getName() const;
+
+ const char *getLanguage() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_IBM424_he();
+
+ const char *getLanguage() const;
+};
+
+class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
+public:
+ virtual ~CharsetRecog_IBM424_he_rtl();
+
+ const char *getName() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
+ virtual ~CharsetRecog_IBM424_he_ltr();
+
+ const char *getName() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
+{
+public:
+ virtual ~CharsetRecog_IBM420_ar();
+
+ const char *getLanguage() const;
+ int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
+
+};
+
+class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
+public:
+ virtual ~CharsetRecog_IBM420_ar_rtl();
+
+ const char *getName() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
+ virtual ~CharsetRecog_IBM420_ar_ltr();
+
+ const char *getName() const;
+
+ virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+U_NAMESPACE_END
+
+#endif /* !UCONFIG_NO_CONVERSION */
+#endif /* __CSRSBCS_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csrucode.h b/src/core/basetypes/icu-ucsdet/include/csrucode.h
new file mode 100644
index 00000000..a8a4f2bc
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csrucode.h
@@ -0,0 +1,106 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2012, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSRUCODE_H
+#define __CSRUCODE_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * This class matches UTF-16 and UTF-32, both big- and little-endian. The
+ * BOM will be used if it is present.
+ *
+ * @internal
+ */
+class CharsetRecog_Unicode : public CharsetRecognizer
+{
+
+public:
+
+ virtual ~CharsetRecog_Unicode();
+ /* (non-Javadoc)
+ * @see com.ibm.icu.text.CharsetRecognizer#getName()
+ */
+ const char* getName() const = 0;
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector)
+ */
+ UBool match(InputText* textIn, CharsetMatch *results) const = 0;
+};
+
+
+class CharsetRecog_UTF_16_BE : public CharsetRecog_Unicode
+{
+public:
+
+ virtual ~CharsetRecog_UTF_16_BE();
+
+ const char *getName() const;
+
+ UBool match(InputText* textIn, CharsetMatch *results) const;
+};
+
+class CharsetRecog_UTF_16_LE : public CharsetRecog_Unicode
+{
+public:
+
+ virtual ~CharsetRecog_UTF_16_LE();
+
+ const char *getName() const;
+
+ UBool match(InputText* textIn, CharsetMatch *results) const;
+};
+
+class CharsetRecog_UTF_32 : public CharsetRecog_Unicode
+{
+protected:
+ virtual int32_t getChar(const uint8_t *input, int32_t index) const = 0;
+public:
+
+ virtual ~CharsetRecog_UTF_32();
+
+ const char* getName() const = 0;
+
+ UBool match(InputText* textIn, CharsetMatch *results) const;
+};
+
+
+class CharsetRecog_UTF_32_BE : public CharsetRecog_UTF_32
+{
+protected:
+ int32_t getChar(const uint8_t *input, int32_t index) const;
+
+public:
+
+ virtual ~CharsetRecog_UTF_32_BE();
+
+ const char *getName() const;
+};
+
+
+class CharsetRecog_UTF_32_LE : public CharsetRecog_UTF_32
+{
+protected:
+ int32_t getChar(const uint8_t *input, int32_t index) const;
+
+public:
+ virtual ~CharsetRecog_UTF_32_LE();
+
+ const char* getName() const;
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSRUCODE_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csrutf8.h b/src/core/basetypes/icu-ucsdet/include/csrutf8.h
new file mode 100644
index 00000000..82e8f9d7
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csrutf8.h
@@ -0,0 +1,42 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2012, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSRUTF8_H
+#define __CSRUTF8_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Charset recognizer for UTF-8
+ *
+ * @internal
+ */
+class CharsetRecog_UTF8: public CharsetRecognizer {
+
+ public:
+
+ virtual ~CharsetRecog_UTF8();
+
+ const char *getName() const;
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector)
+ */
+ UBool match(InputText *input, CharsetMatch *results) const;
+
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSRUTF8_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/cstring.h b/src/core/basetypes/icu-ucsdet/include/cstring.h
new file mode 100644
index 00000000..64b68ffa
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/cstring.h
@@ -0,0 +1,140 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File CSTRING.H
+*
+* Contains CString interface
+*
+* @author Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+* 6/17/98 hshih Created.
+* 05/03/99 stephen Changed from functions to macros.
+* 06/14/99 stephen Added icu_strncat, icu_strncmp, icu_tolower
+*
+******************************************************************************
+*/
+
+#ifndef CSTRING_H
+#define CSTRING_H 1
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE strcpy(dst, src)
+#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
+#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
+#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
+#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
+#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
+#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
+
+#if U_DEBUG
+
+#define uprv_strncpy(dst, src, size) ( \
+ uprv_checkValidMemory(src, 1), \
+ U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size))
+#define uprv_strncmp(s1, s2, n) ( \
+ uprv_checkValidMemory(s1, 1), \
+ uprv_checkValidMemory(s2, 1), \
+ U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n))
+#define uprv_strncat(dst, src, n) ( \
+ uprv_checkValidMemory(src, 1), \
+ U_STANDARD_CPP_NAMESPACE strncat(dst, src, n))
+
+#else
+
+#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
+#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
+#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
+
+#endif /* U_DEBUG */
+
+/**
+ * Is c an ASCII-repertoire letter a-z or A-Z?
+ * Note: The implementation is specific to whether ICU is compiled for
+ * an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this.
+ */
+U_CAPI UBool U_EXPORT2
+uprv_isASCIILetter(char c);
+
+U_CAPI char U_EXPORT2
+uprv_toupper(char c);
+
+
+U_CAPI char U_EXPORT2
+uprv_asciitolower(char c);
+
+U_CAPI char U_EXPORT2
+uprv_ebcdictolower(char c);
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_tolower uprv_asciitolower
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_tolower uprv_ebcdictolower
+#else
+# error U_CHARSET_FAMILY is not valid
+#endif
+
+#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
+#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
+#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)
+
+/* Conversion from a digit to the character with radix base from 2-19 */
+/* May need to use U_UPPER_ORDINAL*/
+#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))
+
+U_CAPI char* U_EXPORT2
+uprv_strdup(const char *src);
+
+/**
+ * uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
+ * Terminate with a null at offset n. If n is -1, works like uprv_strdup
+ * @param src
+ * @param n length of the input string, not including null.
+ * @return new string (owned by caller, use uprv_free to free).
+ * @internal
+ */
+U_CAPI char* U_EXPORT2
+uprv_strndup(const char *src, int32_t n);
+
+U_CAPI char* U_EXPORT2
+T_CString_toLowerCase(char* str);
+
+U_CAPI char* U_EXPORT2
+T_CString_toUpperCase(char* str);
+
+U_CAPI int32_t U_EXPORT2
+T_CString_integerToString(char *buffer, int32_t n, int32_t radix);
+
+U_CAPI int32_t U_EXPORT2
+T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);
+
+U_CAPI int32_t U_EXPORT2
+T_CString_stringToInteger(const char *integerString, int32_t radix);
+
+/**
+ * Case-insensitive, language-independent string comparison
+ * limited to the ASCII character repertoire.
+ */
+U_CAPI int U_EXPORT2
+uprv_stricmp(const char *str1, const char *str2);
+
+/**
+ * Case-insensitive, language-independent string comparison
+ * limited to the ASCII character repertoire.
+ */
+U_CAPI int U_EXPORT2
+uprv_strnicmp(const char *str1, const char *str2, uint32_t n);
+
+#endif /* ! CSTRING_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/cwchar.h b/src/core/basetypes/icu-ucsdet/include/cwchar.h
new file mode 100644
index 00000000..2ab36c03
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/cwchar.h
@@ -0,0 +1,56 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2001, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: cwchar.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001may25
+* created by: Markus W. Scherer
+*
+* This file contains ICU-internal definitions of wchar_t operations.
+* These definitions were moved here from cstring.h so that fewer
+* ICU implementation files include wchar.h.
+*/
+
+#ifndef __CWCHAR_H__
+#define __CWCHAR_H__
+
+#include <string.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+
+/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
+#if U_HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+
+/*===========================================================================*/
+/* Wide-character functions */
+/*===========================================================================*/
+
+/* The following are not available on all systems, defined in wchar.h or string.h. */
+#if U_HAVE_WCSCPY
+# define uprv_wcscpy wcscpy
+# define uprv_wcscat wcscat
+# define uprv_wcslen wcslen
+#else
+U_CAPI wchar_t* U_EXPORT2
+uprv_wcscpy(wchar_t *dst, const wchar_t *src);
+U_CAPI wchar_t* U_EXPORT2
+uprv_wcscat(wchar_t *dst, const wchar_t *src);
+U_CAPI size_t U_EXPORT2
+uprv_wcslen(const wchar_t *src);
+#endif
+
+/* The following are part of the ANSI C standard, defined in stdlib.h . */
+#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
+#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)
+
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/inputext.h b/src/core/basetypes/icu-ucsdet/include/inputext.h
new file mode 100644
index 00000000..0c5973d8
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/inputext.h
@@ -0,0 +1,61 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2008, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __INPUTEXT_H
+#define __INPUTEXT_H
+
+/**
+ * \file
+ * \internal
+ *
+ * This is an internal header for the Character Set Detection code. The
+ * name is probably too generic...
+ */
+
+
+#include "unicode/uobject.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+U_NAMESPACE_BEGIN
+
+class InputText : public UMemory
+{
+ // Prevent copying
+ InputText(const InputText &);
+public:
+ InputText(UErrorCode &status);
+ ~InputText();
+
+ void setText(const char *in, int32_t len);
+ void setDeclaredEncoding(const char *encoding, int32_t len);
+ UBool isSet() const;
+ void MungeInput(UBool fStripTags);
+
+ // The text to be checked. Markup will have been
+ // removed if appropriate.
+ uint8_t *fInputBytes;
+ int32_t fInputLen; // Length of the byte data in fInputBytes.
+ // byte frequency statistics for the input text.
+ // Value is percent, not absolute.
+ // Value is rounded up, so zero really means zero occurences.
+ int16_t *fByteStats;
+ UBool fC1Bytes; // True if any bytes in the range 0x80 - 0x9F are in the input;false by default
+ char *fDeclaredEncoding;
+
+ const uint8_t *fRawInput; // Original, untouched input bytes.
+ // If user gave us a byte array, this is it.
+ // If user gave us a stream, it's read to a
+ // buffer here.
+ int32_t fRawLength; // Length of data in fRawInput array.
+
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __INPUTEXT_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/locmap.h b/src/core/basetypes/icu-ucsdet/include/locmap.h
new file mode 100644
index 00000000..214bbcec
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/locmap.h
@@ -0,0 +1,37 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1996-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File locmap.h : Locale Mapping Classes
+*
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+* 3/11/97 aliu Added setId().
+* 4/20/99 Madhu Added T_convertToPosix()
+* 09/18/00 george Removed the memory leaks.
+* 08/23/01 george Convert to C
+*============================================================================
+*/
+
+#ifndef LOCMAP_H
+#define LOCMAP_H
+
+#include "unicode/utypes.h"
+
+#define LANGUAGE_LCID(hostID) (uint16_t)(0x03FF & hostID)
+
+U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status);
+
+/* Don't call this function directly. Use uloc_getLCID instead. */
+U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status);
+
+#endif /* LOCMAP_H */
+
diff --git a/src/core/basetypes/icu-ucsdet/include/mutex.h b/src/core/basetypes/icu-ucsdet/include/mutex.h
new file mode 100644
index 00000000..07dcdbbc
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/mutex.h
@@ -0,0 +1,77 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*/
+//----------------------------------------------------------------------------
+// File: mutex.h
+//
+// Lightweight C++ wrapper for umtx_ C mutex functions
+//
+// Author: Alan Liu 1/31/97
+// History:
+// 06/04/97 helena Updated setImplementation as per feedback from 5/21 drop.
+// 04/07/1999 srl refocused as a thin wrapper
+//
+//----------------------------------------------------------------------------
+#ifndef MUTEX_H
+#define MUTEX_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "umutex.h"
+
+U_NAMESPACE_BEGIN
+
+//----------------------------------------------------------------------------
+// Code within that accesses shared static or global data should
+// should instantiate a Mutex object while doing so. You should make your own
+// private mutex where possible.
+
+// For example:
+//
+// UMutex myMutex;
+//
+// void Function(int arg1, int arg2)
+// {
+// static Object* foo; // Shared read-write object
+// Mutex mutex(&myMutex); // or no args for the global lock
+// foo->Method();
+// // When 'mutex' goes out of scope and gets destroyed here, the lock is released
+// }
+//
+// Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
+// returning a Mutex. This is a common mistake which silently slips through the
+// compiler!!
+//
+
+class U_COMMON_API Mutex : public UMemory {
+public:
+ inline Mutex(UMutex *mutex = NULL);
+ inline ~Mutex();
+
+private:
+ UMutex *fMutex;
+
+ Mutex(const Mutex &other); // forbid copying of this class
+ Mutex &operator=(const Mutex &other); // forbid copying of this class
+};
+
+inline Mutex::Mutex(UMutex *mutex)
+ : fMutex(mutex)
+{
+ umtx_lock(fMutex);
+}
+
+inline Mutex::~Mutex()
+{
+ umtx_unlock(fMutex);
+}
+
+U_NAMESPACE_END
+
+#endif //_MUTEX_
+//eof
diff --git a/src/core/basetypes/icu-ucsdet/include/putilimp.h b/src/core/basetypes/icu-ucsdet/include/putilimp.h
new file mode 100644
index 00000000..d2c1c66f
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/putilimp.h
@@ -0,0 +1,611 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : putilimp.h
+*
+* Date Name Description
+* 10/17/04 grhoten Move internal functions from putil.h to this file.
+******************************************************************************
+*/
+
+#ifndef PUTILIMP_H
+#define PUTILIMP_H
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+
+/**
+ * \def U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
+ * Nearly all CPUs and compilers implement a right-shift of a signed integer
+ * as an Arithmetic Shift Right which copies the sign bit (the Most Significant Bit (MSB))
+ * into the vacated bits (sign extension).
+ * For example, (int32_t)0xfff5fff3>>4 becomes 0xffff5fff and -1>>1=-1.
+ *
+ * This can be useful for storing a signed value in the upper bits
+ * and another bit field in the lower bits.
+ * The signed value can be retrieved by simple right-shifting.
+ *
+ * This is consistent with the Java language.
+ *
+ * However, the C standard allows compilers to implement a right-shift of a signed integer
+ * as a Logical Shift Right which copies a 0 into the vacated bits.
+ * For example, (int32_t)0xfff5fff3>>4 becomes 0x0fff5fff and -1>>1=0x7fffffff.
+ *
+ * Code that depends on the natural behavior should be guarded with this macro,
+ * with an alternate path for unusual platforms.
+ * @internal
+ */
+#ifdef U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
+ /* Use the predefined value. */
+#else
+ /*
+ * Nearly all CPUs & compilers implement a right-shift of a signed integer
+ * as an Arithmetic Shift Right (with sign extension).
+ */
+# define U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 1
+#endif
+
+/** Define this to 1 if your platform supports IEEE 754 floating point,
+ to 0 if it does not. */
+#ifndef IEEE_754
+# define IEEE_754 1
+#endif
+
+/**
+ * uintptr_t is an optional part of the standard definitions in stdint.h.
+ * The opengroup.org documentation for stdint.h says
+ * "On XSI-conformant systems, the intptr_t and uintptr_t types are required;
+ * otherwise, they are optional."
+ * We assume that when uintptr_t is defined, UINTPTR_MAX is defined as well.
+ *
+ * Do not use ptrdiff_t since it is signed. size_t is unsigned.
+ */
+/* TODO: This check fails on some z environments. Filed a ticket #9357 for this. */
+#if !defined(__intptr_t_defined) && !defined(UINTPTR_MAX) && (U_PLATFORM != U_PF_OS390)
+typedef size_t uintptr_t;
+#endif
+
+/**
+ * \def U_HAVE_MSVC_2003_OR_EARLIER
+ * Flag for workaround of MSVC 2003 optimization bugs
+ * @internal
+ */
+#if !defined(U_HAVE_MSVC_2003_OR_EARLIER) && defined(_MSC_VER) && (_MSC_VER < 1400)
+#define U_HAVE_MSVC_2003_OR_EARLIER
+#endif
+
+/*===========================================================================*/
+/** @{ Information about POSIX support */
+/*===========================================================================*/
+
+#ifdef U_HAVE_NL_LANGINFO_CODESET
+ /* Use the predefined value. */
+#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX
+# define U_HAVE_NL_LANGINFO_CODESET 0
+#else
+# define U_HAVE_NL_LANGINFO_CODESET 1
+#endif
+
+#ifdef U_NL_LANGINFO_CODESET
+ /* Use the predefined value. */
+#elif !U_HAVE_NL_LANGINFO_CODESET
+# define U_NL_LANGINFO_CODESET -1
+#elif U_PLATFORM == U_PF_OS400
+ /* not defined */
+#else
+# define U_NL_LANGINFO_CODESET CODESET
+#endif
+
+#ifdef U_TZSET
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_TZSET _tzset
+#elif U_PLATFORM == U_PF_OS400
+ /* not defined */
+#else
+# define U_TZSET tzset
+#endif
+
+#if defined(U_TIMEZONE) || defined(U_HAVE_TIMEZONE)
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID
+# define U_TIMEZONE timezone
+#elif U_PLATFORM_IS_LINUX_BASED
+# if defined(__UCLIBC__)
+ /* uClibc does not have __timezone or _timezone. */
+# elif defined(_NEWLIB_VERSION)
+# define U_TIMEZONE _timezone
+# elif defined(__GLIBC__)
+ /* glibc */
+# define U_TIMEZONE __timezone
+# endif
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_TIMEZONE _timezone
+#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__)
+ /* not defined */
+#elif U_PLATFORM == U_PF_OS400
+ /* not defined */
+#elif U_PLATFORM == U_PF_IPHONE
+ /* not defined */
+#else
+# define U_TIMEZONE timezone
+#endif
+
+#ifdef U_TZNAME
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_TZNAME _tzname
+#elif U_PLATFORM == U_PF_OS400
+ /* not defined */
+#else
+# define U_TZNAME tzname
+#endif
+
+#ifdef U_HAVE_MMAP
+ /* Use the predefined value. */
+#elif U_PLATFORM_HAS_WIN32_API
+# define U_HAVE_MMAP 0
+#else
+# define U_HAVE_MMAP 1
+#endif
+
+#ifdef U_HAVE_POPEN
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_HAVE_POPEN 0
+#elif U_PLATFORM == U_PF_OS400
+# define U_HAVE_POPEN 0
+#else
+# define U_HAVE_POPEN 1
+#endif
+
+/**
+ * \def U_HAVE_DIRENT_H
+ * Defines whether dirent.h is available.
+ * @internal
+ */
+#ifdef U_HAVE_DIRENT_H
+ /* Use the predefined value. */
+#elif U_PLATFORM_HAS_WIN32_API
+# define U_HAVE_DIRENT_H 0
+#else
+# define U_HAVE_DIRENT_H 1
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ GCC built in functions for atomic memory operations */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_GCC_ATOMICS
+ * @internal
+ */
+#ifdef U_HAVE_GCC_ATOMICS
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_MINGW
+ #define U_HAVE_GCC_ATOMICS 0
+#elif U_GCC_MAJOR_MINOR >= 404 || defined(__clang__)
+ /* TODO: Intel icc and IBM xlc on AIX also support gcc atomics. (Intel originated them.)
+ * Add them for these compilers.
+ * Note: Clang sets __GNUC__ defines for version 4.2, so misses the 4.4 test here.
+ */
+# define U_HAVE_GCC_ATOMICS 1
+#else
+# define U_HAVE_GCC_ATOMICS 0
+#endif
+
+/** @} */
+
+/**
+ * \def U_HAVE_STD_ATOMICS
+ * Defines whether the standard C++11 <atomic> is available.
+ * ICU will use this when avialable,
+ * otherwise will fall back to compiler or platform specific alternatives.
+ * @internal
+ */
+#ifdef U_HAVE_STD_ATOMICS
+ /* Use the predefined value. */
+#elif !defined(__cplusplus) || __cplusplus<201103L
+ /* Not C++11, disable use of atomics */
+# define U_HAVE_STD_ATOMICS 0
+#elif __clang__ && __clang_major__==3 && __clang_minor__<=1
+ /* Clang 3.1, has atomic variable initializer bug. */
+# define U_HAVE_STD_ATOMICS 0
+#else
+ /* U_HAVE_ATOMIC is typically set by an autoconf test of #include <atomic> */
+ /* Can be set manually, or left undefined, on platforms without autoconf. */
+# if defined(U_HAVE_ATOMIC) && U_HAVE_ATOMIC
+# define U_HAVE_STD_ATOMICS 1
+# else
+# define U_HAVE_STD_ATOMICS 0
+# endif
+#endif
+
+
+/*===========================================================================*/
+/** @{ Code alignment */
+/*===========================================================================*/
+
+/**
+ * \def U_ALIGN_CODE
+ * This is used to align code fragments to a specific byte boundary.
+ * This is useful for getting consistent performance test results.
+ * @internal
+ */
+#ifdef U_ALIGN_CODE
+ /* Use the predefined value. */
+#elif defined(_MSC_VER) && defined(_M_IX86) && !defined(_MANAGED)
+# define U_ALIGN_CODE(boundarySize) __asm align boundarySize
+#else
+# define U_ALIGN_CODE(boundarySize)
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Programs used by ICU code */
+/*===========================================================================*/
+
+/**
+ * \def U_MAKE_IS_NMAKE
+ * Defines whether the "make" program is Windows nmake.
+ */
+#ifdef U_MAKE_IS_NMAKE
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_WINDOWS
+# define U_MAKE_IS_NMAKE 1
+#else
+# define U_MAKE_IS_NMAKE 0
+#endif
+
+/** @} */
+
+/*==========================================================================*/
+/* Platform utilities */
+/*==========================================================================*/
+
+/**
+ * Platform utilities isolates the platform dependencies of the
+ * libarary. For each platform which this code is ported to, these
+ * functions may have to be re-implemented.
+ */
+
+/**
+ * Floating point utility to determine if a double is Not a Number (NaN).
+ * @internal
+ */
+U_INTERNAL UBool U_EXPORT2 uprv_isNaN(double d);
+/**
+ * Floating point utility to determine if a double has an infinite value.
+ * @internal
+ */
+U_INTERNAL UBool U_EXPORT2 uprv_isInfinite(double d);
+/**
+ * Floating point utility to determine if a double has a positive infinite value.
+ * @internal
+ */
+U_INTERNAL UBool U_EXPORT2 uprv_isPositiveInfinity(double d);
+/**
+ * Floating point utility to determine if a double has a negative infinite value.
+ * @internal
+ */
+U_INTERNAL UBool U_EXPORT2 uprv_isNegativeInfinity(double d);
+/**
+ * Floating point utility that returns a Not a Number (NaN) value.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_getNaN(void);
+/**
+ * Floating point utility that returns an infinite value.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_getInfinity(void);
+
+/**
+ * Floating point utility to truncate a double.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_trunc(double d);
+/**
+ * Floating point utility to calculate the floor of a double.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_floor(double d);
+/**
+ * Floating point utility to calculate the ceiling of a double.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_ceil(double d);
+/**
+ * Floating point utility to calculate the absolute value of a double.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_fabs(double d);
+/**
+ * Floating point utility to calculate the fractional and integer parts of a double.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_modf(double d, double* pinteger);
+/**
+ * Floating point utility to calculate the remainder of a double divided by another double.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_fmod(double d, double y);
+/**
+ * Floating point utility to calculate d to the power of exponent (d^exponent).
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_pow(double d, double exponent);
+/**
+ * Floating point utility to calculate 10 to the power of exponent (10^exponent).
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_pow10(int32_t exponent);
+/**
+ * Floating point utility to calculate the maximum value of two doubles.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_fmax(double d, double y);
+/**
+ * Floating point utility to calculate the minimum value of two doubles.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_fmin(double d, double y);
+/**
+ * Private utility to calculate the maximum value of two integers.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y);
+/**
+ * Private utility to calculate the minimum value of two integers.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y);
+
+#if U_IS_BIG_ENDIAN
+# define uprv_isNegative(number) (*((signed char *)&(number))<0)
+#else
+# define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
+#endif
+
+/**
+ * Return the largest positive number that can be represented by an integer
+ * type of arbitrary bit length.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_maxMantissa(void);
+
+/**
+ * Floating point utility to calculate the logarithm of a double.
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_log(double d);
+
+/**
+ * Does common notion of rounding e.g. uprv_floor(x + 0.5);
+ * @param x the double number
+ * @return the rounded double
+ * @internal
+ */
+U_INTERNAL double U_EXPORT2 uprv_round(double x);
+
+#if 0
+/**
+ * Returns the number of digits after the decimal point in a double number x.
+ *
+ * @param x the double number
+ * @return the number of digits after the decimal point in a double number x.
+ * @internal
+ */
+/*U_INTERNAL int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);*/
+#endif
+
+#if !U_CHARSET_IS_UTF8
+/**
+ * Please use ucnv_getDefaultName() instead.
+ * Return the default codepage for this platform and locale.
+ * This function can call setlocale() on Unix platforms. Please read the
+ * platform documentation on setlocale() before calling this function.
+ * @return the default codepage for this platform
+ * @internal
+ */
+U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void);
+#endif
+
+/**
+ * Please use uloc_getDefault() instead.
+ * Return the default locale ID string by querying ths system, or
+ * zero if one cannot be found.
+ * This function can call setlocale() on Unix platforms. Please read the
+ * platform documentation on setlocale() before calling this function.
+ * @return the default locale ID string
+ * @internal
+ */
+U_INTERNAL const char* U_EXPORT2 uprv_getDefaultLocaleID(void);
+
+/**
+ * Time zone utilities
+ *
+ * Wrappers for C runtime library functions relating to timezones.
+ * The t_tzset() function (similar to tzset) uses the current setting
+ * of the environment variable TZ to assign values to three global
+ * variables: daylight, timezone, and tzname. These variables have the
+ * following meanings, and are declared in &lt;time.h&gt;.
+ *
+ * daylight Nonzero if daylight-saving-time zone (DST) is specified
+ * in TZ; otherwise, 0. Default value is 1.
+ * timezone Difference in seconds between coordinated universal
+ * time and local time. E.g., -28,800 for PST (GMT-8hrs)
+ * tzname(0) Three-letter time-zone name derived from TZ environment
+ * variable. E.g., "PST".
+ * tzname(1) Three-letter DST zone name derived from TZ environment
+ * variable. E.g., "PDT". If DST zone is omitted from TZ,
+ * tzname(1) is an empty string.
+ *
+ * Notes: For example, to set the TZ environment variable to correspond
+ * to the current time zone in Germany, you can use one of the
+ * following statements:
+ *
+ * set TZ=GST1GDT
+ * set TZ=GST+1GDT
+ *
+ * If the TZ value is not set, t_tzset() attempts to use the time zone
+ * information specified by the operating system. Under Windows NT
+ * and Windows 95, this information is specified in the Control Panel's
+ * Date/Time application.
+ * @internal
+ */
+U_INTERNAL void U_EXPORT2 uprv_tzset(void);
+
+/**
+ * Difference in seconds between coordinated universal
+ * time and local time. E.g., -28,800 for PST (GMT-8hrs)
+ * @return the difference in seconds between coordinated universal time and local time.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2 uprv_timezone(void);
+
+/**
+ * tzname(0) Three-letter time-zone name derived from TZ environment
+ * variable. E.g., "PST".
+ * tzname(1) Three-letter DST zone name derived from TZ environment
+ * variable. E.g., "PDT". If DST zone is omitted from TZ,
+ * tzname(1) is an empty string.
+ * @internal
+ */
+U_INTERNAL const char* U_EXPORT2 uprv_tzname(int n);
+
+/**
+ * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
+ * This function is affected by 'faketime' and should be the bottleneck for all user-visible ICU time functions.
+ * @return the UTC time measured in milliseconds
+ * @internal
+ */
+U_INTERNAL UDate U_EXPORT2 uprv_getUTCtime(void);
+
+/**
+ * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
+ * This function is not affected by 'faketime', so it should only be used by low level test functions- not by anything that
+ * exposes time to the end user.
+ * @return the UTC time measured in milliseconds
+ * @internal
+ */
+U_INTERNAL UDate U_EXPORT2 uprv_getRawUTCtime(void);
+
+/**
+ * Determine whether a pathname is absolute or not, as defined by the platform.
+ * @param path Pathname to test
+ * @return TRUE if the path is absolute
+ * @internal (ICU 3.0)
+ */
+U_INTERNAL UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path);
+
+/**
+ * Use U_MAX_PTR instead of this function.
+ * @param void pointer to test
+ * @return the largest possible pointer greater than the base
+ * @internal (ICU 3.8)
+ */
+U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base);
+
+/**
+ * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer.
+ * In fact, buffer sizes must not exceed 2GB so that the difference between
+ * the buffer limit and the buffer start can be expressed in an int32_t.
+ *
+ * The definition of U_MAX_PTR must fulfill the following conditions:
+ * - return the largest possible pointer greater than base
+ * - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
+ * - avoid wrapping around at high addresses
+ * - make sure that the returned pointer is not farther from base than 0x7fffffff bytes
+ *
+ * @param base The beginning of a buffer to find the maximum offset from
+ * @internal
+ */
+#ifndef U_MAX_PTR
+# if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
+ /* We have 31-bit pointers. */
+# define U_MAX_PTR(base) ((void *)0x7fffffff)
+# elif U_PLATFORM == U_PF_OS400
+# define U_MAX_PTR(base) uprv_maximumPtr((void *)base)
+# elif 0
+ /*
+ * For platforms where pointers are scalar values (which is normal, but unlike i5/OS)
+ * but that do not define uintptr_t.
+ *
+ * However, this does not work on modern compilers:
+ * The C++ standard does not define pointer overflow, and allows compilers to
+ * assume that p+u>p for any pointer p and any integer u>0.
+ * Thus, modern compilers optimize away the ">" comparison.
+ * (See ICU tickets #7187 and #8096.)
+ */
+# define U_MAX_PTR(base) \
+ ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) \
+ ? ((char *)(base)+0x7fffffffu) \
+ : (char *)-1))
+# else
+ /* Default version. C++ standard compliant for scalar pointers. */
+# define U_MAX_PTR(base) \
+ ((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \
+ ? ((uintptr_t)(base)+0x7fffffffu) \
+ : (uintptr_t)-1))
+# endif
+#endif
+
+/* Dynamic Library Functions */
+
+typedef void (UVoidFunction)(void);
+
+#if U_ENABLE_DYLOAD
+/**
+ * Load a library
+ * @internal (ICU 4.4)
+ */
+U_INTERNAL void * U_EXPORT2 uprv_dl_open(const char *libName, UErrorCode *status);
+
+/**
+ * Close a library
+ * @internal (ICU 4.4)
+ */
+U_INTERNAL void U_EXPORT2 uprv_dl_close( void *lib, UErrorCode *status);
+
+/**
+ * Extract a symbol from a library (function)
+ * @internal (ICU 4.8)
+ */
+U_INTERNAL UVoidFunction* U_EXPORT2 uprv_dlsym_func( void *lib, const char *symbolName, UErrorCode *status);
+
+/**
+ * Extract a symbol from a library (function)
+ * Not implemented, no clients.
+ * @internal
+ */
+/* U_INTERNAL void * U_EXPORT2 uprv_dlsym_data( void *lib, const char *symbolName, UErrorCode *status); */
+
+#endif
+
+/**
+ * Define malloc and related functions
+ * @internal
+ */
+#if U_PLATFORM == U_PF_OS400
+# define uprv_default_malloc(x) _C_TS_malloc(x)
+# define uprv_default_realloc(x,y) _C_TS_realloc(x,y)
+# define uprv_default_free(x) _C_TS_free(x)
+/* also _C_TS_calloc(x) */
+#else
+/* C defaults */
+# define uprv_default_malloc(x) malloc(x)
+# define uprv_default_realloc(x,y) realloc(x,y)
+# define uprv_default_free(x) free(x)
+#endif
+
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/uarrsort.h b/src/core/basetypes/icu-ucsdet/include/uarrsort.h
new file mode 100644
index 00000000..aece6da9
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uarrsort.h
@@ -0,0 +1,101 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uarrsort.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003aug04
+* created by: Markus W. Scherer
+*
+* Internal function for sorting arrays.
+*/
+
+#ifndef __UARRSORT_H__
+#define __UARRSORT_H__
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+/**
+ * Function type for comparing two items as part of sorting an array or similar.
+ * Callback function for uprv_sortArray().
+ *
+ * @param context Application-specific pointer, passed through by uprv_sortArray().
+ * @param left Pointer to the "left" item.
+ * @param right Pointer to the "right" item.
+ * @return 32-bit signed integer comparison result:
+ * <0 if left<right
+ * ==0 if left==right
+ * >0 if left>right
+ *
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UComparator(const void *context, const void *left, const void *right);
+U_CDECL_END
+
+/**
+ * Array sorting function.
+ * Uses a UComparator for comparing array items to each other, and simple
+ * memory copying to move items.
+ *
+ * @param array The array to be sorted.
+ * @param length The number of items in the array.
+ * @param itemSize The size in bytes of each array item.
+ * @param cmp UComparator function used to compare two items each.
+ * @param context Application-specific pointer, passed through to the UComparator.
+ * @param sortStable If true, a stable sorting algorithm must be used.
+ * @param pErrorCode ICU in/out UErrorCode parameter.
+ *
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+uprv_sortArray(void *array, int32_t length, int32_t itemSize,
+ UComparator *cmp, const void *context,
+ UBool sortStable, UErrorCode *pErrorCode);
+
+/**
+ * Convenience UComparator implementation for uint16_t arrays.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_uint16Comparator(const void *context, const void *left, const void *right);
+
+/**
+ * Convenience UComparator implementation for int32_t arrays.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_int32Comparator(const void *context, const void *left, const void *right);
+
+/**
+ * Convenience UComparator implementation for uint32_t arrays.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_uint32Comparator(const void *context, const void *left, const void *right);
+
+/**
+ * Much like Java Collections.binarySearch(list, key, comparator).
+ *
+ * Except: Java documents "If the list contains multiple elements equal to
+ * the specified object, there is no guarantee which one will be found."
+ *
+ * This version here will return the largest index of any equal item,
+ * for use in stable sorting.
+ *
+ * @return the index>=0 where the item was found:
+ * the largest such index, if multiple, for stable sorting;
+ * or the index<0 for inserting the item at ~index in sorted order
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_stableBinarySearch(char *array, int32_t length, void *item, int32_t itemSize,
+ UComparator *cmp, const void *context);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/uassert.h b/src/core/basetypes/icu-ucsdet/include/uassert.h
new file mode 100644
index 00000000..9dc29b24
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uassert.h
@@ -0,0 +1,32 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File uassert.h
+*
+* Contains U_ASSERT macro
+*
+* By default, U_ASSERT just wraps the C library assert macro.
+* By changing the definition here, the assert behavior for ICU can be changed
+* without affecting other non-ICU uses of the C library assert().
+*
+******************************************************************************
+*/
+
+#ifndef U_ASSERT_H
+#define U_ASSERT_H
+/* utypes.h is included to get the proper define for uint8_t */
+#include "unicode/utypes.h"
+#if U_DEBUG
+# include <assert.h>
+# define U_ASSERT(exp) assert(exp)
+#else
+# define U_ASSERT(exp)
+#endif
+#endif
+
+
diff --git a/src/core/basetypes/icu-ucsdet/include/ucase.h b/src/core/basetypes/icu-ucsdet/include/ucase.h
new file mode 100644
index 00000000..8f24769d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucase.h
@@ -0,0 +1,409 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2004-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucase.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004aug30
+* created by: Markus W. Scherer
+*
+* Low-level Unicode character/string case mapping code.
+*/
+
+#ifndef __UCASE_H__
+#define __UCASE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+#include "putilimp.h"
+#include "uset_imp.h"
+#include "udataswp.h"
+
+#ifdef __cplusplus
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+U_NAMESPACE_END
+#endif
+
+/* library API -------------------------------------------------------------- */
+
+U_CDECL_BEGIN
+
+struct UCaseProps;
+typedef struct UCaseProps UCaseProps;
+
+U_CDECL_END
+
+U_CAPI const UCaseProps * U_EXPORT2
+ucase_getSingleton(void);
+
+U_CFUNC void U_EXPORT2
+ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode);
+
+/**
+ * Requires non-NULL locale ID but otherwise does the equivalent of
+ * checking for language codes as if uloc_getLanguage() were called:
+ * Accepts both 2- and 3-letter codes and accepts case variants.
+ */
+U_CFUNC int32_t
+ucase_getCaseLocale(const char *locale, int32_t *locCache);
+
+/* Casing locale types for ucase_getCaseLocale */
+enum {
+ UCASE_LOC_UNKNOWN,
+ UCASE_LOC_ROOT,
+ UCASE_LOC_TURKISH,
+ UCASE_LOC_LITHUANIAN,
+ UCASE_LOC_DUTCH
+};
+
+/**
+ * Bit mask for getting just the options from a string compare options word
+ * that are relevant for case-insensitive string comparison.
+ * See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
+ * @internal
+ */
+#define _STRCASECMP_OPTIONS_MASK 0xffff
+
+/**
+ * Bit mask for getting just the options from a string compare options word
+ * that are relevant for case folding (of a single string or code point).
+ * See uchar.h.
+ * @internal
+ */
+#define _FOLD_CASE_OPTIONS_MASK 0xff
+
+/* single-code point functions */
+
+U_CAPI UChar32 U_EXPORT2
+ucase_tolower(const UCaseProps *csp, UChar32 c);
+
+U_CAPI UChar32 U_EXPORT2
+ucase_toupper(const UCaseProps *csp, UChar32 c);
+
+U_CAPI UChar32 U_EXPORT2
+ucase_totitle(const UCaseProps *csp, UChar32 c);
+
+U_CAPI UChar32 U_EXPORT2
+ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options);
+
+/**
+ * Adds all simple case mappings and the full case folding for c to sa,
+ * and also adds special case closure mappings.
+ * c itself is not added.
+ * For example, the mappings
+ * - for s include long s
+ * - for sharp s include ss
+ * - for k include the Kelvin sign
+ */
+U_CFUNC void U_EXPORT2
+ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa);
+
+/**
+ * Maps the string to single code points and adds the associated case closure
+ * mappings.
+ * The string is mapped to code points if it is their full case folding string.
+ * In other words, this performs a reverse full case folding and then
+ * adds the case closure items of the resulting code points.
+ * If the string is found and its closure applied, then
+ * the string itself is added as well as part of its code points' closure.
+ * It must be length>=0.
+ *
+ * @return TRUE if the string was found
+ */
+U_CFUNC UBool U_EXPORT2
+ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa);
+
+#ifdef __cplusplus
+U_NAMESPACE_BEGIN
+
+/**
+ * Iterator over characters with more than one code point in the full default Case_Folding.
+ */
+class U_COMMON_API FullCaseFoldingIterator {
+public:
+ /** Constructor. */
+ FullCaseFoldingIterator();
+ /**
+ * Returns the next (cp, full) pair where "full" is cp's full default Case_Folding.
+ * Returns a negative cp value at the end of the iteration.
+ */
+ UChar32 next(UnicodeString &full);
+private:
+ FullCaseFoldingIterator(const FullCaseFoldingIterator &); // no copy
+ FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &); // no assignment
+
+ const UChar *unfold;
+ int32_t unfoldRows;
+ int32_t unfoldRowWidth;
+ int32_t unfoldStringWidth;
+ int32_t currentRow;
+ int32_t rowCpIndex;
+};
+
+U_NAMESPACE_END
+#endif
+
+/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
+U_CAPI int32_t U_EXPORT2
+ucase_getType(const UCaseProps *csp, UChar32 c);
+
+/** @return same as ucase_getType(), or <0 if c is case-ignorable */
+U_CAPI int32_t U_EXPORT2
+ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c);
+
+U_CAPI UBool U_EXPORT2
+ucase_isSoftDotted(const UCaseProps *csp, UChar32 c);
+
+U_CAPI UBool U_EXPORT2
+ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c);
+
+/* string case mapping functions */
+
+U_CDECL_BEGIN
+
+/**
+ * Iterator function for string case mappings, which need to look at the
+ * context (surrounding text) of a given character for conditional mappings.
+ *
+ * The iterator only needs to go backward or forward away from the
+ * character in question. It does not use any indexes on this interface.
+ * It does not support random access or an arbitrary change of
+ * iteration direction.
+ *
+ * The code point being case-mapped itself is never returned by
+ * this iterator.
+ *
+ * @param context A pointer to the iterator's working data.
+ * @param dir If <0 then start iterating backward from the character;
+ * if >0 then start iterating forward from the character;
+ * if 0 then continue iterating in the current direction.
+ * @return Next code point, or <0 when the iteration is done.
+ */
+typedef UChar32 U_CALLCONV
+UCaseContextIterator(void *context, int8_t dir);
+
+/**
+ * Sample struct which may be used by some implementations of
+ * UCaseContextIterator.
+ */
+struct UCaseContext {
+ void *p;
+ int32_t start, index, limit;
+ int32_t cpStart, cpLimit;
+ int8_t dir;
+ int8_t b1, b2, b3;
+};
+typedef struct UCaseContext UCaseContext;
+
+U_CDECL_END
+
+#define UCASECONTEXT_INITIALIZER { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+enum {
+ /**
+ * For string case mappings, a single character (a code point) is mapped
+ * either to itself (in which case in-place mapping functions do nothing),
+ * or to another single code point, or to a string.
+ * Aside from the string contents, these are indicated with a single int32_t
+ * value as follows:
+ *
+ * Mapping to self: Negative values (~self instead of -self to support U+0000)
+ *
+ * Mapping to another code point: Positive values >UCASE_MAX_STRING_LENGTH
+ *
+ * Mapping to a string: The string length (0..UCASE_MAX_STRING_LENGTH) is
+ * returned. Note that the string result may indeed have zero length.
+ */
+ UCASE_MAX_STRING_LENGTH=0x1f
+};
+
+/**
+ * Get the full lowercase mapping for c.
+ *
+ * @param csp Case mapping properties.
+ * @param c Character to be mapped.
+ * @param iter Character iterator, used for context-sensitive mappings.
+ * See UCaseContextIterator for details.
+ * If iter==NULL then a context-independent result is returned.
+ * @param context Pointer to be passed into iter.
+ * @param pString If the mapping result is a string, then the pointer is
+ * written to *pString.
+ * @param locale Locale ID for locale-dependent mappings.
+ * @param locCache Initialize to 0; may be used to cache the result of parsing
+ * the locale ID for subsequent calls.
+ * Can be NULL.
+ * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH.
+ *
+ * @see UCaseContextIterator
+ * @see UCASE_MAX_STRING_LENGTH
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ucase_toFullLower(const UCaseProps *csp, UChar32 c,
+ UCaseContextIterator *iter, void *context,
+ const UChar **pString,
+ const char *locale, int32_t *locCache);
+
+U_CAPI int32_t U_EXPORT2
+ucase_toFullUpper(const UCaseProps *csp, UChar32 c,
+ UCaseContextIterator *iter, void *context,
+ const UChar **pString,
+ const char *locale, int32_t *locCache);
+
+U_CAPI int32_t U_EXPORT2
+ucase_toFullTitle(const UCaseProps *csp, UChar32 c,
+ UCaseContextIterator *iter, void *context,
+ const UChar **pString,
+ const char *locale, int32_t *locCache);
+
+U_CAPI int32_t U_EXPORT2
+ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
+ const UChar **pString,
+ uint32_t options);
+
+U_CFUNC int32_t U_EXPORT2
+ucase_hasBinaryProperty(UChar32 c, UProperty which);
+
+
+U_CDECL_BEGIN
+
+/**
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UCaseMapFull(const UCaseProps *csp, UChar32 c,
+ UCaseContextIterator *iter, void *context,
+ const UChar **pString,
+ const char *locale, int32_t *locCache);
+
+U_CDECL_END
+
+/* file definitions --------------------------------------------------------- */
+
+#define UCASE_DATA_NAME "ucase"
+#define UCASE_DATA_TYPE "icu"
+
+/* format "cAsE" */
+#define UCASE_FMT_0 0x63
+#define UCASE_FMT_1 0x41
+#define UCASE_FMT_2 0x53
+#define UCASE_FMT_3 0x45
+
+/* indexes into indexes[] */
+enum {
+ UCASE_IX_INDEX_TOP,
+ UCASE_IX_LENGTH,
+ UCASE_IX_TRIE_SIZE,
+ UCASE_IX_EXC_LENGTH,
+ UCASE_IX_UNFOLD_LENGTH,
+
+ UCASE_IX_MAX_FULL_LENGTH=15,
+ UCASE_IX_TOP=16
+};
+
+/* definitions for 16-bit case properties word ------------------------------ */
+
+/* 2-bit constants for types of cased characters */
+#define UCASE_TYPE_MASK 3
+enum {
+ UCASE_NONE,
+ UCASE_LOWER,
+ UCASE_UPPER,
+ UCASE_TITLE
+};
+
+#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
+#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)
+
+#define UCASE_IGNORABLE 4
+#define UCASE_SENSITIVE 8
+#define UCASE_EXCEPTION 0x10
+
+#define UCASE_DOT_MASK 0x60
+enum {
+ UCASE_NO_DOT=0, /* normal characters with cc=0 */
+ UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */
+ UCASE_ABOVE=0x40, /* "above" accents with cc=230 */
+ UCASE_OTHER_ACCENT=0x60 /* other accent character (0<cc!=230) */
+};
+
+/* no exception: bits 15..7 are a 9-bit signed case mapping delta */
+#define UCASE_DELTA_SHIFT 7
+#define UCASE_DELTA_MASK 0xff80
+#define UCASE_MAX_DELTA 0xff
+#define UCASE_MIN_DELTA (-UCASE_MAX_DELTA-1)
+
+#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
+# define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT)
+#else
+# define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT))
+#endif
+
+/* exception: bits 15..5 are an unsigned 11-bit index into the exceptions array */
+#define UCASE_EXC_SHIFT 5
+#define UCASE_EXC_MASK 0xffe0
+#define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1)
+
+/* definitions for 16-bit main exceptions word ------------------------------ */
+
+/* first 8 bits indicate values in optional slots */
+enum {
+ UCASE_EXC_LOWER,
+ UCASE_EXC_FOLD,
+ UCASE_EXC_UPPER,
+ UCASE_EXC_TITLE,
+ UCASE_EXC_4, /* reserved */
+ UCASE_EXC_5, /* reserved */
+ UCASE_EXC_CLOSURE,
+ UCASE_EXC_FULL_MAPPINGS,
+ UCASE_EXC_ALL_SLOTS /* one past the last slot */
+};
+
+/* each slot is 2 uint16_t instead of 1 */
+#define UCASE_EXC_DOUBLE_SLOTS 0x100
+
+/* reserved: exception bits 11..9 */
+
+/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */
+#define UCASE_EXC_DOT_SHIFT 7
+
+/* normally stored in the main word, but pushed out for larger exception indexes */
+#define UCASE_EXC_DOT_MASK 0x3000
+enum {
+ UCASE_EXC_NO_DOT=0,
+ UCASE_EXC_SOFT_DOTTED=0x1000,
+ UCASE_EXC_ABOVE=0x2000, /* "above" accents with cc=230 */
+ UCASE_EXC_OTHER_ACCENT=0x3000 /* other character (0<cc!=230) */
+};
+
+/* complex/conditional mappings */
+#define UCASE_EXC_CONDITIONAL_SPECIAL 0x4000
+#define UCASE_EXC_CONDITIONAL_FOLD 0x8000
+
+/* definitions for lengths word for full case mappings */
+#define UCASE_FULL_LOWER 0xf
+#define UCASE_FULL_FOLDING 0xf0
+#define UCASE_FULL_UPPER 0xf00
+#define UCASE_FULL_TITLE 0xf000
+
+/* maximum lengths */
+#define UCASE_FULL_MAPPINGS_MAX_LENGTH (4*0xf)
+#define UCASE_CLOSURE_MAX_LENGTH 0xf
+
+/* constants for reverse case folding ("unfold") data */
+enum {
+ UCASE_UNFOLD_ROWS,
+ UCASE_UNFOLD_ROW_WIDTH,
+ UCASE_UNFOLD_STRING_WIDTH
+};
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/ucln.h b/src/core/basetypes/icu-ucsdet/include/ucln.h
new file mode 100644
index 00000000..cd2630af
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucln.h
@@ -0,0 +1,89 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2001-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucln.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#ifndef __UCLN_H__
+#define __UCLN_H__
+
+#include "unicode/utypes.h"
+
+/** These are the functions used to register a library's memory cleanup
+ * functions. Each library should define a single library register function
+ * to call this API. In the i18n library, it is ucln_i18n_registerCleanup().
+ *
+ * None of the cleanup functions should use a mutex to clean up an API's
+ * allocated memory because a cleanup function is not meant to be thread safe,
+ * and plenty of data cannot be reference counted in order to make sure that
+ * no one else needs the allocated data.
+ *
+ * In order to make a cleanup function get called when u_cleanup is called,
+ * You should add your function to the library specific cleanup function.
+ * If the cleanup function is not in the common library, the code that
+ * allocates the memory should call the library specific cleanup function.
+ * For instance, in the i18n library, any memory allocated statically must
+ * call ucln_i18n_registerCleanup() from the ucln_in.h header. These library
+ * cleanup functions are needed in order to prevent a circular dependency
+ * between the common library and any other library.
+ *
+ * The order of the cleanup is very important. In general, an API that
+ * depends on a second API should be cleaned up before the second API.
+ * For instance, the default converter in ustring depends upon the converter
+ * API. So the default converter should be closed before the converter API
+ * has its cache flushed. This will prevent any memory leaks due to
+ * reference counting.
+ *
+ * Please see common/ucln_cmn.{h,c} and i18n/ucln_in.{h,c} for examples.
+ */
+
+/**
+ * Data Type for cleanup function selector. These roughly correspond to libraries.
+ */
+typedef enum ECleanupLibraryType {
+ UCLN_START = -1,
+ UCLN_UPLUG, /* ICU plugins */
+ UCLN_CUSTOM, /* Custom is for anyone else. */
+ UCLN_CTESTFW,
+ UCLN_TOOLUTIL,
+ UCLN_LAYOUTEX,
+ UCLN_LAYOUT,
+ UCLN_IO,
+ UCLN_I18N,
+ UCLN_COMMON /* This must be the last one to cleanup. */
+} ECleanupLibraryType;
+
+/**
+ * Data type for cleanup function pointer
+ */
+U_CDECL_BEGIN
+typedef UBool U_CALLCONV cleanupFunc(void);
+typedef void U_CALLCONV initFunc(UErrorCode *);
+U_CDECL_END
+
+/**
+ * Register a cleanup function
+ * @param type which library to register for.
+ * @param func the function pointer
+ */
+U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type,
+ cleanupFunc *func);
+
+/**
+ * Request cleanup for one specific library.
+ * Not thread safe.
+ * @param type which library to cleanup
+ */
+U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/ucln_cmn.h b/src/core/basetypes/icu-ucsdet/include/ucln_cmn.h
new file mode 100644
index 00000000..3517ca78
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucln_cmn.h
@@ -0,0 +1,72 @@
+/*
+******************************************************************************
+* *
+* Copyright (C) 2001-2014, International Business Machines *
+* Corporation and others. All Rights Reserved. *
+* *
+******************************************************************************
+* file name: ucln_cmn.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#ifndef __UCLN_CMN_H__
+#define __UCLN_CMN_H__
+
+#include "unicode/utypes.h"
+#include "ucln.h"
+
+/* These are the cleanup functions for various APIs. */
+/* @return true if cleanup complete successfully.*/
+U_CFUNC UBool umtx_cleanup(void);
+
+U_CFUNC UBool utrace_cleanup(void);
+
+U_CFUNC UBool ucln_lib_cleanup(void);
+
+/*
+Please keep the order of enums declared in same order
+as the cleanup functions are suppose to be called. */
+typedef enum ECleanupCommonType {
+ UCLN_COMMON_START = -1,
+ UCLN_COMMON_USPREP,
+ UCLN_COMMON_BREAKITERATOR,
+ UCLN_COMMON_BREAKITERATOR_DICT,
+ UCLN_COMMON_SERVICE,
+ UCLN_COMMON_LOCALE_KEY_TYPE,
+ UCLN_COMMON_LOCALE,
+ UCLN_COMMON_LOCALE_AVAILABLE,
+ UCLN_COMMON_ULOC,
+ UCLN_COMMON_LOADED_NORMALIZER2,
+ UCLN_COMMON_NORMALIZER2,
+ UCLN_COMMON_USET,
+ UCLN_COMMON_UNAMES,
+ UCLN_COMMON_UPROPS,
+ UCLN_COMMON_UCNV,
+ UCLN_COMMON_UCNV_IO,
+ UCLN_COMMON_UDATA,
+ UCLN_COMMON_PUTIL,
+ UCLN_COMMON_LIST_FORMATTER,
+ UCLN_COMMON_UINIT,
+
+ /*
+ Unified caches caches collation stuff. Collation data structures
+ contain resource bundles which means that unified cache cleanup
+ must happen before resource bundle clean up.
+ */
+ UCLN_COMMON_UNIFIED_CACHE,
+ UCLN_COMMON_URES,
+ UCLN_COMMON_COUNT /* This must be last */
+} ECleanupCommonType;
+
+/* Main library cleanup registration function. */
+/* See common/ucln.h for details on adding a cleanup function. */
+/* Note: the global mutex must not be held when calling this function. */
+U_CFUNC void U_EXPORT2 ucln_common_registerCleanup(ECleanupCommonType type,
+ cleanupFunc *func);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/ucln_imp.h b/src/core/basetypes/icu-ucsdet/include/ucln_imp.h
new file mode 100644
index 00000000..d5d202ec
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucln_imp.h
@@ -0,0 +1,178 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucln_imp.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* This file contains the platform specific implementation of per-library cleanup.
+*
+*/
+
+
+#ifndef __UCLN_IMP_H__
+#define __UCLN_IMP_H__
+
+#include "ucln.h"
+#include <stdlib.h>
+
+/**
+ * Auto cleanup of ICU libraries
+ * There are several methods in per library cleanup of icu libraries:
+ * 1) Compiler/Platform based cleanup:
+ * a) Windows MSVC uses DllMain()
+ * b) GCC uses destructor function attribute
+ * c) Sun Studio, AIX VA, and HP-UX aCC uses a linker option to set the exit function
+ * 2) Using atexit()
+ * 3) Implementing own automatic cleanup functions
+ *
+ * For option 1, ensure that UCLN_NO_AUTO_CLEANUP is set to 0 by using --enable-auto-cleanup
+ * configure option or by otherwise setting UCLN_NO_AUTO_CLEANUP to 0
+ * For option 2, follow option 1 and also define UCLN_AUTO_ATEXIT
+ * For option 3, follow option 1 and also define UCLN_AUTO_LOCAL (see below for more information)
+ */
+
+#if !UCLN_NO_AUTO_CLEANUP
+
+/*
+ * The following declarations are for when UCLN_AUTO_LOCAL or UCLN_AUTO_ATEXIT
+ * are defined. They are commented out because they are static and will be defined
+ * later. The information is still here to provide some guidance for the developer
+ * who chooses to use UCLN_AUTO_LOCAL.
+ */
+/**
+ * Give the library an opportunity to register an automatic cleanup.
+ * This may be called more than once.
+ */
+/*static void ucln_registerAutomaticCleanup();*/
+/**
+ * Unregister an automatic cleanup, if possible. Called from cleanup.
+ */
+/*static void ucln_unRegisterAutomaticCleanup();*/
+
+#ifdef UCLN_TYPE_IS_COMMON
+# define UCLN_CLEAN_ME_UP u_cleanup()
+#else
+# define UCLN_CLEAN_ME_UP ucln_cleanupOne(UCLN_TYPE)
+#endif
+
+/* ------------ automatic cleanup: registration. Choose ONE ------- */
+#if defined(UCLN_AUTO_LOCAL)
+/* To use:
+ * 1. define UCLN_AUTO_LOCAL,
+ * 2. create ucln_local_hook.c containing implementations of
+ * static void ucln_registerAutomaticCleanup()
+ * static void ucln_unRegisterAutomaticCleanup()
+ */
+#include "ucln_local_hook.c"
+
+#elif defined(UCLN_AUTO_ATEXIT)
+/*
+ * Use the ANSI C 'atexit' function. Note that this mechanism does not
+ * guarantee the order of cleanup relative to other users of ICU!
+ */
+static UBool gAutoCleanRegistered = FALSE;
+
+static void ucln_atexit_handler()
+{
+ UCLN_CLEAN_ME_UP;
+}
+
+static void ucln_registerAutomaticCleanup()
+{
+ if(!gAutoCleanRegistered) {
+ gAutoCleanRegistered = TRUE;
+ atexit(&ucln_atexit_handler);
+ }
+}
+
+static void ucln_unRegisterAutomaticCleanup () {
+}
+/* ------------end of automatic cleanup: registration. ------- */
+
+#elif defined (UCLN_FINI)
+/**
+ * If UCLN_FINI is defined, it is the (versioned, etc) name of a cleanup
+ * entrypoint. Add a stub to call ucln_cleanupOne
+ * Used on AIX, Solaris, and HP-UX
+ */
+U_CAPI void U_EXPORT2 UCLN_FINI (void);
+
+U_CAPI void U_EXPORT2 UCLN_FINI ()
+{
+ /* This function must be defined, if UCLN_FINI is defined, else link error. */
+ UCLN_CLEAN_ME_UP;
+}
+
+/* Windows: DllMain */
+#elif U_PLATFORM_HAS_WIN32_API
+/*
+ * ICU's own DllMain.
+ */
+
+/* these are from putil.c */
+/* READ READ READ READ! Are you getting compilation errors from windows.h?
+ Any source file which includes this (ucln_imp.h) header MUST
+ be defined with language extensions ON. */
+# define WIN32_LEAN_AND_MEAN
+# define VC_EXTRALEAN
+# define NOUSER
+# define NOSERVICE
+# define NOIME
+# define NOMCX
+# include <windows.h>
+/*
+ * This is a stub DllMain function with icu specific process handling code.
+ */
+BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
+{
+ BOOL status = TRUE;
+
+ switch(fdwReason) {
+ case DLL_PROCESS_ATTACH:
+ /* ICU does not trap process attach, but must pass these through properly. */
+ /* ICU specific process attach could go here */
+ break;
+
+ case DLL_PROCESS_DETACH:
+ /* Here is the one we actually care about. */
+
+ UCLN_CLEAN_ME_UP;
+
+ break;
+
+ case DLL_THREAD_ATTACH:
+ /* ICU does not trap thread attach, but must pass these through properly. */
+ /* ICU specific thread attach could go here */
+ break;
+
+ case DLL_THREAD_DETACH:
+ /* ICU does not trap thread detach, but must pass these through properly. */
+ /* ICU specific thread detach could go here */
+ break;
+
+ }
+ return status;
+}
+
+#elif defined(__GNUC__)
+/* GCC - use __attribute((destructor)) */
+static void ucln_destructor() __attribute__((destructor)) ;
+
+static void ucln_destructor()
+{
+ UCLN_CLEAN_ME_UP;
+}
+
+#endif
+
+#endif /* UCLN_NO_AUTO_CLEANUP */
+
+#else
+#error This file can only be included once.
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/ucln_in.h b/src/core/basetypes/icu-ucsdet/include/ucln_in.h
new file mode 100644
index 00000000..e9685161
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucln_in.h
@@ -0,0 +1,64 @@
+/*
+******************************************************************************
+* Copyright (C) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* file name: ucln_cmn.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#ifndef __UCLN_IN_H__
+#define __UCLN_IN_H__
+
+#include "unicode/utypes.h"
+#include "ucln.h"
+
+/*
+Please keep the order of enums declared in same order
+as the functions are suppose to be called.
+It's usually best to have child dependencies called first. */
+typedef enum ECleanupI18NType {
+ UCLN_I18N_START = -1,
+ UCLN_I18N_IDENTIFIER_INFO,
+ UCLN_I18N_SPOOF,
+ UCLN_I18N_TRANSLITERATOR,
+ UCLN_I18N_REGEX,
+ UCLN_I18N_ISLAMIC_CALENDAR,
+ UCLN_I18N_CHINESE_CALENDAR,
+ UCLN_I18N_HEBREW_CALENDAR,
+ UCLN_I18N_ASTRO_CALENDAR,
+ UCLN_I18N_DANGI_CALENDAR,
+ UCLN_I18N_CALENDAR,
+ UCLN_I18N_TIMEZONEFORMAT,
+ UCLN_I18N_TZDBTIMEZONENAMES,
+ UCLN_I18N_TIMEZONEGENERICNAMES,
+ UCLN_I18N_TIMEZONENAMES,
+ UCLN_I18N_ZONEMETA,
+ UCLN_I18N_TIMEZONE,
+ UCLN_I18N_CURRENCY,
+ UCLN_I18N_DECFMT,
+ UCLN_I18N_NUMFMT,
+ UCLN_I18N_SMPDTFMT,
+ UCLN_I18N_USEARCH,
+ UCLN_I18N_COLLATOR,
+ UCLN_I18N_UCOL_RES,
+ UCLN_I18N_CSDET,
+ UCLN_I18N_COLLATION_ROOT,
+ UCLN_I18N_GENDERINFO,
+ UCLN_I18N_CDFINFO,
+ UCLN_I18N_REGION,
+ UCLN_I18N_COUNT /* This must be last */
+} ECleanupI18NType;
+
+/* Main library cleanup registration function. */
+/* See common/ucln.h for details on adding a cleanup function. */
+/* Note: the global mutex must not be held when calling this function. */
+U_CFUNC void U_EXPORT2 ucln_i18n_registerCleanup(ECleanupI18NType type,
+ cleanupFunc *func);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/ucmndata.h b/src/core/basetypes/icu-ucsdet/include/ucmndata.h
new file mode 100644
index 00000000..36163c50
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucmndata.h
@@ -0,0 +1,111 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************/
+
+
+/*----------------------------------------------------------------------------------
+ *
+ * UCommonData An abstract interface for dealing with ICU Common Data Files.
+ * ICU Common Data Files are a grouping of a number of individual
+ * data items (resources, converters, tables, anything) into a
+ * single file or dll. The combined format includes a table of
+ * contents for locating the individual items by name.
+ *
+ * Two formats for the table of contents are supported, which is
+ * why there is an abstract inteface involved.
+ *
+ * These functions are part of the ICU internal implementation, and
+ * are not inteded to be used directly by applications.
+ */
+
+#ifndef __UCMNDATA_H__
+#define __UCMNDATA_H__
+
+#include "unicode/udata.h"
+#include "umapfile.h"
+
+
+#define COMMON_DATA_NAME U_ICUDATA_NAME
+
+typedef struct {
+ uint16_t headerSize;
+ uint8_t magic1;
+ uint8_t magic2;
+} MappedData;
+
+
+typedef struct {
+ MappedData dataHeader;
+ UDataInfo info;
+} DataHeader;
+
+typedef struct {
+ uint32_t nameOffset;
+ uint32_t dataOffset;
+} UDataOffsetTOCEntry;
+
+typedef struct {
+ uint32_t count;
+ UDataOffsetTOCEntry entry[2]; /* Actual size of array is from count. */
+} UDataOffsetTOC;
+
+/**
+ * Get the header size from a const DataHeader *udh.
+ * Handles opposite-endian data.
+ *
+ * @internal
+ */
+U_CFUNC uint16_t
+udata_getHeaderSize(const DataHeader *udh);
+
+/**
+ * Get the UDataInfo.size from a const UDataInfo *info.
+ * Handles opposite-endian data.
+ *
+ * @internal
+ */
+U_CFUNC uint16_t
+udata_getInfoSize(const UDataInfo *info);
+
+U_CDECL_BEGIN
+/*
+ * "Virtual" functions for data lookup.
+ * To call one, given a UDataMemory *p, the code looks like this:
+ * p->vFuncs.Lookup(p, tocEntryName, pErrorCode);
+ * (I sure do wish this was written in C++, not C)
+ */
+
+typedef const DataHeader *
+(U_CALLCONV * LookupFn)(const UDataMemory *pData,
+ const char *tocEntryName,
+ int32_t *pLength,
+ UErrorCode *pErrorCode);
+
+typedef uint32_t
+(U_CALLCONV * NumEntriesFn)(const UDataMemory *pData);
+
+U_CDECL_END
+
+typedef struct {
+ LookupFn Lookup;
+ NumEntriesFn NumEntries;
+} commonDataFuncs;
+
+
+/*
+ * Functions to check whether a UDataMemory refers to memory containing
+ * a recognizable header and table of contents a Common Data Format
+ *
+ * If a valid header and TOC are found,
+ * set the CommonDataFuncs function dispatch vector in the UDataMemory
+ * to point to the right functions for the TOC type.
+ * otherwise
+ * set an errorcode.
+ */
+U_CFUNC void udata_checkCommonData(UDataMemory *pData, UErrorCode *pErrorCode);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/udataswp.h b/src/core/basetypes/icu-ucsdet/include/udataswp.h
new file mode 100644
index 00000000..66c84955
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/udataswp.h
@@ -0,0 +1,351 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: udataswp.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003jun05
+* created by: Markus W. Scherer
+*
+* Definitions for ICU data transformations for different platforms,
+* changing between big- and little-endian data and/or between
+* charset families (ASCII<->EBCDIC).
+*/
+
+#ifndef __UDATASWP_H__
+#define __UDATASWP_H__
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+
+/* forward declaration */
+
+U_CDECL_BEGIN
+
+struct UDataSwapper;
+typedef struct UDataSwapper UDataSwapper;
+
+/**
+ * Function type for data transformation.
+ * Transforms data, or just returns the length of the data if
+ * the input length is -1.
+ * Swap functions assume that their data pointers are aligned properly.
+ *
+ * Quick implementation outline:
+ * (best to copy and adapt and existing swapper implementation)
+ * check that the data looks like the expected format
+ * if(length<0) {
+ * preflight:
+ * never dereference outData
+ * read inData and determine the data size
+ * assume that inData is long enough for this
+ * } else {
+ * outData can be NULL if length==0
+ * inData==outData (in-place swapping) possible but not required!
+ * verify that length>=(actual size)
+ * if there is a chance that not every byte up to size is reached
+ * due to padding etc.:
+ * if(inData!=outData) {
+ * memcpy(outData, inData, actual size);
+ * }
+ * swap contents
+ * }
+ * return actual size
+ *
+ * Further implementation notes:
+ * - read integers from inData before swapping them
+ * because in-place swapping can make them unreadable
+ * - compareInvChars compares a local Unicode string with already-swapped
+ * output charset strings
+ *
+ * @param ds Pointer to UDataSwapper containing global data about the
+ * transformation and function pointers for handling primitive
+ * types.
+ * @param inData Pointer to the input data to be transformed or examined.
+ * @param length Length of the data, counting bytes. May be -1 for preflighting.
+ * If length>=0, then transform the data.
+ * If length==-1, then only determine the length of the data.
+ * The length cannot be determined from the data itself for all
+ * types of data (e.g., not for simple arrays of integers).
+ * @param outData Pointer to the output data buffer.
+ * If length>=0 (transformation), then the output buffer must
+ * have a capacity of at least length.
+ * If length==-1, then outData will not be used and can be NULL.
+ * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
+ * fulfill U_SUCCESS on input.
+ * @return The actual length of the data.
+ *
+ * @see UDataSwapper
+ * @internal ICU 2.8
+ */
+typedef int32_t U_CALLCONV
+UDataSwapFn(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert one uint16_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+typedef uint16_t U_CALLCONV
+UDataReadUInt16(uint16_t x);
+
+/**
+ * Convert one uint32_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+typedef uint32_t U_CALLCONV
+UDataReadUInt32(uint32_t x);
+
+/**
+ * Convert one uint16_t from platform to input endianness.
+ * @internal ICU 2.8
+ */
+typedef void U_CALLCONV
+UDataWriteUInt16(uint16_t *p, uint16_t x);
+
+/**
+ * Convert one uint32_t from platform to input endianness.
+ * @internal ICU 2.8
+ */
+typedef void U_CALLCONV
+UDataWriteUInt32(uint32_t *p, uint32_t x);
+
+/**
+ * Compare invariant-character strings, one in the output data and the
+ * other one caller-provided in Unicode.
+ * An output data string is compared because strings are usually swapped
+ * before the rest of the data, to allow for sorting of string tables
+ * according to the output charset.
+ * You can use -1 for the length parameters of NUL-terminated strings as usual.
+ * Returns Unicode code point order for invariant characters.
+ * @internal ICU 2.8
+ */
+typedef int32_t U_CALLCONV
+UDataCompareInvChars(const UDataSwapper *ds,
+ const char *outString, int32_t outLength,
+ const UChar *localString, int32_t localLength);
+
+/**
+ * Function for message output when an error occurs during data swapping.
+ * A format string and variable number of arguments are passed
+ * like for vprintf().
+ *
+ * @param context A function-specific context pointer.
+ * @param fmt The format string.
+ * @param args The arguments for format string inserts.
+ *
+ * @internal ICU 2.8
+ */
+typedef void U_CALLCONV
+UDataPrintError(void *context, const char *fmt, va_list args);
+
+struct UDataSwapper {
+ /** Input endianness. @internal ICU 2.8 */
+ UBool inIsBigEndian;
+ /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
+ uint8_t inCharset;
+ /** Output endianness. @internal ICU 2.8 */
+ UBool outIsBigEndian;
+ /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
+ uint8_t outCharset;
+
+ /* basic functions for reading data values */
+
+ /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */
+ UDataReadUInt16 *readUInt16;
+ /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */
+ UDataReadUInt32 *readUInt32;
+ /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */
+ UDataCompareInvChars *compareInvChars;
+
+ /* basic functions for writing data values */
+
+ /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */
+ UDataWriteUInt16 *writeUInt16;
+ /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */
+ UDataWriteUInt32 *writeUInt32;
+
+ /* basic functions for data transformations */
+
+ /** Transform an array of 16-bit integers. @internal ICU 2.8 */
+ UDataSwapFn *swapArray16;
+ /** Transform an array of 32-bit integers. @internal ICU 2.8 */
+ UDataSwapFn *swapArray32;
+ /** Transform an array of 64-bit integers. @internal ICU 53 */
+ UDataSwapFn *swapArray64;
+ /** Transform an invariant-character string. @internal ICU 2.8 */
+ UDataSwapFn *swapInvChars;
+
+ /**
+ * Function for message output when an error occurs during data swapping.
+ * Can be NULL.
+ * @internal ICU 2.8
+ */
+ UDataPrintError *printError;
+ /** Context pointer for printError. @internal ICU 2.8 */
+ void *printErrorContext;
+};
+
+U_CDECL_END
+
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
+ UBool outIsBigEndian, uint8_t outCharset,
+ UErrorCode *pErrorCode);
+
+/**
+ * Open a UDataSwapper for the given input data and the specified output
+ * characteristics.
+ * Values of -1 for any of the characteristics mean the local platform's
+ * characteristics.
+ *
+ * @see udata_swap
+ * @internal ICU 2.8
+ */
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapperForInputData(const void *data, int32_t length,
+ UBool outIsBigEndian, uint8_t outCharset,
+ UErrorCode *pErrorCode);
+
+U_CAPI void U_EXPORT2
+udata_closeSwapper(UDataSwapper *ds);
+
+/**
+ * Read the beginning of an ICU data piece, recognize magic bytes,
+ * swap the structure.
+ * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece.
+ *
+ * @return The size of the data header, in bytes.
+ *
+ * @internal ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swapDataHeader(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert one int16_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+U_CAPI int16_t U_EXPORT2
+udata_readInt16(const UDataSwapper *ds, int16_t x);
+
+/**
+ * Convert one int32_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+udata_readInt32(const UDataSwapper *ds, int32_t x);
+
+/**
+ * Swap a block of invariant, NUL-terminated strings, but not padding
+ * bytes after the last string.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swapInvStringBlock(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+U_CAPI void U_EXPORT2
+udata_printError(const UDataSwapper *ds,
+ const char *fmt,
+ ...);
+
+/* internal exports from putil.c -------------------------------------------- */
+
+/* declared here to keep them out of the public putil.h */
+
+/**
+ * Swap invariant char * strings ASCII->EBCDIC.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_ebcdicFromAscii(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Copy invariant ASCII char * strings and verify they are invariant.
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_copyAscii(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Swap invariant char * strings EBCDIC->ASCII.
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_asciiFromEbcdic(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Copy invariant EBCDIC char * strings and verify they are invariant.
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_copyEbcdic(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Compare ASCII invariant char * with Unicode invariant UChar *
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_compareInvAscii(const UDataSwapper *ds,
+ const char *outString, int32_t outLength,
+ const UChar *localString, int32_t localLength);
+
+/**
+ * Compare EBCDIC invariant char * with Unicode invariant UChar *
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_compareInvEbcdic(const UDataSwapper *ds,
+ const char *outString, int32_t outLength,
+ const UChar *localString, int32_t localLength);
+
+/* material... -------------------------------------------------------------- */
+
+#if 0
+
+/* udata.h */
+
+/**
+ * Public API function in udata.c
+ *
+ * Same as udata_openChoice() but automatically swaps the data.
+ * isAcceptable, if not NULL, may accept data with endianness and charset family
+ * different from the current platform's properties.
+ * If the data is acceptable and the platform properties do not match, then
+ * the swap function is called to swap an allocated version of the data.
+ * Preflighting may or may not be performed depending on whether the size of
+ * the loaded data item is known.
+ *
+ * @param isAcceptable Same as for udata_openChoice(). May be NULL.
+ *
+ * @internal ICU 2.8
+ */
+U_CAPI UDataMemory * U_EXPORT2
+udata_openSwap(const char *path, const char *type, const char *name,
+ UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext,
+ UDataSwapFn *swap,
+ UDataPrintError *printError, void *printErrorContext,
+ UErrorCode *pErrorCode);
+
+#endif
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/uelement.h b/src/core/basetypes/icu-ucsdet/include/uelement.h
new file mode 100644
index 00000000..4eaddd9d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uelement.h
@@ -0,0 +1,89 @@
+/*
+*******************************************************************************
+* Copyright (C) 1997-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: uelement.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011jul04
+* created by: Markus W. Scherer
+*
+* Common definitions for UHashTable and UVector.
+* UHashTok moved here from uhash.h and renamed UElement.
+* This allows users of UVector to avoid the confusing #include of uhash.h.
+* uhash.h aliases UElement to UHashTok,
+* so that we need not change all of its code and its users.
+*/
+
+#ifndef __UELEMENT_H__
+#define __UELEMENT_H__
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/**
+ * A UVector element, or a key or value within a UHashtable.
+ * It may be either a 32-bit integral value or an opaque void* pointer.
+ * The void* pointer may be smaller than 32 bits (e.g. 24 bits)
+ * or may be larger (e.g. 64 bits).
+ *
+ * Because a UElement is the size of a native pointer or a 32-bit
+ * integer, we pass it around by value.
+ */
+union UElement {
+ void* pointer;
+ int32_t integer;
+};
+typedef union UElement UElement;
+
+/**
+ * An element-equality (boolean) comparison function.
+ * @param e1 An element (object or integer)
+ * @param e2 An element (object or integer)
+ * @return TRUE if the two elements are equal.
+ */
+typedef UBool U_CALLCONV UElementsAreEqual(const UElement e1, const UElement e2);
+
+/**
+ * An element sorting (three-way) comparison function.
+ * @param e1 An element (object or integer)
+ * @param e2 An element (object or integer)
+ * @return 0 if the two elements are equal, -1 if e1 is < e2, or +1 if e1 is > e2.
+ */
+typedef int8_t U_CALLCONV UElementComparator(UElement e1, UElement e2);
+
+/**
+ * An element assignment function. It may copy an integer, copy
+ * a pointer, or clone a pointer, as appropriate.
+ * @param dst The element to be assigned to
+ * @param src The element to assign from
+ */
+typedef void U_CALLCONV UElementAssigner(UElement *dst, UElement *src);
+
+U_CDECL_END
+
+/**
+ * Comparator function for UnicodeString* keys. Implements UElementsAreEqual.
+ * @param key1 The string for comparison
+ * @param key2 The string for comparison
+ * @return true if key1 and key2 are equal, return false otherwise.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_compareUnicodeString(const UElement key1, const UElement key2);
+
+/**
+ * Comparator function for UnicodeString* keys (case insensitive).
+ * Make sure to use together with uhash_hashCaselessUnicodeString.
+ * Implements UElementsAreEqual.
+ * @param key1 The string for comparison
+ * @param key2 The string for comparison
+ * @return true if key1 and key2 are equal, return false otherwise.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2);
+
+#endif /* __UELEMENT_H__ */
diff --git a/src/core/basetypes/icu-ucsdet/include/uenumimp.h b/src/core/basetypes/icu-ucsdet/include/uenumimp.h
new file mode 100644
index 00000000..664bc686
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uenumimp.h
@@ -0,0 +1,153 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uenumimp.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2002jul08
+* created by: Vladimir Weinstein
+*/
+
+#ifndef __UENUMIMP_H
+#define __UENUMIMP_H
+
+#include "unicode/uenum.h"
+
+U_CDECL_BEGIN
+
+/**
+ * following are the type declarations for
+ * implementations of APIs. If any of these
+ * functions are NULL, U_UNSUPPORTED_ERROR
+ * is returned. If close is NULL, the enumeration
+ * object is going to be released.
+ * Initial error checking is done in the body
+ * of API function, so the implementations
+ * need not to check the initial error condition.
+ */
+
+/**
+ * Function type declaration for uenum_close().
+ *
+ * This function should cleanup the enumerator object
+ *
+ * @param en enumeration to be closed
+ */
+typedef void U_CALLCONV
+UEnumClose(UEnumeration *en);
+
+/**
+ * Function type declaration for uenum_count().
+ *
+ * This function should count the number of elements
+ * in this enumeration
+ *
+ * @param en enumeration to be counted
+ * @param status pointer to UErrorCode variable
+ * @return number of elements in enumeration
+ */
+typedef int32_t U_CALLCONV
+UEnumCount(UEnumeration *en, UErrorCode *status);
+
+/**
+ * Function type declaration for uenum_unext().
+ *
+ * This function returns the next element as a UChar *,
+ * or NULL after all elements haven been enumerated.
+ *
+ * @param en enumeration
+ * @param resultLength pointer to result length
+ * @param status pointer to UErrorCode variable
+ * @return next element as UChar *,
+ * or NULL after all elements haven been enumerated
+ */
+typedef const UChar* U_CALLCONV
+UEnumUNext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Function type declaration for uenum_next().
+ *
+ * This function returns the next element as a char *,
+ * or NULL after all elements haven been enumerated.
+ *
+ * @param en enumeration
+ * @param resultLength pointer to result length
+ * @param status pointer to UErrorCode variable
+ * @return next element as char *,
+ * or NULL after all elements haven been enumerated
+ */
+typedef const char* U_CALLCONV
+UEnumNext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Function type declaration for uenum_reset().
+ *
+ * This function should reset the enumeration
+ * object
+ *
+ * @param en enumeration
+ * @param status pointer to UErrorCode variable
+ */
+typedef void U_CALLCONV
+UEnumReset(UEnumeration* en,
+ UErrorCode* status);
+
+
+struct UEnumeration {
+ /* baseContext. For the base class only. Don't touch! */
+ void *baseContext;
+
+ /* context. Use it for what you need */
+ void *context;
+
+ /**
+ * these are functions that will
+ * be used for APIs
+ */
+ /* called from uenum_close */
+ UEnumClose *close;
+ /* called from uenum_count */
+ UEnumCount *count;
+ /* called from uenum_unext */
+ UEnumUNext *uNext;
+ /* called from uenum_next */
+ UEnumNext *next;
+ /* called from uenum_reset */
+ UEnumReset *reset;
+};
+
+U_CDECL_END
+
+/* This is the default implementation for uenum_unext().
+ * It automatically converts the char * string to UChar *.
+ * Don't call this directly. This is called internally by uenum_unext
+ * when a UEnumeration is defined with 'uNext' pointing to this
+ * function.
+ */
+U_CAPI const UChar* U_EXPORT2
+uenum_unextDefault(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/* This is the default implementation for uenum_next().
+ * It automatically converts the UChar * string to char *.
+ * Don't call this directly. This is called internally by uenum_next
+ * when a UEnumeration is defined with 'next' pointing to this
+ * function.
+ */
+U_CAPI const char* U_EXPORT2
+uenum_nextDefault(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/uinvchar.h b/src/core/basetypes/icu-ucsdet/include/uinvchar.h
new file mode 100644
index 00000000..f307bd6a
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uinvchar.h
@@ -0,0 +1,125 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uinvchar.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2004sep14
+* created by: Markus W. Scherer
+*
+* Definitions for handling invariant characters, moved here from putil.c
+* for better modularization.
+*/
+
+#ifndef __UINVCHAR_H__
+#define __UINVCHAR_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * Check if a char string only contains invariant characters.
+ * See utypes.h for details.
+ *
+ * @param s Input string pointer.
+ * @param length Length of the string, can be -1 if NUL-terminated.
+ * @return TRUE if s contains only invariant characters.
+ *
+ * @internal (ICU 2.8)
+ */
+U_INTERNAL UBool U_EXPORT2
+uprv_isInvariantString(const char *s, int32_t length);
+
+/**
+ * Check if a Unicode string only contains invariant characters.
+ * See utypes.h for details.
+ *
+ * @param s Input string pointer.
+ * @param length Length of the string, can be -1 if NUL-terminated.
+ * @return TRUE if s contains only invariant characters.
+ *
+ * @internal (ICU 2.8)
+ */
+U_INTERNAL UBool U_EXPORT2
+uprv_isInvariantUString(const UChar *s, int32_t length);
+
+/**
+ * \def U_UPPER_ORDINAL
+ * Get the ordinal number of an uppercase invariant character
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define U_UPPER_ORDINAL(x) ((x)-'A')
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
+ (((x) < 'S') ? ((x)-'J'+9) : \
+ ((x)-'S'+18)))
+#else
+# error Unknown charset family!
+#endif
+
+/**
+ * Compare two EBCDIC invariant-character strings in ASCII order.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2
+uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
+
+/**
+ * \def uprv_compareInvCharsAsAscii
+ * Compare two invariant-character strings in ASCII order.
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
+#else
+# error Unknown charset family!
+#endif
+
+/**
+ * Converts an EBCDIC invariant character to lowercase ASCII.
+ * @internal
+ */
+U_INTERNAL char U_EXPORT2
+uprv_ebcdicToLowercaseAscii(char c);
+
+/**
+ * \def uprv_invCharToLowercaseAscii
+ * Converts an invariant character to lowercase ASCII.
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_invCharToLowercaseAscii uprv_asciitolower
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
+#else
+# error Unknown charset family!
+#endif
+
+/**
+ * Copy EBCDIC to ASCII
+ * @internal
+ * @see uprv_strncpy
+ */
+U_INTERNAL uint8_t* U_EXPORT2
+uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
+
+
+/**
+ * Copy ASCII to EBCDIC
+ * @internal
+ * @see uprv_strncpy
+ */
+U_INTERNAL uint8_t* U_EXPORT2
+uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
+
+
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/umapfile.h b/src/core/basetypes/icu-ucsdet/include/umapfile.h
new file mode 100644
index 00000000..2995e381
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/umapfile.h
@@ -0,0 +1,55 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************/
+
+/*----------------------------------------------------------------------------------
+ *
+ * Memory mapped file wrappers for use by the ICU Data Implementation
+ *
+ * Porting note: The implementation of these functions is very platform specific.
+ * Not all platforms can do real memory mapping. Those that can't
+ * still must implement these functions, getting the data into memory using
+ * whatever means are available.
+ *
+ * These functions are part of the ICU internal implementation, and
+ * are not inteded to be used directly by applications.
+ *
+ *----------------------------------------------------------------------------------*/
+
+#ifndef __UMAPFILE_H__
+#define __UMAPFILE_H__
+
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+#include "putilimp.h"
+
+U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path);
+U_CFUNC void uprv_unmapFile(UDataMemory *pData);
+
+/* MAP_NONE: no memory mapping, no file access at all */
+#define MAP_NONE 0
+#define MAP_WIN32 1
+#define MAP_POSIX 2
+#define MAP_STDIO 3
+#define MAP_390DLL 4
+
+#if UCONFIG_NO_FILE_IO
+# define MAP_IMPLEMENTATION MAP_NONE
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define MAP_IMPLEMENTATION MAP_WIN32
+#elif U_HAVE_MMAP || U_PLATFORM == U_PF_OS390
+# if U_PLATFORM == U_PF_OS390 && defined (OS390_STUBDATA)
+ /* No memory mapping for 390 batch mode. Fake it using dll loading. */
+# define MAP_IMPLEMENTATION MAP_390DLL
+# else
+# define MAP_IMPLEMENTATION MAP_POSIX
+# endif
+#else /* unknown platform, no memory map implementation: use stdio.h and uprv_malloc() instead */
+# define MAP_IMPLEMENTATION MAP_STDIO
+#endif
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/umutex.h b/src/core/basetypes/icu-ucsdet/include/umutex.h
new file mode 100644
index 00000000..e0ad0d3c
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/umutex.h
@@ -0,0 +1,424 @@
+/*
+**********************************************************************
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File UMUTEX.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 04/07/99 srl rewrite - C interface, multiple mutices
+* 05/13/99 stephen Changed to umutex (from cmutex)
+******************************************************************************
+*/
+
+#ifndef UMUTEX_H
+#define UMUTEX_H
+
+#include "unicode/utypes.h"
+#include "unicode/uclean.h"
+#include "putilimp.h"
+
+
+
+// Forward Declarations. UMutex is not in the ICU namespace (yet) because
+// there are some remaining references from plain C.
+struct UMutex;
+struct UConditionVar;
+
+U_NAMESPACE_BEGIN
+struct UInitOnce;
+U_NAMESPACE_END
+
+// Stringify macros, to allow #include of user supplied atomic & mutex files.
+#define U_MUTEX_STR(s) #s
+#define U_MUTEX_XSTR(s) U_MUTEX_STR(s)
+
+/****************************************************************************
+ *
+ * Low Level Atomic Operations.
+ * Compiler dependent. Not operating system dependent.
+ *
+ ****************************************************************************/
+#if defined (U_USER_ATOMICS_H)
+#include U_MUTEX_XSTR(U_USER_ATOMICS_H)
+
+#elif U_HAVE_STD_ATOMICS
+
+// C++11 atomics are available.
+
+#include <atomic>
+
+U_NAMESPACE_BEGIN
+
+typedef std::atomic<int32_t> u_atomic_int32_t;
+#define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val)
+
+inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
+ return var.load(std::memory_order_acquire);
+}
+
+inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
+ var.store(val, std::memory_order_release);
+}
+
+inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) {
+ return var->fetch_add(1) + 1;
+}
+
+inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
+ return var->fetch_sub(1) - 1;
+}
+U_NAMESPACE_END
+
+#elif U_PLATFORM_HAS_WIN32_API
+
+// MSVC compiler. Reads and writes of volatile variables have
+// acquire and release memory semantics, respectively.
+// This is a Microsoft extension, not standard C++ behavior.
+//
+// Update: can't use this because of MinGW, built with gcc.
+// Original plan was to use gcc atomics for MinGW, but they
+// aren't supported, so we fold MinGW into this path.
+
+# define WIN32_LEAN_AND_MEAN
+# define VC_EXTRALEAN
+# define NOUSER
+# define NOSERVICE
+# define NOIME
+# define NOMCX
+# ifndef NOMINMAX
+# define NOMINMAX
+# endif
+# include <windows.h>
+
+U_NAMESPACE_BEGIN
+typedef volatile LONG u_atomic_int32_t;
+#define ATOMIC_INT32_T_INITIALIZER(val) val
+
+inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
+ return InterlockedCompareExchange(&var, 0, 0);
+}
+
+inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
+ InterlockedExchange(&var, val);
+}
+
+
+inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) {
+ return InterlockedIncrement(var);
+}
+
+inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
+ return InterlockedDecrement(var);
+}
+U_NAMESPACE_END
+
+
+#elif U_HAVE_GCC_ATOMICS
+/*
+ * gcc atomic ops. These are available on several other compilers as well.
+ */
+
+U_NAMESPACE_BEGIN
+typedef int32_t u_atomic_int32_t;
+#define ATOMIC_INT32_T_INITIALIZER(val) val
+
+inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
+ int32_t val = var;
+ __sync_synchronize();
+ return val;
+}
+
+inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
+ __sync_synchronize();
+ var = val;
+}
+
+inline int32_t umtx_atomic_inc(u_atomic_int32_t *p) {
+ return __sync_add_and_fetch(p, 1);
+}
+
+inline int32_t umtx_atomic_dec(u_atomic_int32_t *p) {
+ return __sync_sub_and_fetch(p, 1);
+}
+U_NAMESPACE_END
+
+#else
+
+/*
+ * Unknown Platform. Use out-of-line functions, which in turn use mutexes.
+ * Slow but correct.
+ */
+
+#define U_NO_PLATFORM_ATOMICS
+
+U_NAMESPACE_BEGIN
+typedef int32_t u_atomic_int32_t;
+#define ATOMIC_INT32_T_INITIALIZER(val) val
+
+U_COMMON_API int32_t U_EXPORT2
+umtx_loadAcquire(u_atomic_int32_t &var);
+
+U_COMMON_API void U_EXPORT2
+umtx_storeRelease(u_atomic_int32_t &var, int32_t val);
+
+U_COMMON_API int32_t U_EXPORT2
+umtx_atomic_inc(u_atomic_int32_t *p);
+
+U_COMMON_API int32_t U_EXPORT2
+umtx_atomic_dec(u_atomic_int32_t *p);
+
+U_NAMESPACE_END
+
+#endif /* Low Level Atomic Ops Platfrom Chain */
+
+
+
+/*************************************************************************************************
+ *
+ * UInitOnce Definitions.
+ * These are platform neutral.
+ *
+ *************************************************************************************************/
+
+U_NAMESPACE_BEGIN
+
+struct UInitOnce {
+ u_atomic_int32_t fState;
+ UErrorCode fErrCode;
+ void reset() {fState = 0;};
+ UBool isReset() {return umtx_loadAcquire(fState) == 0;};
+// Note: isReset() is used by service registration code.
+// Thread safety of this usage needs review.
+};
+
+#define U_INITONCE_INITIALIZER {ATOMIC_INT32_T_INITIALIZER(0), U_ZERO_ERROR}
+
+
+U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &);
+U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce &);
+
+template<class T> void umtx_initOnce(UInitOnce &uio, T *obj, void (T::*fp)()) {
+ if (umtx_loadAcquire(uio.fState) == 2) {
+ return;
+ }
+ if (umtx_initImplPreInit(uio)) {
+ (obj->*fp)();
+ umtx_initImplPostInit(uio);
+ }
+}
+
+
+// umtx_initOnce variant for plain functions, or static class functions.
+// No context parameter.
+inline void umtx_initOnce(UInitOnce &uio, void (*fp)()) {
+ if (umtx_loadAcquire(uio.fState) == 2) {
+ return;
+ }
+ if (umtx_initImplPreInit(uio)) {
+ (*fp)();
+ umtx_initImplPostInit(uio);
+ }
+}
+
+// umtx_initOnce variant for plain functions, or static class functions.
+// With ErrorCode, No context parameter.
+inline void umtx_initOnce(UInitOnce &uio, void (*fp)(UErrorCode &), UErrorCode &errCode) {
+ if (U_FAILURE(errCode)) {
+ return;
+ }
+ if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
+ // We run the initialization.
+ (*fp)(errCode);
+ uio.fErrCode = errCode;
+ umtx_initImplPostInit(uio);
+ } else {
+ // Someone else already ran the initialization.
+ if (U_FAILURE(uio.fErrCode)) {
+ errCode = uio.fErrCode;
+ }
+ }
+}
+
+// umtx_initOnce variant for plain functions, or static class functions,
+// with a context parameter.
+template<class T> void umtx_initOnce(UInitOnce &uio, void (*fp)(T), T context) {
+ if (umtx_loadAcquire(uio.fState) == 2) {
+ return;
+ }
+ if (umtx_initImplPreInit(uio)) {
+ (*fp)(context);
+ umtx_initImplPostInit(uio);
+ }
+}
+
+// umtx_initOnce variant for plain functions, or static class functions,
+// with a context parameter and an error code.
+template<class T> void umtx_initOnce(UInitOnce &uio, void (*fp)(T, UErrorCode &), T context, UErrorCode &errCode) {
+ if (U_FAILURE(errCode)) {
+ return;
+ }
+ if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
+ // We run the initialization.
+ (*fp)(context, errCode);
+ uio.fErrCode = errCode;
+ umtx_initImplPostInit(uio);
+ } else {
+ // Someone else already ran the initialization.
+ if (U_FAILURE(uio.fErrCode)) {
+ errCode = uio.fErrCode;
+ }
+ }
+}
+
+U_NAMESPACE_END
+
+
+
+/*************************************************************************************************
+ *
+ * Mutex Definitions. Platform Dependent, #if platform chain follows.
+ * TODO: Add a C++11 version.
+ * Need to convert all mutex using files to C++ first.
+ *
+ *************************************************************************************************/
+
+#if defined(U_USER_MUTEX_H)
+// #inlcude "U_USER_MUTEX_H"
+#include U_MUTEX_XSTR(U_USER_MUTEX_H)
+
+#elif U_PLATFORM_HAS_WIN32_API
+
+/* Windows Definitions.
+ * Windows comes first in the platform chain.
+ * Cygwin (and possibly others) have both WIN32 and POSIX APIs. Prefer Win32 in this case.
+ */
+
+
+/* For CRITICAL_SECTION */
+
+/*
+ * Note: there is an earlier include of windows.h in this file, but it is in
+ * different conditionals.
+ * This one is needed if we are using C++11 for atomic ops, but
+ * win32 APIs for Critical Sections.
+ */
+
+# define WIN32_LEAN_AND_MEAN
+# define VC_EXTRALEAN
+# define NOUSER
+# define NOSERVICE
+# define NOIME
+# define NOMCX
+# ifndef NOMINMAX
+# define NOMINMAX
+# endif
+# include <windows.h>
+
+
+typedef struct UMutex {
+ icu::UInitOnce fInitOnce;
+ CRITICAL_SECTION fCS;
+} UMutex;
+
+/* Initializer for a static UMUTEX. Deliberately contains no value for the
+ * CRITICAL_SECTION.
+ */
+#define U_MUTEX_INITIALIZER {U_INITONCE_INITIALIZER}
+
+struct UConditionVar {
+ HANDLE fEntryGate;
+ HANDLE fExitGate;
+ int32_t fWaitCount;
+};
+
+#define U_CONDITION_INITIALIZER {NULL, NULL, 0}
+
+
+
+#elif U_PLATFORM_IMPLEMENTS_POSIX
+
+/*
+ * POSIX platform
+ */
+
+#include <pthread.h>
+
+struct UMutex {
+ pthread_mutex_t fMutex;
+};
+typedef struct UMutex UMutex;
+#define U_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER}
+
+struct UConditionVar {
+ pthread_cond_t fCondition;
+};
+#define U_CONDITION_INITIALIZER {PTHREAD_COND_INITIALIZER}
+
+#else
+
+/*
+ * Unknow platform type.
+ * This is an error condition. ICU requires mutexes.
+ */
+
+#error Unknown Platform.
+
+#endif
+
+
+
+/**************************************************************************************
+ *
+ * Mutex Implementation function declaratations.
+ * Declarations are platform neutral.
+ * Implementations, in umutex.cpp, are platform specific.
+ *
+ ************************************************************************************/
+
+/* Lock a mutex.
+ * @param mutex The given mutex to be locked. Pass NULL to specify
+ * the global ICU mutex. Recursive locks are an error
+ * and may cause a deadlock on some platforms.
+ */
+U_INTERNAL void U_EXPORT2 umtx_lock(UMutex* mutex);
+
+/* Unlock a mutex.
+ * @param mutex The given mutex to be unlocked. Pass NULL to specify
+ * the global ICU mutex.
+ */
+U_INTERNAL void U_EXPORT2 umtx_unlock (UMutex* mutex);
+
+/*
+ * Wait on a condition variable.
+ * The calling thread will unlock the mutex and wait on the condition variable.
+ * The mutex must be locked by the calling thread when invoking this function.
+ *
+ * @param cond the condition variable to wait on.
+ * @param mutex the associated mutex.
+ */
+
+U_INTERNAL void U_EXPORT2 umtx_condWait(UConditionVar *cond, UMutex *mutex);
+
+
+/*
+ * Broadcast wakeup of all threads waiting on a Condition.
+ * The associated mutex must be locked by the calling thread when calling
+ * this function; this is a temporary ICU restriction.
+ *
+ * @param cond the condition variable.
+ */
+U_INTERNAL void U_EXPORT2 umtx_condBroadcast(UConditionVar *cond);
+
+/*
+ * Signal a condition variable, waking up one waiting thread.
+ * CAUTION: Do not use. Place holder only. Not implemented for Windows.
+ */
+U_INTERNAL void U_EXPORT2 umtx_condSignal(UConditionVar *cond);
+
+#endif /* UMUTEX_H */
+/*eof*/
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/appendable.h b/src/core/basetypes/icu-ucsdet/include/unicode/appendable.h
new file mode 100644
index 00000000..a6a83b15
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/appendable.h
@@ -0,0 +1,232 @@
+/*
+*******************************************************************************
+* Copyright (C) 2011-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: appendable.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010dec07
+* created by: Markus W. Scherer
+*/
+
+#ifndef __APPENDABLE_H__
+#define __APPENDABLE_H__
+
+/**
+ * \file
+ * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (UChars).
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * Base class for objects to which Unicode characters and strings can be appended.
+ * Combines elements of Java Appendable and ICU4C ByteSink.
+ *
+ * This class can be used in APIs where it does not matter whether the actual destination is
+ * a UnicodeString, a UChar[] array, a UnicodeSet, or any other object
+ * that receives and processes characters and/or strings.
+ *
+ * Implementation classes must implement at least appendCodeUnit(UChar).
+ * The base class provides default implementations for the other methods.
+ *
+ * The methods do not take UErrorCode parameters.
+ * If an error occurs (e.g., out-of-memory),
+ * in addition to returning FALSE from failing operations,
+ * the implementation must prevent unexpected behavior (e.g., crashes)
+ * from further calls and should make the error condition available separately
+ * (e.g., store a UErrorCode, make/keep a UnicodeString bogus).
+ * @stable ICU 4.8
+ */
+class U_COMMON_API Appendable : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~Appendable();
+
+ /**
+ * Appends a 16-bit code unit.
+ * @param c code unit
+ * @return TRUE if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodeUnit(UChar c) = 0;
+
+ /**
+ * Appends a code point.
+ * The default implementation calls appendCodeUnit(UChar) once or twice.
+ * @param c code point 0..0x10ffff
+ * @return TRUE if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodePoint(UChar32 c);
+
+ /**
+ * Appends a string.
+ * The default implementation calls appendCodeUnit(UChar) for each code unit.
+ * @param s string, must not be NULL if length!=0
+ * @param length string length, or -1 if NUL-terminated
+ * @return TRUE if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendString(const UChar *s, int32_t length);
+
+ /**
+ * Tells the object that the caller is going to append roughly
+ * appendCapacity UChars. A subclass might use this to pre-allocate
+ * a larger buffer if necessary.
+ * The default implementation does nothing. (It always returns TRUE.)
+ * @param appendCapacity estimated number of UChars that will be appended
+ * @return TRUE if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool reserveAppendCapacity(int32_t appendCapacity);
+
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *resultCapacity. Guarantees *resultCapacity>=minCapacity.
+ * May return a pointer to the caller-owned scratch buffer which must have
+ * scratchCapacity>=minCapacity.
+ * The returned buffer is only valid until the next operation
+ * on this Appendable.
+ *
+ * After writing at most *resultCapacity UChars, call appendString() with the
+ * pointer returned from this function and the number of UChars written.
+ * Many appendString() implementations will avoid copying UChars if this function
+ * returned an internal buffer.
+ *
+ * Partial usage example:
+ * \code
+ * int32_t capacity;
+ * UChar* buffer = app.getAppendBuffer(..., &capacity);
+ * ... Write n UChars into buffer, with n <= capacity.
+ * app.appendString(buffer, n);
+ * \endcode
+ * In many implementations, that call to append will avoid copying UChars.
+ *
+ * If the Appendable allocates or reallocates an internal buffer, it should use
+ * the desiredCapacityHint if appropriate.
+ * If a caller cannot provide a reasonable guess at the desired capacity,
+ * it should pass desiredCapacityHint=0.
+ *
+ * If a non-scratch buffer is returned, the caller may only pass
+ * a prefix to it to appendString().
+ * That is, it is not correct to pass an interior pointer to appendString().
+ *
+ * The default implementation always returns the scratch buffer.
+ *
+ * @param minCapacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desiredCapacityHint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratchCapacity capacity of the scratch buffer
+ * @param resultCapacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *resultCapacity>=minCapacity
+ * @stable ICU 4.8
+ */
+ virtual UChar *getAppendBuffer(int32_t minCapacity,
+ int32_t desiredCapacityHint,
+ UChar *scratch, int32_t scratchCapacity,
+ int32_t *resultCapacity);
+};
+
+/**
+ * An Appendable implementation which writes to a UnicodeString.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API UnicodeStringAppendable : public Appendable {
+public:
+ /**
+ * Aliases the UnicodeString (keeps its reference) for writing.
+ * @param s The UnicodeString to which this Appendable will write.
+ * @stable ICU 4.8
+ */
+ explicit UnicodeStringAppendable(UnicodeString &s) : str(s) {}
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~UnicodeStringAppendable();
+
+ /**
+ * Appends a 16-bit code unit to the string.
+ * @param c code unit
+ * @return TRUE if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodeUnit(UChar c);
+
+ /**
+ * Appends a code point to the string.
+ * @param c code point 0..0x10ffff
+ * @return TRUE if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodePoint(UChar32 c);
+
+ /**
+ * Appends a string to the UnicodeString.
+ * @param s string, must not be NULL if length!=0
+ * @param length string length, or -1 if NUL-terminated
+ * @return TRUE if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendString(const UChar *s, int32_t length);
+
+ /**
+ * Tells the UnicodeString that the caller is going to append roughly
+ * appendCapacity UChars.
+ * @param appendCapacity estimated number of UChars that will be appended
+ * @return TRUE if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool reserveAppendCapacity(int32_t appendCapacity);
+
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *resultCapacity. Guarantees *resultCapacity>=minCapacity.
+ * May return a pointer to the caller-owned scratch buffer which must have
+ * scratchCapacity>=minCapacity.
+ * The returned buffer is only valid until the next write operation
+ * on the UnicodeString.
+ *
+ * For details see Appendable::getAppendBuffer().
+ *
+ * @param minCapacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desiredCapacityHint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratchCapacity capacity of the scratch buffer
+ * @param resultCapacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *resultCapacity>=minCapacity
+ * @stable ICU 4.8
+ */
+ virtual UChar *getAppendBuffer(int32_t minCapacity,
+ int32_t desiredCapacityHint,
+ UChar *scratch, int32_t scratchCapacity,
+ int32_t *resultCapacity);
+
+private:
+ UnicodeString &str;
+};
+
+U_NAMESPACE_END
+
+#endif // __APPENDABLE_H__
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/bytestream.h b/src/core/basetypes/icu-ucsdet/include/unicode/bytestream.h
new file mode 100644
index 00000000..174aa38a
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/bytestream.h
@@ -0,0 +1,257 @@
+// Copyright (C) 2009-2012, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2007 Google Inc. All Rights Reserved.
+// Author: sanjay@google.com (Sanjay Ghemawat)
+//
+// Abstract interface that consumes a sequence of bytes (ByteSink).
+//
+// Used so that we can write a single piece of code that can operate
+// on a variety of output string types.
+//
+// Various implementations of this interface are provided:
+// ByteSink:
+// CheckedArrayByteSink Write to a flat array, with bounds checking
+// StringByteSink Write to an STL string
+
+// This code is a contribution of Google code, and the style used here is
+// a compromise between the original Google code and the ICU coding guidelines.
+// For example, data types are ICU-ified (size_t,int->int32_t),
+// and API comments doxygen-ified, but function names and behavior are
+// as in the original, if possible.
+// Assertion-style error handling, not available in ICU, was changed to
+// parameter "pinning" similar to UnicodeString.
+//
+// In addition, this is only a partial port of the original Google code,
+// limited to what was needed so far. The (nearly) complete original code
+// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
+// (see ICU ticket 6765, r25517).
+
+#ifndef __BYTESTREAM_H__
+#define __BYTESTREAM_H__
+
+/**
+ * \file
+ * \brief C++ API: Interface for writing bytes, and implementation classes.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/std_string.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A ByteSink can be filled with bytes.
+ * @stable ICU 4.2
+ */
+class U_COMMON_API ByteSink : public UMemory {
+public:
+ /**
+ * Default constructor.
+ * @stable ICU 4.2
+ */
+ ByteSink() { }
+ /**
+ * Virtual destructor.
+ * @stable ICU 4.2
+ */
+ virtual ~ByteSink();
+
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* bytes, int32_t n) = 0;
+
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *result_capacity. Guarantees *result_capacity>=min_capacity.
+ * May return a pointer to the caller-owned scratch buffer which must have
+ * scratch_capacity>=min_capacity.
+ * The returned buffer is only valid until the next operation
+ * on this ByteSink.
+ *
+ * After writing at most *result_capacity bytes, call Append() with the
+ * pointer returned from this function and the number of bytes written.
+ * Many Append() implementations will avoid copying bytes if this function
+ * returned an internal buffer.
+ *
+ * Partial usage example:
+ * int32_t capacity;
+ * char* buffer = sink->GetAppendBuffer(..., &capacity);
+ * ... Write n bytes into buffer, with n <= capacity.
+ * sink->Append(buffer, n);
+ * In many implementations, that call to Append will avoid copying bytes.
+ *
+ * If the ByteSink allocates or reallocates an internal buffer, it should use
+ * the desired_capacity_hint if appropriate.
+ * If a caller cannot provide a reasonable guess at the desired capacity,
+ * it should pass desired_capacity_hint=0.
+ *
+ * If a non-scratch buffer is returned, the caller may only pass
+ * a prefix to it to Append().
+ * That is, it is not correct to pass an interior pointer to Append().
+ *
+ * The default implementation always returns the scratch buffer.
+ *
+ * @param min_capacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desired_capacity_hint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratch_capacity capacity of the scratch buffer
+ * @param result_capacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *result_capacity>=min_capacity
+ * @stable ICU 4.2
+ */
+ virtual char* GetAppendBuffer(int32_t min_capacity,
+ int32_t desired_capacity_hint,
+ char* scratch, int32_t scratch_capacity,
+ int32_t* result_capacity);
+
+ /**
+ * Flush internal buffers.
+ * Some byte sinks use internal buffers or provide buffering
+ * and require calling Flush() at the end of the stream.
+ * The ByteSink should be ready for further Append() calls after Flush().
+ * The default implementation of Flush() does nothing.
+ * @stable ICU 4.2
+ */
+ virtual void Flush();
+
+private:
+ ByteSink(const ByteSink &); // copy constructor not implemented
+ ByteSink &operator=(const ByteSink &); // assignment operator not implemented
+};
+
+// -------------------------------------------------------------
+// Some standard implementations
+
+/**
+ * Implementation of ByteSink that writes to a flat byte array,
+ * with bounds-checking:
+ * This sink will not write more than capacity bytes to outbuf.
+ * If more than capacity bytes are Append()ed, then excess bytes are ignored,
+ * and Overflowed() will return true.
+ * Overflow does not cause a runtime error.
+ * @stable ICU 4.2
+ */
+class U_COMMON_API CheckedArrayByteSink : public ByteSink {
+public:
+ /**
+ * Constructs a ByteSink that will write to outbuf[0..capacity-1].
+ * @param outbuf buffer to write to
+ * @param capacity size of the buffer
+ * @stable ICU 4.2
+ */
+ CheckedArrayByteSink(char* outbuf, int32_t capacity);
+ /**
+ * Destructor.
+ * @stable ICU 4.2
+ */
+ virtual ~CheckedArrayByteSink();
+ /**
+ * Returns the sink to its original state, without modifying the buffer.
+ * Useful for reusing both the buffer and the sink for multiple streams.
+ * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
+ * and Overflowed()=FALSE.
+ * @return *this
+ * @stable ICU 4.6
+ */
+ virtual CheckedArrayByteSink& Reset();
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* bytes, int32_t n);
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *result_capacity. For details see the base class documentation.
+ * @param min_capacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desired_capacity_hint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratch_capacity capacity of the scratch buffer
+ * @param result_capacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *result_capacity>=min_capacity
+ * @stable ICU 4.2
+ */
+ virtual char* GetAppendBuffer(int32_t min_capacity,
+ int32_t desired_capacity_hint,
+ char* scratch, int32_t scratch_capacity,
+ int32_t* result_capacity);
+ /**
+ * Returns the number of bytes actually written to the sink.
+ * @return number of bytes written to the buffer
+ * @stable ICU 4.2
+ */
+ int32_t NumberOfBytesWritten() const { return size_; }
+ /**
+ * Returns true if any bytes were discarded, i.e., if there was an
+ * attempt to write more than 'capacity' bytes.
+ * @return TRUE if more than 'capacity' bytes were Append()ed
+ * @stable ICU 4.2
+ */
+ UBool Overflowed() const { return overflowed_; }
+ /**
+ * Returns the number of bytes appended to the sink.
+ * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
+ * else they return the same number.
+ * @return number of bytes written to the buffer
+ * @stable ICU 4.6
+ */
+ int32_t NumberOfBytesAppended() const { return appended_; }
+private:
+ char* outbuf_;
+ const int32_t capacity_;
+ int32_t size_;
+ int32_t appended_;
+ UBool overflowed_;
+ CheckedArrayByteSink(); ///< default constructor not implemented
+ CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented
+ CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented
+};
+
+#if U_HAVE_STD_STRING
+
+/**
+ * Implementation of ByteSink that writes to a "string".
+ * The StringClass is usually instantiated with a std::string.
+ * @stable ICU 4.2
+ */
+template<typename StringClass>
+class StringByteSink : public ByteSink {
+ public:
+ /**
+ * Constructs a ByteSink that will append bytes to the dest string.
+ * @param dest pointer to string object to append to
+ * @stable ICU 4.2
+ */
+ StringByteSink(StringClass* dest) : dest_(dest) { }
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * @param data the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
+ private:
+ StringClass* dest_;
+ StringByteSink(); ///< default constructor not implemented
+ StringByteSink(const StringByteSink &); ///< copy constructor not implemented
+ StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented
+};
+
+#endif
+
+U_NAMESPACE_END
+
+#endif // __BYTESTREAM_H__
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/localpointer.h b/src/core/basetypes/icu-ucsdet/include/unicode/localpointer.h
new file mode 100644
index 00000000..e3ccb258
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/localpointer.h
@@ -0,0 +1,304 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: localpointer.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov13
+* created by: Markus W. Scherer
+*/
+
+#ifndef __LOCALPOINTER_H__
+#define __LOCALPOINTER_H__
+
+/**
+ * \file
+ * \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code.
+ *
+ * These classes are inspired by
+ * - std::auto_ptr
+ * - boost::scoped_ptr & boost::scoped_array
+ * - Taligent Safe Pointers (TOnlyPointerTo)
+ *
+ * but none of those provide for all of the goals for ICU smart pointers:
+ * - Smart pointer owns the object and releases it when it goes out of scope.
+ * - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust.
+ * - ICU-compatible: No exceptions.
+ * - Need to be able to orphan/release the pointer and its ownership.
+ * - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects.
+ *
+ * For details see http://site.icu-project.org/design/cpp/scoped_ptr
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * "Smart pointer" base class; do not use directly: use LocalPointer etc.
+ *
+ * Base class for smart pointer classes that do not throw exceptions.
+ *
+ * Do not use this base class directly, since it does not delete its pointer.
+ * A subclass must implement methods that delete the pointer:
+ * Destructor and adoptInstead().
+ *
+ * There is no operator T *() provided because the programmer must decide
+ * whether to use getAlias() (without transfer of ownership) or orpan()
+ * (with transfer of ownership and NULLing of the pointer).
+ *
+ * @see LocalPointer
+ * @see LocalArray
+ * @see U_DEFINE_LOCAL_OPEN_POINTER
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalPointerBase {
+public:
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalPointerBase(T *p=NULL) : ptr(p) {}
+ /**
+ * Destructor deletes the object it owns.
+ * Subclass must override: Base class does nothing.
+ * @stable ICU 4.4
+ */
+ ~LocalPointerBase() { /* delete ptr; */ }
+ /**
+ * NULL check.
+ * @return TRUE if ==NULL
+ * @stable ICU 4.4
+ */
+ UBool isNull() const { return ptr==NULL; }
+ /**
+ * NULL check.
+ * @return TRUE if !=NULL
+ * @stable ICU 4.4
+ */
+ UBool isValid() const { return ptr!=NULL; }
+ /**
+ * Comparison with a simple pointer, so that existing code
+ * with ==NULL need not be changed.
+ * @param other simple pointer for comparison
+ * @return true if this pointer value equals other
+ * @stable ICU 4.4
+ */
+ bool operator==(const T *other) const { return ptr==other; }
+ /**
+ * Comparison with a simple pointer, so that existing code
+ * with !=NULL need not be changed.
+ * @param other simple pointer for comparison
+ * @return true if this pointer value differs from other
+ * @stable ICU 4.4
+ */
+ bool operator!=(const T *other) const { return ptr!=other; }
+ /**
+ * Access without ownership change.
+ * @return the pointer value
+ * @stable ICU 4.4
+ */
+ T *getAlias() const { return ptr; }
+ /**
+ * Access without ownership change.
+ * @return the pointer value as a reference
+ * @stable ICU 4.4
+ */
+ T &operator*() const { return *ptr; }
+ /**
+ * Access without ownership change.
+ * @return the pointer value
+ * @stable ICU 4.4
+ */
+ T *operator->() const { return ptr; }
+ /**
+ * Gives up ownership; the internal pointer becomes NULL.
+ * @return the pointer value;
+ * caller becomes responsible for deleting the object
+ * @stable ICU 4.4
+ */
+ T *orphan() {
+ T *p=ptr;
+ ptr=NULL;
+ return p;
+ }
+ /**
+ * Deletes the object it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * Subclass must override: Base class does not delete the object.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ // delete ptr;
+ ptr=p;
+ }
+protected:
+ /**
+ * Actual pointer.
+ * @internal
+ */
+ T *ptr;
+private:
+ // No comparison operators with other LocalPointerBases.
+ bool operator==(const LocalPointerBase &other);
+ bool operator!=(const LocalPointerBase &other);
+ // No ownership transfer: No copy constructor, no assignment operator.
+ LocalPointerBase(const LocalPointerBase &other);
+ void operator=(const LocalPointerBase &other);
+ // No heap allocation. Use only on the stack.
+ static void * U_EXPORT2 operator new(size_t size);
+ static void * U_EXPORT2 operator new[](size_t size);
+#if U_HAVE_PLACEMENT_NEW
+ static void * U_EXPORT2 operator new(size_t, void *ptr);
+#endif
+};
+
+/**
+ * "Smart pointer" class, deletes objects via the standard C++ delete operator.
+ * For most methods see the LocalPointerBase base class.
+ *
+ * Usage example:
+ * \code
+ * LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
+ * int32_t length=s->length(); // 2
+ * UChar lead=s->charAt(0); // 0xd900
+ * if(some condition) { return; } // no need to explicitly delete the pointer
+ * s.adoptInstead(new UnicodeString((UChar)0xfffc));
+ * length=s->length(); // 1
+ * // no need to explicitly delete the pointer
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalPointer : public LocalPointerBase<T> {
+public:
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {}
+ /**
+ * Destructor deletes the object it owns.
+ * @stable ICU 4.4
+ */
+ ~LocalPointer() {
+ delete LocalPointerBase<T>::ptr;
+ }
+ /**
+ * Deletes the object it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ delete LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ }
+};
+
+/**
+ * "Smart pointer" class, deletes objects via the C++ array delete[] operator.
+ * For most methods see the LocalPointerBase base class.
+ * Adds operator[] for array item access.
+ *
+ * Usage example:
+ * \code
+ * LocalArray<UnicodeString> a(new UnicodeString[2]);
+ * a[0].append((UChar)0x61);
+ * if(some condition) { return; } // no need to explicitly delete the array
+ * a.adoptInstead(new UnicodeString[4]);
+ * a[3].append((UChar)0x62).append((UChar)0x63).reverse();
+ * // no need to explicitly delete the array
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalArray : public LocalPointerBase<T> {
+public:
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an array of T objects that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {}
+ /**
+ * Destructor deletes the array it owns.
+ * @stable ICU 4.4
+ */
+ ~LocalArray() {
+ delete[] LocalPointerBase<T>::ptr;
+ }
+ /**
+ * Deletes the array it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * @param p simple pointer to an array of T objects that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ delete[] LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ }
+ /**
+ * Array item access (writable).
+ * No index bounds check.
+ * @param i array index
+ * @return reference to the array item
+ * @stable ICU 4.4
+ */
+ T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
+};
+
+/**
+ * \def U_DEFINE_LOCAL_OPEN_POINTER
+ * "Smart pointer" definition macro, deletes objects via the closeFunction.
+ * Defines a subclass of LocalPointerBase which works just
+ * like LocalPointer<Type> except that this subclass will use the closeFunction
+ * rather than the C++ delete operator.
+ *
+ * Requirement: The closeFunction must tolerate a NULL pointer.
+ * (We could add a NULL check here but it is normally redundant.)
+ *
+ * Usage example:
+ * \code
+ * LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode));
+ * utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
+ * utf8Out, (int32_t)sizeof(utf8Out),
+ * utf8In, utf8InLength, &errorCode);
+ * if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
+ class LocalPointerClassName : public LocalPointerBase<Type> { \
+ public: \
+ explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
+ ~LocalPointerClassName() { closeFunction(ptr); } \
+ void adoptInstead(Type *p) { \
+ closeFunction(ptr); \
+ ptr=p; \
+ } \
+ }
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+#endif /* __LOCALPOINTER_H__ */
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/platform.h b/src/core/basetypes/icu-ucsdet/include/unicode/platform.h
new file mode 100644
index 00000000..07dce8ad
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/platform.h
@@ -0,0 +1,751 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : platform.h
+*
+* Date Name Description
+* 05/13/98 nos Creation (content moved here from ptypes.h).
+* 03/02/99 stephen Added AS400 support.
+* 03/30/99 stephen Added Linux support.
+* 04/13/99 stephen Reworked for autoconf.
+******************************************************************************
+*/
+
+#ifndef _PLATFORM_H
+#define _PLATFORM_H
+
+#include "unicode/uconfig.h"
+#include "unicode/uvernum.h"
+
+/**
+ * \file
+ * \brief Basic types for the platform.
+ *
+ * This file used to be generated by autoconf/configure.
+ * Starting with ICU 49, platform.h is a normal source file,
+ * to simplify cross-compiling and working with non-autoconf/make build systems.
+ *
+ * When a value in this file does not work on a platform, then please
+ * try to derive it from the U_PLATFORM value
+ * (for which we might need a new value constant in rare cases)
+ * and/or from other macros that are predefined by the compiler
+ * or defined in standard (POSIX or platform or compiler) headers.
+ *
+ * As a temporary workaround, you can add an explicit <code>#define</code> for some macros
+ * before it is first tested, or add an equivalent -D macro definition
+ * to the compiler's command line.
+ *
+ * Note: Some compilers provide ways to show the predefined macros.
+ * For example, with gcc you can compile an empty .c file and have the compiler
+ * print the predefined macros with
+ * \code
+ * gcc -E -dM -x c /dev/null | sort
+ * \endcode
+ * (You can provide an actual empty .c file rather than /dev/null.
+ * <code>-x c++</code> is for C++.)
+ */
+
+/**
+ * Define some things so that they can be documented.
+ * @internal
+ */
+#ifdef U_IN_DOXYGEN
+/*
+ * Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
+ * Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented.
+ */
+
+/* None for now. */
+#endif
+
+/**
+ * \def U_PLATFORM
+ * The U_PLATFORM macro defines the platform we're on.
+ *
+ * We used to define one different, value-less macro per platform.
+ * That made it hard to know the set of relevant platforms and macros,
+ * and hard to deal with variants of platforms.
+ *
+ * Starting with ICU 49, we define platforms as numeric macros,
+ * with ranges of values for related platforms and their variants.
+ * The U_PLATFORM macro is set to one of these values.
+ *
+ * Historical note from the Solaris Wikipedia article:
+ * AT&T and Sun collaborated on a project to merge the most popular Unix variants
+ * on the market at that time: BSD, System V, and Xenix.
+ * This became Unix System V Release 4 (SVR4).
+ *
+ * @internal
+ */
+
+/** Unknown platform. @internal */
+#define U_PF_UNKNOWN 0
+/** Windows @internal */
+#define U_PF_WINDOWS 1000
+/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */
+#define U_PF_MINGW 1800
+/**
+ * Cygwin. Windows, calls to cygwin1.dll for Posix functions,
+ * using MSVC or GNU gcc and binutils.
+ * @internal
+ */
+#define U_PF_CYGWIN 1900
+/* Reserve 2000 for U_PF_UNIX? */
+/** HP-UX is based on UNIX System V. @internal */
+#define U_PF_HPUX 2100
+/** Solaris is a Unix operating system based on SVR4. @internal */
+#define U_PF_SOLARIS 2600
+/** BSD is a UNIX operating system derivative. @internal */
+#define U_PF_BSD 3000
+/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */
+#define U_PF_AIX 3100
+/** IRIX is based on UNIX System V with BSD extensions. @internal */
+#define U_PF_IRIX 3200
+/**
+ * Darwin is a POSIX-compliant operating system, composed of code developed by Apple,
+ * as well as code derived from NeXTSTEP, BSD, and other projects,
+ * built around the Mach kernel.
+ * Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based.
+ * (Original description modified from WikiPedia.)
+ * @internal
+ */
+#define U_PF_DARWIN 3500
+/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */
+#define U_PF_IPHONE 3550
+/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */
+#define U_PF_QNX 3700
+/** Linux is a Unix-like operating system. @internal */
+#define U_PF_LINUX 4000
+/** Android is based on Linux. @internal */
+#define U_PF_ANDROID 4050
+/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
+#define U_PF_OS390 9000
+/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */
+#define U_PF_OS400 9400
+
+#ifdef U_PLATFORM
+ /* Use the predefined value. */
+#elif defined(__MINGW32__)
+# define U_PLATFORM U_PF_MINGW
+#elif defined(__CYGWIN__)
+# define U_PLATFORM U_PF_CYGWIN
+#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+# define U_PLATFORM U_PF_WINDOWS
+#elif defined(__ANDROID__)
+# define U_PLATFORM U_PF_ANDROID
+ /* Android wchar_t support depends on the API level. */
+# include <android/api-level.h>
+#elif defined(linux) || defined(__linux__) || defined(__linux)
+# define U_PLATFORM U_PF_LINUX
+#elif defined(__APPLE__) && defined(__MACH__)
+# include <TargetConditionals.h>
+# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */
+# define U_PLATFORM U_PF_IPHONE
+# else
+# define U_PLATFORM U_PF_DARWIN
+# endif
+#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__)
+# define U_PLATFORM U_PF_BSD
+#elif defined(sun) || defined(__sun)
+ /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */
+# define U_PLATFORM U_PF_SOLARIS
+# if defined(__GNUC__)
+ /* Solaris/GCC needs this header file to get the proper endianness. Normally, this
+ * header file is included with stddef.h but on Solairs/GCC, the GCC version of stddef.h
+ * is included which does not include this header file.
+ */
+# include <sys/isa_defs.h>
+# endif
+#elif defined(_AIX) || defined(__TOS_AIX__)
+# define U_PLATFORM U_PF_AIX
+#elif defined(_hpux) || defined(hpux) || defined(__hpux)
+# define U_PLATFORM U_PF_HPUX
+#elif defined(sgi) || defined(__sgi)
+# define U_PLATFORM U_PF_IRIX
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define U_PLATFORM U_PF_QNX
+#elif defined(__TOS_MVS__)
+# define U_PLATFORM U_PF_OS390
+#elif defined(__OS400__) || defined(__TOS_OS400__)
+# define U_PLATFORM U_PF_OS400
+#else
+# define U_PLATFORM U_PF_UNKNOWN
+#endif
+
+/**
+ * \def CYGWINMSVC
+ * Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
+ * Otherwise undefined.
+ * @internal
+ */
+/* Commented out because this is already set in mh-cygwin-msvc
+#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER)
+# define CYGWINMSVC
+#endif
+*/
+
+/**
+ * \def U_PLATFORM_USES_ONLY_WIN32_API
+ * Defines whether the platform uses only the Win32 API.
+ * Set to 1 for Windows/MSVC and MinGW but not Cygwin.
+ * @internal
+ */
+#ifdef U_PLATFORM_USES_ONLY_WIN32_API
+ /* Use the predefined value. */
+#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC)
+# define U_PLATFORM_USES_ONLY_WIN32_API 1
+#else
+ /* Cygwin implements POSIX. */
+# define U_PLATFORM_USES_ONLY_WIN32_API 0
+#endif
+
+/**
+ * \def U_PLATFORM_HAS_WIN32_API
+ * Defines whether the Win32 API is available on the platform.
+ * Set to 1 for Windows/MSVC, MinGW and Cygwin.
+ * @internal
+ */
+#ifdef U_PLATFORM_HAS_WIN32_API
+ /* Use the predefined value. */
+#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
+# define U_PLATFORM_HAS_WIN32_API 1
+#else
+# define U_PLATFORM_HAS_WIN32_API 0
+#endif
+
+/**
+ * \def U_PLATFORM_IMPLEMENTS_POSIX
+ * Defines whether the platform implements (most of) the POSIX API.
+ * Set to 1 for Cygwin and most other platforms.
+ * @internal
+ */
+#ifdef U_PLATFORM_IMPLEMENTS_POSIX
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_PLATFORM_IMPLEMENTS_POSIX 0
+#else
+# define U_PLATFORM_IMPLEMENTS_POSIX 1
+#endif
+
+/**
+ * \def U_PLATFORM_IS_LINUX_BASED
+ * Defines whether the platform is Linux or one of its derivatives.
+ * @internal
+ */
+#ifdef U_PLATFORM_IS_LINUX_BASED
+ /* Use the predefined value. */
+#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= U_PF_ANDROID
+# define U_PLATFORM_IS_LINUX_BASED 1
+#else
+# define U_PLATFORM_IS_LINUX_BASED 0
+#endif
+
+/**
+ * \def U_PLATFORM_IS_DARWIN_BASED
+ * Defines whether the platform is Darwin or one of its derivatives.
+ * @internal
+ */
+#ifdef U_PLATFORM_IS_DARWIN_BASED
+ /* Use the predefined value. */
+#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE
+# define U_PLATFORM_IS_DARWIN_BASED 1
+#else
+# define U_PLATFORM_IS_DARWIN_BASED 0
+#endif
+
+/**
+ * \def U_HAVE_STDINT_H
+ * Defines whether stdint.h is available. It is a C99 standard header.
+ * We used to include inttypes.h which includes stdint.h but we usually do not need
+ * the additional definitions from inttypes.h.
+ * @internal
+ */
+#ifdef U_HAVE_STDINT_H
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600)
+ /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */
+# define U_HAVE_STDINT_H 1
+# else
+# define U_HAVE_STDINT_H 0
+# endif
+#elif U_PLATFORM == U_PF_SOLARIS
+ /* Solaris has inttypes.h but not stdint.h. */
+# define U_HAVE_STDINT_H 0
+#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
+ /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
+# define U_HAVE_STDINT_H 0
+#else
+# define U_HAVE_STDINT_H 1
+#endif
+
+/**
+ * \def U_HAVE_INTTYPES_H
+ * Defines whether inttypes.h is available. It is a C99 standard header.
+ * We include inttypes.h where it is available but stdint.h is not.
+ * @internal
+ */
+#ifdef U_HAVE_INTTYPES_H
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_SOLARIS
+ /* Solaris has inttypes.h but not stdint.h. */
+# define U_HAVE_INTTYPES_H 1
+#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
+ /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
+# define U_HAVE_INTTYPES_H 1
+#else
+ /* Most platforms have both inttypes.h and stdint.h, or neither. */
+# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
+#endif
+
+/**
+ * \def U_IOSTREAM_SOURCE
+ * Defines what support for C++ streams is available.
+ *
+ * If U_IOSTREAM_SOURCE is set to 199711, then &lt;iostream&gt; is available
+ * (the ISO/IEC C++ FDIS was published in November 1997), and then
+ * one should qualify streams using the std namespace in ICU header
+ * files.
+ * Starting with ICU 49, this is the only supported version.
+ *
+ * If U_IOSTREAM_SOURCE is set to 198506, then &lt;iostream.h&gt; is
+ * available instead (in June 1985 Stroustrup published
+ * "An Extensible I/O Facility for C++" at the summer USENIX conference).
+ * Starting with ICU 49, this version is not supported any more.
+ *
+ * If U_IOSTREAM_SOURCE is 0 (or any value less than 199711),
+ * then C++ streams are not available and
+ * support for them will be silently suppressed in ICU.
+ *
+ * @internal
+ */
+#ifndef U_IOSTREAM_SOURCE
+#define U_IOSTREAM_SOURCE 199711
+#endif
+
+/**
+ * \def U_HAVE_STD_STRING
+ * Defines whether the standard C++ (STL) &lt;string&gt; header is available.
+ * @internal
+ */
+#ifdef U_HAVE_STD_STRING
+ /* Use the predefined value. */
+#else
+# define U_HAVE_STD_STRING 1
+#endif
+
+/*===========================================================================*/
+/** @{ Compiler and environment features */
+/*===========================================================================*/
+
+/**
+ * \def U_GCC_MAJOR_MINOR
+ * Indicates whether the compiler is gcc (test for != 0),
+ * and if so, contains its major (times 100) and minor version numbers.
+ * If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0.
+ *
+ * For example, for testing for whether we have gcc, and whether it's 4.6 or higher,
+ * use "#if U_GCC_MAJOR_MINOR >= 406".
+ * @internal
+ */
+#ifdef __GNUC__
+# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+# define U_GCC_MAJOR_MINOR 0
+#endif
+
+/**
+ * \def U_IS_BIG_ENDIAN
+ * Determines the endianness of the platform.
+ * @internal
+ */
+#ifdef U_IS_BIG_ENDIAN
+ /* Use the predefined value. */
+#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN)
+# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+ /* gcc */
+# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
+# define U_IS_BIG_ENDIAN 1
+#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN)
+# define U_IS_BIG_ENDIAN 0
+#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__)
+ /* These platforms do not appear to predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0)
+ /* HPPA do not appear to predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#elif defined(sparc) || defined(__sparc) || defined(__sparc__)
+ /* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#else
+# define U_IS_BIG_ENDIAN 0
+#endif
+
+/**
+ * \def U_HAVE_PLACEMENT_NEW
+ * Determines whether to override placement new and delete for STL.
+ * @stable ICU 2.6
+ */
+#ifdef U_HAVE_PLACEMENT_NEW
+ /* Use the predefined value. */
+#elif defined(__BORLANDC__)
+# define U_HAVE_PLACEMENT_NEW 0
+#else
+# define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+/**
+ * \def U_HAVE_DEBUG_LOCATION_NEW
+ * Define this to define the MFC debug version of the operator new.
+ *
+ * @stable ICU 3.4
+ */
+#ifdef U_HAVE_DEBUG_LOCATION_NEW
+ /* Use the predefined value. */
+#elif defined(_MSC_VER)
+# define U_HAVE_DEBUG_LOCATION_NEW 1
+#else
+# define U_HAVE_DEBUG_LOCATION_NEW 0
+#endif
+
+/* Compatibility with non clang compilers */
+#ifndef __has_attribute
+# define __has_attribute(x) 0
+#endif
+
+/**
+ * \def U_MALLOC_ATTR
+ * Attribute to mark functions as malloc-like
+ * @internal
+ */
+#if defined(__GNUC__) && __GNUC__>=3
+# define U_MALLOC_ATTR __attribute__ ((__malloc__))
+#else
+# define U_MALLOC_ATTR
+#endif
+
+/**
+ * \def U_ALLOC_SIZE_ATTR
+ * Attribute to specify the size of the allocated buffer for malloc-like functions
+ * @internal
+ */
+#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_attribute(alloc_size)
+# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X)))
+# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y)))
+#else
+# define U_ALLOC_SIZE_ATTR(X)
+# define U_ALLOC_SIZE_ATTR2(X,Y)
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Character data types */
+/*===========================================================================*/
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
+ * @stable ICU 2.0
+ */
+#define U_ASCII_FAMILY 0
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
+ * @stable ICU 2.0
+ */
+#define U_EBCDIC_FAMILY 1
+
+/**
+ * \def U_CHARSET_FAMILY
+ *
+ * <p>These definitions allow to specify the encoding of text
+ * in the char data type as defined by the platform and the compiler.
+ * It is enough to determine the code point values of "invariant characters",
+ * which are the ones shared by all encodings that are in use
+ * on a given platform.</p>
+ *
+ * <p>Those "invariant characters" should be all the uppercase and lowercase
+ * latin letters, the digits, the space, and "basic punctuation".
+ * Also, '\\n', '\\r', '\\t' should be available.</p>
+ *
+ * <p>The list of "invariant characters" is:<br>
+ * \code
+ * A-Z a-z 0-9 SPACE " % &amp; ' ( ) * + , - . / : ; < = > ? _
+ * \endcode
+ * <br>
+ * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
+ *
+ * <p>This matches the IBM Syntactic Character Set (CS 640).</p>
+ *
+ * <p>In other words, all the graphic characters in 7-bit ASCII should
+ * be safely accessible except the following:</p>
+ *
+ * \code
+ * '\' <backslash>
+ * '[' <left bracket>
+ * ']' <right bracket>
+ * '{' <left brace>
+ * '}' <right brace>
+ * '^' <circumflex>
+ * '~' <tilde>
+ * '!' <exclamation mark>
+ * '#' <number sign>
+ * '|' <vertical line>
+ * '$' <dollar sign>
+ * '@' <commercial at>
+ * '`' <grave accent>
+ * \endcode
+ * @stable ICU 2.0
+ */
+#ifdef U_CHARSET_FAMILY
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB)
+# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
+#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__)
+# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
+#else
+# define U_CHARSET_FAMILY U_ASCII_FAMILY
+#endif
+
+/**
+ * \def U_CHARSET_IS_UTF8
+ *
+ * Hardcode the default charset to UTF-8.
+ *
+ * If this is set to 1, then
+ * - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
+ * contain UTF-8 text, regardless of what the system API uses
+ * - some ICU code will use fast functions like u_strFromUTF8()
+ * rather than the more general and more heavy-weight conversion API (ucnv.h)
+ * - ucnv_getDefaultName() always returns "UTF-8"
+ * - ucnv_setDefaultName() is disabled and will not change the default charset
+ * - static builds of ICU are smaller
+ * - more functionality is available with the UCONFIG_NO_CONVERSION build-time
+ * configuration option (see unicode/uconfig.h)
+ * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
+ *
+ * @stable ICU 4.2
+ * @see UCONFIG_NO_CONVERSION
+ */
+#ifdef U_CHARSET_IS_UTF8
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED
+# define U_CHARSET_IS_UTF8 1
+#else
+# define U_CHARSET_IS_UTF8 0
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Information about wchar support */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_WCHAR_H
+ * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
+ *
+ * @stable ICU 2.0
+ */
+#ifdef U_HAVE_WCHAR_H
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9
+ /*
+ * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t.
+ * The type and header existed, but the library functions did not work as expected.
+ * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway.
+ */
+# define U_HAVE_WCHAR_H 0
+#else
+# define U_HAVE_WCHAR_H 1
+#endif
+
+/**
+ * \def U_SIZEOF_WCHAR_T
+ * U_SIZEOF_WCHAR_T==sizeof(wchar_t)
+ *
+ * @stable ICU 2.0
+ */
+#ifdef U_SIZEOF_WCHAR_T
+ /* Use the predefined value. */
+#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9)
+ /*
+ * Classic Mac OS and Mac OS X before 10.3 (Panther) did not support wchar_t or wstring.
+ * Newer Mac OS X has size 4.
+ */
+# define U_SIZEOF_WCHAR_T 1
+#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN
+# define U_SIZEOF_WCHAR_T 2
+#elif U_PLATFORM == U_PF_AIX
+ /*
+ * AIX 6.1 information, section "Wide character data representation":
+ * "... the wchar_t datatype is 32-bit in the 64-bit environment and
+ * 16-bit in the 32-bit environment."
+ * and
+ * "All locales use Unicode for their wide character code values (process code),
+ * except the IBM-eucTW codeset."
+ */
+# ifdef __64BIT__
+# define U_SIZEOF_WCHAR_T 4
+# else
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#elif U_PLATFORM == U_PF_OS390
+ /*
+ * z/OS V1R11 information center, section "LP64 | ILP32":
+ * "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes.
+ * Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes."
+ */
+# ifdef _LP64
+# define U_SIZEOF_WCHAR_T 4
+# else
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#elif U_PLATFORM == U_PF_OS400
+# if defined(__UTF32__)
+ /*
+ * LOCALETYPE(*LOCALEUTF) is specified.
+ * Wide-character strings are in UTF-32,
+ * narrow-character strings are in UTF-8.
+ */
+# define U_SIZEOF_WCHAR_T 4
+# elif defined(__UCS2__)
+ /*
+ * LOCALETYPE(*LOCALEUCS2) is specified.
+ * Wide-character strings are in UCS-2,
+ * narrow-character strings are in EBCDIC.
+ */
+# define U_SIZEOF_WCHAR_T 2
+#else
+ /*
+ * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified.
+ * Wide-character strings are in 16-bit EBCDIC,
+ * narrow-character strings are in EBCDIC.
+ */
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#else
+# define U_SIZEOF_WCHAR_T 4
+#endif
+
+#ifndef U_HAVE_WCSCPY
+#define U_HAVE_WCSCPY U_HAVE_WCHAR_H
+#endif
+
+/** @} */
+
+/**
+ * \def U_HAVE_CHAR16_T
+ * Defines whether the char16_t type is available for UTF-16
+ * and u"abc" UTF-16 string literals are supported.
+ * This is a new standard type and standard string literal syntax in C++0x
+ * but has been available in some compilers before.
+ * @internal
+ */
+#ifdef U_HAVE_CHAR16_T
+ /* Use the predefined value. */
+#else
+ /*
+ * Notes:
+ * Visual Studio 10 (_MSC_VER>=1600) defines char16_t but
+ * does not support u"abc" string literals.
+ * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
+ * does not support u"abc" string literals.
+ * C++11 and C11 require support for UTF-16 literals
+ */
+# if (defined(__cplusplus) && __cplusplus >= 201103L) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
+# define U_HAVE_CHAR16_T 1
+# else
+# define U_HAVE_CHAR16_T 0
+# endif
+#endif
+
+/**
+ * @{
+ * \def U_DECLARE_UTF16
+ * Do not use this macro because it is not defined on all platforms.
+ * Use the UNICODE_STRING or U_STRING_DECL macros instead.
+ * @internal
+ */
+#ifdef U_DECLARE_UTF16
+ /* Use the predefined value. */
+#elif U_HAVE_CHAR16_T \
+ || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
+ || (defined(__HP_aCC) && __HP_aCC >= 035000) \
+ || (defined(__HP_cc) && __HP_cc >= 111106)
+# define U_DECLARE_UTF16(string) u ## string
+#elif U_SIZEOF_WCHAR_T == 2 \
+ && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
+# define U_DECLARE_UTF16(string) L ## string
+#else
+ /* Leave U_DECLARE_UTF16 undefined. See unistr.h. */
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Symbol import-export control */
+/*===========================================================================*/
+
+#ifdef U_EXPORT
+ /* Use the predefined value. */
+#elif defined(U_STATIC_IMPLEMENTATION)
+# define U_EXPORT
+#elif defined(__GNUC__)
+# define U_EXPORT __attribute__((visibility("default")))
+#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
+ || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
+# define U_EXPORT __global
+/*#elif defined(__HP_aCC) || defined(__HP_cc)
+# define U_EXPORT __declspec(dllexport)*/
+#elif defined(_MSC_VER)
+# define U_EXPORT __declspec(dllexport)
+#else
+# define U_EXPORT
+#endif
+
+/* U_CALLCONV is releated to U_EXPORT2 */
+#ifdef U_EXPORT2
+ /* Use the predefined value. */
+#elif defined(_MSC_VER)
+# define U_EXPORT2 __cdecl
+#else
+# define U_EXPORT2
+#endif
+
+#ifdef U_IMPORT
+ /* Use the predefined value. */
+#elif defined(_MSC_VER)
+ /* Windows needs to export/import data. */
+# define U_IMPORT __declspec(dllimport)
+#else
+# define U_IMPORT
+#endif
+
+/**
+ * \def U_CALLCONV
+ * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
+ * in callback function typedefs to make sure that the calling convention
+ * is compatible.
+ *
+ * This is only used for non-ICU-API functions.
+ * When a function is a public ICU API,
+ * you must use the U_CAPI and U_EXPORT2 qualifiers.
+ * @stable ICU 2.0
+ */
+#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
+# define U_CALLCONV __cdecl
+#else
+# define U_CALLCONV U_EXPORT2
+#endif
+
+/* @} */
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ptypes.h b/src/core/basetypes/icu-ucsdet/include/unicode/ptypes.h
new file mode 100644
index 00000000..b7f71160
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ptypes.h
@@ -0,0 +1,126 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : ptypes.h
+*
+* Date Name Description
+* 05/13/98 nos Creation (content moved here from ptypes.h).
+* 03/02/99 stephen Added AS400 support.
+* 03/30/99 stephen Added Linux support.
+* 04/13/99 stephen Reworked for autoconf.
+* 09/18/08 srl Moved basic types back to ptypes.h from platform.h
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief C API: Definitions of integer types of various widths
+ */
+
+#ifndef _PTYPES_H
+#define _PTYPES_H
+
+/**
+ * \def __STDC_LIMIT_MACROS
+ * According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations
+ * macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested.
+ * We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code
+ * that uses such limit macros.
+ * @internal
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+/* NULL, size_t, wchar_t */
+#include <stddef.h>
+
+/*
+ * If all compilers provided all of the C99 headers and types,
+ * we would just unconditionally #include <stdint.h> here
+ * and not need any of the stuff after including platform.h.
+ */
+
+/* Find out if we have stdint.h etc. */
+#include "unicode/platform.h"
+
+/*===========================================================================*/
+/* Generic data types */
+/*===========================================================================*/
+
+/* If your platform does not have the <stdint.h> header, you may
+ need to edit the typedefs in the #else section below.
+ Use #if...#else...#endif with predefined compiler macros if possible. */
+#if U_HAVE_STDINT_H
+
+/*
+ * We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>.
+ * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc.
+ * which we almost never use, plus stuff like imaxabs() which we never use.
+ */
+#include <stdint.h>
+
+#if U_PLATFORM == U_PF_OS390
+/* The features header is needed to get (u)int64_t sometimes. */
+#include <features.h>
+/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */
+#if !defined(__uint8_t)
+#define __uint8_t 1
+typedef unsigned char uint8_t;
+#endif
+#endif /* U_PLATFORM == U_PF_OS390 */
+
+#elif U_HAVE_INTTYPES_H
+
+# include <inttypes.h>
+
+#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
+
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+
+#if ! U_HAVE_UINT8_T
+typedef unsigned char uint8_t;
+#endif
+
+#if ! U_HAVE_INT16_T
+typedef signed short int16_t;
+#endif
+
+#if ! U_HAVE_UINT16_T
+typedef unsigned short uint16_t;
+#endif
+
+#if ! U_HAVE_INT32_T
+typedef signed int int32_t;
+#endif
+
+#if ! U_HAVE_UINT32_T
+typedef unsigned int uint32_t;
+#endif
+
+#if ! U_HAVE_INT64_T
+#ifdef _MSC_VER
+ typedef signed __int64 int64_t;
+#else
+ typedef signed long long int64_t;
+#endif
+#endif
+
+#if ! U_HAVE_UINT64_T
+#ifdef _MSC_VER
+ typedef unsigned __int64 uint64_t;
+#else
+ typedef unsigned long long uint64_t;
+#endif
+#endif
+
+#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
+
+#endif /* _PTYPES_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/putil.h b/src/core/basetypes/icu-ucsdet/include/unicode/putil.h
new file mode 100644
index 00000000..df1b17ba
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/putil.h
@@ -0,0 +1,181 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : putil.h
+*
+* Date Name Description
+* 05/14/98 nos Creation (content moved here from utypes.h).
+* 06/17/99 erm Added IEEE_754
+* 07/22/98 stephen Added IEEEremainder, max, min, trunc
+* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
+* 08/24/98 stephen Added longBitsFromDouble
+* 03/02/99 stephen Removed openFile(). Added AS400 support.
+* 04/15/99 stephen Converted to C
+* 11/15/99 helena Integrated S/390 changes for IEEE support.
+* 01/11/00 helena Added u_getVersion.
+******************************************************************************
+*/
+
+#ifndef PUTIL_H
+#define PUTIL_H
+
+#include "unicode/utypes.h"
+ /**
+ * \file
+ * \brief C API: Platform Utilities
+ */
+
+/*==========================================================================*/
+/* Platform utilities */
+/*==========================================================================*/
+
+/**
+ * Platform utilities isolates the platform dependencies of the
+ * libarary. For each platform which this code is ported to, these
+ * functions may have to be re-implemented.
+ */
+
+/**
+ * Return the ICU data directory.
+ * The data directory is where common format ICU data files (.dat files)
+ * are loaded from. Note that normal use of the built-in ICU
+ * facilities does not require loading of an external data file;
+ * unless you are adding custom data to ICU, the data directory
+ * does not need to be set.
+ *
+ * The data directory is determined as follows:
+ * If u_setDataDirectory() has been called, that is it, otherwise
+ * if the ICU_DATA environment variable is set, use that, otherwise
+ * If a data directory was specifed at ICU build time
+ * <code>
+ * \code
+ * #define ICU_DATA_DIR "path"
+ * \endcode
+ * </code> use that,
+ * otherwise no data directory is available.
+ *
+ * @return the data directory, or an empty string ("") if no data directory has
+ * been specified.
+ *
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2 u_getDataDirectory(void);
+
+
+/**
+ * Set the ICU data directory.
+ * The data directory is where common format ICU data files (.dat files)
+ * are loaded from. Note that normal use of the built-in ICU
+ * facilities does not require loading of an external data file;
+ * unless you are adding custom data to ICU, the data directory
+ * does not need to be set.
+ *
+ * This function should be called at most once in a process, before the
+ * first ICU operation (e.g., u_init()) that will require the loading of an
+ * ICU data file.
+ * This function is not thread-safe. Use it before calling ICU APIs from
+ * multiple threads.
+ *
+ * @param directory The directory to be set.
+ *
+ * @see u_init
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Return the time zone files override directory, or an empty string if
+ * no directory was specified. Certain time zone resources will be preferrentially
+ * loaded from individual files in this directory.
+ *
+ * @return the time zone data override directory.
+ * @internal
+ */
+U_INTERNAL const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status);
+
+/**
+ * Set the time zone files override directory.
+ * This function is not thread safe; it must not be called concurrently with
+ * u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions.
+ * This function should only be called before using any ICU service that
+ * will access the time zone data.
+ * @internal
+ */
+U_INTERNAL void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
+
+
+/**
+ * @{
+ * Filesystem file and path separator characters.
+ * Example: '/' and ':' on Unix, '\\' and ';' on Windows.
+ * @stable ICU 2.0
+ */
+#if U_PLATFORM_USES_ONLY_WIN32_API
+# define U_FILE_SEP_CHAR '\\'
+# define U_FILE_ALT_SEP_CHAR '/'
+# define U_PATH_SEP_CHAR ';'
+# define U_FILE_SEP_STRING "\\"
+# define U_FILE_ALT_SEP_STRING "/"
+# define U_PATH_SEP_STRING ";"
+#else
+# define U_FILE_SEP_CHAR '/'
+# define U_FILE_ALT_SEP_CHAR '/'
+# define U_PATH_SEP_CHAR ':'
+# define U_FILE_SEP_STRING "/"
+# define U_FILE_ALT_SEP_STRING "/"
+# define U_PATH_SEP_STRING ":"
+#endif
+
+/** @} */
+
+/**
+ * Convert char characters to UChar characters.
+ * This utility function is useful only for "invariant characters"
+ * that are encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param cs Input string, points to <code>length</code>
+ * character bytes from a subset of the platform encoding.
+ * @param us Output string, points to memory for <code>length</code>
+ * Unicode characters.
+ * @param length The number of characters to convert; this may
+ * include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, int32_t length);
+
+/**
+ * Convert UChar characters to char characters.
+ * This utility function is useful only for "invariant characters"
+ * that can be encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param us Input string, points to <code>length</code>
+ * Unicode characters that can be encoded with the
+ * codepage-invariant subset of the platform encoding.
+ * @param cs Output string, points to memory for <code>length</code>
+ * character bytes.
+ * @param length The number of characters to convert; this may
+ * include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, int32_t length);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/rep.h b/src/core/basetypes/icu-ucsdet/include/unicode/rep.h
new file mode 100644
index 00000000..4c7eae14
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/rep.h
@@ -0,0 +1,261 @@
+/*
+**************************************************************************
+* Copyright (C) 1999-2012, International Business Machines Corporation and
+* others. All Rights Reserved.
+**************************************************************************
+* Date Name Description
+* 11/17/99 aliu Creation. Ported from java. Modified to
+* match current UnicodeString API. Forced
+* to use name "handleReplaceBetween" because
+* of existing methods in UnicodeString.
+**************************************************************************
+*/
+
+#ifndef REP_H
+#define REP_H
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Replaceable String
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * <code>Replaceable</code> is an abstract base class representing a
+ * string of characters that supports the replacement of a range of
+ * itself with a new string of characters. It is used by APIs that
+ * change a piece of text while retaining metadata. Metadata is data
+ * other than the Unicode characters returned by char32At(). One
+ * example of metadata is style attributes; another is an edit
+ * history, marking each character with an author and revision number.
+ *
+ * <p>An implicit aspect of the <code>Replaceable</code> API is that
+ * during a replace operation, new characters take on the metadata of
+ * the old characters. For example, if the string "the <b>bold</b>
+ * font" has range (4, 8) replaced with "strong", then it becomes "the
+ * <b>strong</b> font".
+ *
+ * <p><code>Replaceable</code> specifies ranges using a start
+ * offset and a limit offset. The range of characters thus specified
+ * includes the characters at offset start..limit-1. That is, the
+ * start offset is inclusive, and the limit offset is exclusive.
+ *
+ * <p><code>Replaceable</code> also includes API to access characters
+ * in the string: <code>length()</code>, <code>charAt()</code>,
+ * <code>char32At()</code>, and <code>extractBetween()</code>.
+ *
+ * <p>For a subclass to support metadata, typical behavior of
+ * <code>replace()</code> is the following:
+ * <ul>
+ * <li>Set the metadata of the new text to the metadata of the first
+ * character replaced</li>
+ * <li>If no characters are replaced, use the metadata of the
+ * previous character</li>
+ * <li>If there is no previous character (i.e. start == 0), use the
+ * following character</li>
+ * <li>If there is no following character (i.e. the replaceable was
+ * empty), use default metadata.<br>
+ * <li>If the code point U+FFFF is seen, it should be interpreted as
+ * a special marker having no metadata<li>
+ * </li>
+ * </ul>
+ * If this is not the behavior, the subclass should document any differences.
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API Replaceable : public UObject {
+
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~Replaceable();
+
+ /**
+ * Returns the number of 16-bit code units in the text.
+ * @return number of 16-bit code units in text
+ * @stable ICU 1.8
+ */
+ inline int32_t length() const;
+
+ /**
+ * Returns the 16-bit code unit at the given offset into the text.
+ * @param offset an integer between 0 and <code>length()</code>-1
+ * inclusive
+ * @return 16-bit code unit of text at given offset
+ * @stable ICU 1.8
+ */
+ inline UChar charAt(int32_t offset) const;
+
+ /**
+ * Returns the 32-bit code point at the given 16-bit offset into
+ * the text. This assumes the text is stored as 16-bit code units
+ * with surrogate pairs intermixed. If the offset of a leading or
+ * trailing code unit of a surrogate pair is given, return the
+ * code point of the surrogate pair.
+ *
+ * @param offset an integer between 0 and <code>length()</code>-1
+ * inclusive
+ * @return 32-bit code point of text at given offset
+ * @stable ICU 1.8
+ */
+ inline UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>)
+ * into the UnicodeString <tt>target</tt>.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to
+ * be copied
+ * @param target UnicodeString into which to copy characters.
+ * @return A reference to <TT>target</TT>
+ * @stable ICU 2.1
+ */
+ virtual void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& target) const = 0;
+
+ /**
+ * Replaces a substring of this object with the given text. If the
+ * characters being replaced have metadata, the new characters
+ * that replace them should be given the same metadata.
+ *
+ * <p>Subclasses must ensure that if the text between start and
+ * limit is equal to the replacement text, that replace has no
+ * effect. That is, any metadata
+ * should be unaffected. In addition, subclasses are encouraged to
+ * check for initial and trailing identical characters, and make a
+ * smaller replacement if possible. This will preserve as much
+ * metadata as possible.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= length()</code>.
+ * @param text the text to replace characters <code>start</code>
+ * to <code>limit - 1</code>
+ * @stable ICU 2.0
+ */
+ virtual void handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text) = 0;
+ // Note: All other methods in this class take the names of
+ // existing UnicodeString methods. This method is the exception.
+ // It is named differently because all replace methods of
+ // UnicodeString return a UnicodeString&. The 'between' is
+ // required in order to conform to the UnicodeString naming
+ // convention; API taking start/length are named <operation>, and
+ // those taking start/limit are named <operationBetween>. The
+ // 'handle' is added because 'replaceBetween' and
+ // 'doReplaceBetween' are already taken.
+
+ /**
+ * Copies a substring of this object, retaining metadata.
+ * This method is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * @param start the beginning index, inclusive; <code>0 <= start <=
+ * limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit <=
+ * length()</code>.
+ * @param dest the destination index. The characters from
+ * <code>start..limit-1</code> will be copied to <code>dest</code>.
+ * Implementations of this method may assume that <code>dest <= start ||
+ * dest >= limit</code>.
+ * @stable ICU 2.0
+ */
+ virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
+
+ /**
+ * Returns true if this object contains metadata. If a
+ * Replaceable object has metadata, calls to the Replaceable API
+ * must be made so as to preserve metadata. If it does not, calls
+ * to the Replaceable API may be optimized to improve performance.
+ * The default implementation returns true.
+ * @return true if this object contains metadata
+ * @stable ICU 2.2
+ */
+ virtual UBool hasMetaData() const;
+
+ /**
+ * Clone this object, an instance of a subclass of Replaceable.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The clone functions in all subclasses return a pointer to a Replaceable
+ * because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.6
+ */
+ virtual Replaceable *clone() const;
+
+protected:
+
+ /**
+ * Default constructor.
+ * @stable ICU 2.4
+ */
+ inline Replaceable();
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ Replaceable &Replaceable::operator=(const Replaceable &);
+ */
+
+ /**
+ * Virtual version of length().
+ * @stable ICU 2.4
+ */
+ virtual int32_t getLength() const = 0;
+
+ /**
+ * Virtual version of charAt().
+ * @stable ICU 2.4
+ */
+ virtual UChar getCharAt(int32_t offset) const = 0;
+
+ /**
+ * Virtual version of char32At().
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getChar32At(int32_t offset) const = 0;
+};
+
+inline Replaceable::Replaceable() {}
+
+inline int32_t
+Replaceable::length() const {
+ return getLength();
+}
+
+inline UChar
+Replaceable::charAt(int32_t offset) const {
+ return getCharAt(offset);
+}
+
+inline UChar32
+Replaceable::char32At(int32_t offset) const {
+ return getChar32At(offset);
+}
+
+// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/std_string.h b/src/core/basetypes/icu-ucsdet/include/unicode/std_string.h
new file mode 100644
index 00000000..05955c5d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/std_string.h
@@ -0,0 +1,37 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: std_string.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009feb19
+* created by: Markus W. Scherer
+*/
+
+#ifndef __STD_STRING_H__
+#define __STD_STRING_H__
+
+/**
+ * \file
+ * \brief C++ API: Central ICU header for including the C++ standard &lt;string&gt;
+ * header and for related definitions.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_HAVE_STD_STRING
+
+#if !defined(_MSC_VER)
+namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364
+#endif
+#include <string>
+
+#endif // U_HAVE_STD_STRING
+
+#endif // __STD_STRING_H__
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/strenum.h b/src/core/basetypes/icu-ucsdet/include/unicode/strenum.h
new file mode 100644
index 00000000..3dbe21c6
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/strenum.h
@@ -0,0 +1,276 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*/
+
+#ifndef STRENUM_H
+#define STRENUM_H
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: String Enumeration
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for 'pure' C++ implementations of uenum api. Adds a
+ * method that returns the next UnicodeString since in C++ this can
+ * be a common storage format for strings.
+ *
+ * <p>The model is that the enumeration is over strings maintained by
+ * a 'service.' At any point, the service might change, invalidating
+ * the enumerator (though this is expected to be rare). The iterator
+ * returns an error if this has occurred. Lack of the error is no
+ * guarantee that the service didn't change immediately after the
+ * call, so the returned string still might not be 'valid' on
+ * subsequent use.</p>
+ *
+ * <p>Strings may take the form of const char*, const UChar*, or const
+ * UnicodeString*. The type you get is determine by the variant of
+ * 'next' that you call. In general the StringEnumeration is
+ * optimized for one of these types, but all StringEnumerations can
+ * return all types. Returned strings are each terminated with a NUL.
+ * Depending on the service data, they might also include embedded NUL
+ * characters, so API is provided to optionally return the true
+ * length, counting the embedded NULs but not counting the terminating
+ * NUL.</p>
+ *
+ * <p>The pointers returned by next, unext, and snext become invalid
+ * upon any subsequent call to the enumeration's destructor, next,
+ * unext, snext, or reset.</p>
+ *
+ * ICU 2.8 adds some default implementations and helper functions
+ * for subclasses.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API StringEnumeration : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.4
+ */
+ virtual ~StringEnumeration();
+
+ /**
+ * Clone this object, an instance of a subclass of StringEnumeration.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The clone functions in all subclasses return a base class pointer
+ * because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ virtual StringEnumeration *clone() const;
+
+ /**
+ * <p>Return the number of elements that the iterator traverses. If
+ * the iterator is out of sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
+ *
+ * <p>The return value will not change except possibly as a result of
+ * a subsequent call to reset, or if the iterator becomes out of sync.</p>
+ *
+ * <p>This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched
+ * (depending on the storage format of the data being
+ * traversed).</p>
+ *
+ * @param status the error code.
+ * @return number of elements in the iterator.
+ *
+ * @stable ICU 2.4 */
+ virtual int32_t count(UErrorCode& status) const = 0;
+
+ /**
+ * <p>Returns the next element as a NUL-terminated char*. If there
+ * are no more elements, returns NULL. If the resultLength pointer
+ * is not NULL, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * <p>If the native service string is a UChar* string, it is
+ * converted to char* with the invariant converter. If the
+ * conversion fails (because a character cannot be converted) then
+ * status is set to U_INVARIANT_CONVERSION_ERROR and the return
+ * value is undefined (though not NULL).</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls snext()
+ * and handles the conversion.
+ * Either next() or snext() must be implemented differently by a subclass.
+ *
+ * @param status the error code.
+ * @param resultLength a pointer to receive the length, can be NULL.
+ * @return a pointer to the string, or NULL.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const char* next(int32_t *resultLength, UErrorCode& status);
+
+ /**
+ * <p>Returns the next element as a NUL-terminated UChar*. If there
+ * are no more elements, returns NULL. If the resultLength pointer
+ * is not NULL, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls snext()
+ * and handles the conversion.
+ *
+ * @param status the error code.
+ * @param resultLength a ponter to receive the length, can be NULL.
+ * @return a pointer to the string, or NULL.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const UChar* unext(int32_t *resultLength, UErrorCode& status);
+
+ /**
+ * <p>Returns the next element a UnicodeString*. If there are no
+ * more elements, returns NULL.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls next()
+ * and handles the conversion.
+ * Either next() or snext() must be implemented differently by a subclass.
+ *
+ * @param status the error code.
+ * @return a pointer to the string, or NULL.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const UnicodeString* snext(UErrorCode& status);
+
+ /**
+ * <p>Resets the iterator. This re-establishes sync with the
+ * service and rewinds the iterator to start at the first
+ * element.</p>
+ *
+ * <p>Previous pointers returned by next, unext, or snext become
+ * invalid, and the value returned by count might change.</p>
+ *
+ * @param status the error code.
+ *
+ * @stable ICU 2.4
+ */
+ virtual void reset(UErrorCode& status) = 0;
+
+ /**
+ * Compares this enumeration to other to check if both are equal
+ *
+ * @param that The other string enumeration to compare this object to
+ * @return TRUE if the enumerations are equal. FALSE if not.
+ * @stable ICU 3.6
+ */
+ virtual UBool operator==(const StringEnumeration& that)const;
+ /**
+ * Compares this enumeration to other to check if both are not equal
+ *
+ * @param that The other string enumeration to compare this object to
+ * @return TRUE if the enumerations are equal. FALSE if not.
+ * @stable ICU 3.6
+ */
+ virtual UBool operator!=(const StringEnumeration& that)const;
+
+protected:
+ /**
+ * UnicodeString field for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ UnicodeString unistr;
+ /**
+ * char * default buffer for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ char charsBuffer[32];
+ /**
+ * char * buffer for use with default implementations and subclasses.
+ * Allocated in constructor and in ensureCharsCapacity().
+ * @stable ICU 2.8
+ */
+ char *chars;
+ /**
+ * Capacity of chars, for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ int32_t charsCapacity;
+
+ /**
+ * Default constructor for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ StringEnumeration();
+
+ /**
+ * Ensures that chars is at least as large as the requested capacity.
+ * For use with default implementations and subclasses.
+ *
+ * @param capacity Requested capacity.
+ * @param status ICU in/out error code.
+ * @stable ICU 2.8
+ */
+ void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
+
+ /**
+ * Converts s to Unicode and sets unistr to the result.
+ * For use with default implementations and subclasses,
+ * especially for implementations of snext() in terms of next().
+ * This is provided with a helper function instead of a default implementation
+ * of snext() to avoid potential infinite loops between next() and snext().
+ *
+ * For example:
+ * \code
+ * const UnicodeString* snext(UErrorCode& status) {
+ * int32_t resultLength=0;
+ * const char *s=next(&resultLength, status);
+ * return setChars(s, resultLength, status);
+ * }
+ * \endcode
+ *
+ * @param s String to be converted to Unicode.
+ * @param length Length of the string.
+ * @param status ICU in/out error code.
+ * @return A pointer to unistr.
+ * @stable ICU 2.8
+ */
+ UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
+};
+
+U_NAMESPACE_END
+
+/* STRENUM_H */
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/stringpiece.h b/src/core/basetypes/icu-ucsdet/include/unicode/stringpiece.h
new file mode 100644
index 00000000..b29571d4
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/stringpiece.h
@@ -0,0 +1,224 @@
+// Copyright (C) 2009-2013, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2001 and onwards Google Inc.
+// Author: Sanjay Ghemawat
+
+// This code is a contribution of Google code, and the style used here is
+// a compromise between the original Google code and the ICU coding guidelines.
+// For example, data types are ICU-ified (size_t,int->int32_t),
+// and API comments doxygen-ified, but function names and behavior are
+// as in the original, if possible.
+// Assertion-style error handling, not available in ICU, was changed to
+// parameter "pinning" similar to UnicodeString.
+//
+// In addition, this is only a partial port of the original Google code,
+// limited to what was needed so far. The (nearly) complete original code
+// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
+// (see ICU ticket 6765, r25517).
+
+#ifndef __STRINGPIECE_H__
+#define __STRINGPIECE_H__
+
+/**
+ * \file
+ * \brief C++ API: StringPiece: Read-only byte string wrapper class.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/std_string.h"
+
+// Arghh! I wish C++ literals were "string".
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A string-like object that points to a sized piece of memory.
+ *
+ * We provide non-explicit singleton constructors so users can pass
+ * in a "const char*" or a "string" wherever a "StringPiece" is
+ * expected.
+ *
+ * Functions or methods may use const StringPiece& parameters to accept either
+ * a "const char*" or a "string" value that will be implicitly converted to
+ * a StringPiece.
+ *
+ * Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+ * conversions from "const char*" to "string" and back again.
+ *
+ * @stable ICU 4.2
+ */
+class U_COMMON_API StringPiece : public UMemory {
+ private:
+ const char* ptr_;
+ int32_t length_;
+
+ public:
+ /**
+ * Default constructor, creates an empty StringPiece.
+ * @stable ICU 4.2
+ */
+ StringPiece() : ptr_(NULL), length_(0) { }
+ /**
+ * Constructs from a NUL-terminated const char * pointer.
+ * @param str a NUL-terminated const char * pointer
+ * @stable ICU 4.2
+ */
+ StringPiece(const char* str);
+#if U_HAVE_STD_STRING
+ /**
+ * Constructs from a std::string.
+ * @stable ICU 4.2
+ */
+ StringPiece(const std::string& str)
+ : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
+#endif
+ /**
+ * Constructs from a const char * pointer and a specified length.
+ * @param offset a const char * pointer (need not be terminated)
+ * @param len the length of the string; must be non-negative
+ * @stable ICU 4.2
+ */
+ StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
+ /**
+ * Substring of another StringPiece.
+ * @param x the other StringPiece
+ * @param pos start position in x; must be non-negative and <= x.length().
+ * @stable ICU 4.2
+ */
+ StringPiece(const StringPiece& x, int32_t pos);
+ /**
+ * Substring of another StringPiece.
+ * @param x the other StringPiece
+ * @param pos start position in x; must be non-negative and <= x.length().
+ * @param len length of the substring;
+ * must be non-negative and will be pinned to at most x.length() - pos.
+ * @stable ICU 4.2
+ */
+ StringPiece(const StringPiece& x, int32_t pos, int32_t len);
+
+ /**
+ * Returns the string pointer. May be NULL if it is empty.
+ *
+ * data() may return a pointer to a buffer with embedded NULs, and the
+ * returned buffer may or may not be null terminated. Therefore it is
+ * typically a mistake to pass data() to a routine that expects a NUL
+ * terminated string.
+ * @return the string pointer
+ * @stable ICU 4.2
+ */
+ const char* data() const { return ptr_; }
+ /**
+ * Returns the string length. Same as length().
+ * @return the string length
+ * @stable ICU 4.2
+ */
+ int32_t size() const { return length_; }
+ /**
+ * Returns the string length. Same as size().
+ * @return the string length
+ * @stable ICU 4.2
+ */
+ int32_t length() const { return length_; }
+ /**
+ * Returns whether the string is empty.
+ * @return TRUE if the string is empty
+ * @stable ICU 4.2
+ */
+ UBool empty() const { return length_ == 0; }
+
+ /**
+ * Sets to an empty string.
+ * @stable ICU 4.2
+ */
+ void clear() { ptr_ = NULL; length_ = 0; }
+
+ /**
+ * Reset the stringpiece to refer to new data.
+ * @param xdata pointer the new string data. Need not be nul terminated.
+ * @param len the length of the new data
+ * @stable ICU 4.8
+ */
+ void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
+
+ /**
+ * Reset the stringpiece to refer to new data.
+ * @param str a pointer to a NUL-terminated string.
+ * @stable ICU 4.8
+ */
+ void set(const char* str);
+
+ /**
+ * Removes the first n string units.
+ * @param n prefix length, must be non-negative and <=length()
+ * @stable ICU 4.2
+ */
+ void remove_prefix(int32_t n) {
+ if (n >= 0) {
+ if (n > length_) {
+ n = length_;
+ }
+ ptr_ += n;
+ length_ -= n;
+ }
+ }
+
+ /**
+ * Removes the last n string units.
+ * @param n suffix length, must be non-negative and <=length()
+ * @stable ICU 4.2
+ */
+ void remove_suffix(int32_t n) {
+ if (n >= 0) {
+ if (n <= length_) {
+ length_ -= n;
+ } else {
+ length_ = 0;
+ }
+ }
+ }
+
+ /**
+ * Maximum integer, used as a default value for substring methods.
+ * @stable ICU 4.2
+ */
+ static const int32_t npos; // = 0x7fffffff;
+
+ /**
+ * Returns a substring of this StringPiece.
+ * @param pos start position; must be non-negative and <= length().
+ * @param len length of the substring;
+ * must be non-negative and will be pinned to at most length() - pos.
+ * @return the substring StringPiece
+ * @stable ICU 4.2
+ */
+ StringPiece substr(int32_t pos, int32_t len = npos) const {
+ return StringPiece(*this, pos, len);
+ }
+};
+
+/**
+ * Global operator == for StringPiece
+ * @param x The first StringPiece to compare.
+ * @param y The second StringPiece to compare.
+ * @return TRUE if the string data is equal
+ * @stable ICU 4.8
+ */
+U_EXPORT UBool U_EXPORT2
+operator==(const StringPiece& x, const StringPiece& y);
+
+/**
+ * Global operator != for StringPiece
+ * @param x The first StringPiece to compare.
+ * @param y The second StringPiece to compare.
+ * @return TRUE if the string data is not equal
+ * @stable ICU 4.8
+ */
+inline UBool operator!=(const StringPiece& x, const StringPiece& y) {
+ return !(x == y);
+}
+
+U_NAMESPACE_END
+
+#endif // __STRINGPIECE_H__
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ucasemap.h b/src/core/basetypes/icu-ucsdet/include/unicode/ucasemap.h
new file mode 100644
index 00000000..b37e1658
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ucasemap.h
@@ -0,0 +1,423 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2005-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucasemap.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2005may06
+* created by: Markus W. Scherer
+*
+* Case mapping service object and functions using it.
+*/
+
+#ifndef __UCASEMAP_H__
+#define __UCASEMAP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "unicode/localpointer.h"
+
+/**
+ * \file
+ * \brief C API: Unicode case mapping functions using a UCaseMap service object.
+ *
+ * The service object takes care of memory allocations, data loading, and setup
+ * for the attributes, as usual.
+ *
+ * Currently, the functionality provided here does not overlap with uchar.h
+ * and ustring.h, except for ucasemap_toTitle().
+ *
+ * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
+ */
+
+/**
+ * UCaseMap is an opaque service object for newer ICU case mapping functions.
+ * Older functions did not use a service object.
+ * @stable ICU 3.4
+ */
+struct UCaseMap;
+typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
+
+/**
+ * Open a UCaseMap service object for a locale and a set of options.
+ * The locale ID and options are preprocessed so that functions using the
+ * service object need not process them in each call.
+ *
+ * @param locale ICU locale ID, used for language-dependent
+ * upper-/lower-/title-casing according to the Unicode standard.
+ * Usual semantics: ""=root, NULL=default locale, etc.
+ * @param options Options bit set, used for case folding and string comparisons.
+ * Same flags as for u_foldCase(), u_strFoldCase(),
+ * u_strCaseCompare(), etc.
+ * Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return Pointer to a UCaseMap service object, if successful.
+ *
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 3.4
+ */
+U_STABLE UCaseMap * U_EXPORT2
+ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Close a UCaseMap service object.
+ * @param csm Object to be closed.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_close(UCaseMap *csm);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCaseMapPointer
+ * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Get the locale ID that is used for language-dependent case mappings.
+ * @param csm UCaseMap service object.
+ * @return locale ID
+ * @stable ICU 3.4
+ */
+U_STABLE const char * U_EXPORT2
+ucasemap_getLocale(const UCaseMap *csm);
+
+/**
+ * Get the options bit set that is used for case folding and string comparisons.
+ * @param csm UCaseMap service object.
+ * @return options bit set
+ * @stable ICU 3.4
+ */
+U_STABLE uint32_t U_EXPORT2
+ucasemap_getOptions(const UCaseMap *csm);
+
+/**
+ * Set the locale ID that is used for language-dependent case mappings.
+ *
+ * @param csm UCaseMap service object.
+ * @param locale Locale ID, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
+
+/**
+ * Set the options bit set that is used for case folding and string comparisons.
+ *
+ * @param csm UCaseMap service object.
+ * @param options Options bit set, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Do not lowercase non-initial parts of words when titlecasing.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will titlecase the first cased character
+ * of a word and lowercase all other characters.
+ * With this option, the other characters will not be modified.
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_LOWERCASE 0x100
+
+/**
+ * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
+ * titlecase exactly the characters at breaks from the iterator.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will take each break iterator index,
+ * adjust it by looking for the next cased character, and titlecase that one.
+ * Other characters are lowercased.
+ *
+ * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
+ *
+ * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Get the break iterator that is used for titlecasing.
+ * Do not modify the returned break iterator.
+ * @param csm UCaseMap service object.
+ * @return titlecasing break iterator
+ * @stable ICU 3.8
+ */
+U_STABLE const UBreakIterator * U_EXPORT2
+ucasemap_getBreakIterator(const UCaseMap *csm);
+
+/**
+ * Set the break iterator that is used for titlecasing.
+ * The UCaseMap service object releases a previously set break iterator
+ * and "adopts" this new one, taking ownership of it.
+ * It will be released in a subsequent call to ucasemap_setBreakIterator()
+ * or ucasemap_close().
+ *
+ * Break iterator operations are not thread-safe. Therefore, titlecasing
+ * functions use non-const UCaseMap objects. It is not possible to titlecase
+ * strings concurrently using the same UCaseMap.
+ *
+ * @param csm UCaseMap service object.
+ * @param iterToAdopt Break iterator to be adopted for titlecasing.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+U_STABLE void U_EXPORT2
+ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
+
+/**
+ * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
+ * except that it takes ucasemap_setOptions() into account and has performance
+ * advantages from being able to use a UCaseMap object for multiple case mapping
+ * operations, saving setup time.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * Note: This function takes a non-const UCaseMap pointer because it will
+ * open a default break iterator if no break iterator was set yet,
+ * and effectively call ucasemap_setBreakIterator();
+ * also because the break iterator is stateful and will be modified during
+ * the iteration.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setUText(), first(), next() and close() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object. This pointer is non-const!
+ * See the note above for details.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @stable ICU 3.8
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_toTitle(UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Lowercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToLower
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8ToLower(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Uppercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToUpper
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8ToUpper(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * Note: This function takes a non-const UCaseMap pointer because it will
+ * open a default break iterator if no break iterator was set yet,
+ * and effectively call ucasemap_setBreakIterator();
+ * also because the break iterator is stateful and will be modified during
+ * the iteration.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setUText(), first(), next() and close() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object. This pointer is non-const!
+ * See the note above for details.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 3.8
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8ToTitle(UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-folds the characters in a UTF-8 string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strFoldCase
+ * @see ucasemap_setOptions
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @stable ICU 3.8
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8FoldCase(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uchar.h b/src/core/basetypes/icu-ucsdet/include/unicode/uchar.h
new file mode 100644
index 00000000..1d3ae0dd
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uchar.h
@@ -0,0 +1,3426 @@
+/*
+**********************************************************************
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File UCHAR.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 03/29/99 helena Updated for C APIs.
+* 4/15/99 Madhu Updated for C Implementation and Javadoc
+* 5/20/99 Madhu Added the function u_getVersion()
+* 8/19/1999 srl Upgraded scripts to Unicode 3.0
+* 8/27/1999 schererm UCharDirection constants: U_...
+* 11/11/1999 weiv added u_isalnum(), cleaned comments
+* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
+******************************************************************************
+*/
+
+#ifndef UCHAR_H
+#define UCHAR_H
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/*==========================================================================*/
+/* Unicode version number */
+/*==========================================================================*/
+/**
+ * Unicode version number, default for the current ICU version.
+ * The actual Unicode Character Database (UCD) data is stored in uprops.dat
+ * and may be generated from UCD files from a different Unicode version.
+ * Call u_getUnicodeVersion to get the actual Unicode version of the data.
+ *
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.0
+ */
+#define U_UNICODE_VERSION "7.0"
+
+/**
+ * \file
+ * \brief C API: Unicode Properties
+ *
+ * This C API provides low-level access to the Unicode Character Database.
+ * In addition to raw property values, some convenience functions calculate
+ * derived properties, for example for Java-style programming.
+ *
+ * Unicode assigns each code point (not just assigned character) values for
+ * many properties.
+ * Most of them are simple boolean flags, or constants from a small enumerated list.
+ * For some properties, values are strings or other relatively more complex types.
+ *
+ * For more information see
+ * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
+ * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
+ *
+ * Many functions are designed to match java.lang.Character functions.
+ * See the individual function documentation,
+ * and see the JDK 1.4 java.lang.Character documentation
+ * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
+ *
+ * There are also functions that provide easy migration from C/POSIX functions
+ * like isblank(). Their use is generally discouraged because the C/POSIX
+ * standards do not define their semantics beyond the ASCII range, which means
+ * that different implementations exhibit very different behavior.
+ * Instead, Unicode properties should be used directly.
+ *
+ * There are also only a few, broad C/POSIX character classes, and they tend
+ * to be used for conflicting purposes. For example, the "isalpha()" class
+ * is sometimes used to determine word boundaries, while a more sophisticated
+ * approach would at least distinguish initial letters from continuation
+ * characters (the latter including combining marks).
+ * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
+ * Another example: There is no "istitle()" class for titlecase characters.
+ *
+ * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
+ * ICU implements them according to the Standard Recommendations in
+ * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
+ * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
+ *
+ * API access for C/POSIX character classes is as follows:
+ * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
+ * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
+ * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
+ * - punct: u_ispunct(c)
+ * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
+ * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
+ * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
+ * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
+ * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
+ * - cntrl: u_charType(c)==U_CONTROL_CHAR
+ * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
+ * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
+ *
+ * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
+ * the Standard Recommendations in UTS #18. Instead, they match Java
+ * functions according to their API documentation.
+ *
+ * \htmlonly
+ * The C/POSIX character classes are also available in UnicodeSet patterns,
+ * using patterns like [:graph:] or \p{graph}.
+ * \endhtmlonly
+ *
+ * Note: There are several ICU whitespace functions.
+ * Comparison:
+ * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
+ * most of general categories "Z" (separators) + most whitespace ISO controls
+ * (including no-break spaces, but excluding IS1..IS4 and ZWSP)
+ * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
+ * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
+ * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
+ * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
+ */
+
+/**
+ * Constants.
+ */
+
+/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
+#define UCHAR_MIN_VALUE 0
+
+/**
+ * The highest Unicode code point value (scalar value) according to
+ * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
+ * For a single character, UChar32 is a simple type that can hold any code point value.
+ *
+ * @see UChar32
+ * @stable ICU 2.0
+ */
+#define UCHAR_MAX_VALUE 0x10ffff
+
+/**
+ * Get a single-bit bit set (a flag) from a bit number 0..31.
+ * @stable ICU 2.1
+ */
+#define U_MASK(x) ((uint32_t)1<<(x))
+
+/**
+ * Selection constants for Unicode properties.
+ * These constants are used in functions like u_hasBinaryProperty to select
+ * one of the Unicode properties.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ * Check u_getUnicodeVersion to be sure.
+ *
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+typedef enum UProperty {
+ /*
+ * Note: UProperty constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UCHAR_<Unicode property name>=<integer>,
+ */
+
+ /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
+ debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
+ rather than UCHAR_BINARY_START. Likewise for other *_START
+ identifiers. */
+
+ /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
+ Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
+ UCHAR_ALPHABETIC=0,
+ /** First constant for binary Unicode properties. @stable ICU 2.1 */
+ UCHAR_BINARY_START=UCHAR_ALPHABETIC,
+ /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
+ UCHAR_ASCII_HEX_DIGIT=1,
+ /** Binary property Bidi_Control.
+ Format controls which have specific functions
+ in the Bidi Algorithm. @stable ICU 2.1 */
+ UCHAR_BIDI_CONTROL=2,
+ /** Binary property Bidi_Mirrored.
+ Characters that may change display in RTL text.
+ Same as u_isMirrored.
+ See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
+ UCHAR_BIDI_MIRRORED=3,
+ /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
+ UCHAR_DASH=4,
+ /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
+ Ignorable in most processing.
+ <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
+ UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,
+ /** Binary property Deprecated (new in Unicode 3.2).
+ The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
+ UCHAR_DEPRECATED=6,
+ /** Binary property Diacritic. Characters that linguistically modify
+ the meaning of another character to which they apply. @stable ICU 2.1 */
+ UCHAR_DIACRITIC=7,
+ /** Binary property Extender.
+ Extend the value or shape of a preceding alphabetic character,
+ e.g., length and iteration marks. @stable ICU 2.1 */
+ UCHAR_EXTENDER=8,
+ /** Binary property Full_Composition_Exclusion.
+ CompositionExclusions.txt+Singleton Decompositions+
+ Non-Starter Decompositions. @stable ICU 2.1 */
+ UCHAR_FULL_COMPOSITION_EXCLUSION=9,
+ /** Binary property Grapheme_Base (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries.
+ [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
+ UCHAR_GRAPHEME_BASE=10,
+ /** Binary property Grapheme_Extend (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries.
+ Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
+ UCHAR_GRAPHEME_EXTEND=11,
+ /** Binary property Grapheme_Link (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
+ UCHAR_GRAPHEME_LINK=12,
+ /** Binary property Hex_Digit.
+ Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
+ UCHAR_HEX_DIGIT=13,
+ /** Binary property Hyphen. Dashes used to mark connections
+ between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
+ UCHAR_HYPHEN=14,
+ /** Binary property ID_Continue.
+ Characters that can continue an identifier.
+ DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
+ ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
+ UCHAR_ID_CONTINUE=15,
+ /** Binary property ID_Start.
+ Characters that can start an identifier.
+ Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
+ UCHAR_ID_START=16,
+ /** Binary property Ideographic.
+ CJKV ideographs. @stable ICU 2.1 */
+ UCHAR_IDEOGRAPHIC=17,
+ /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_IDS_BINARY_OPERATOR=18,
+ /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_IDS_TRINARY_OPERATOR=19,
+ /** Binary property Join_Control.
+ Format controls for cursive joining and ligation. @stable ICU 2.1 */
+ UCHAR_JOIN_CONTROL=20,
+ /** Binary property Logical_Order_Exception (new in Unicode 3.2).
+ Characters that do not use logical order and
+ require special handling in most processing. @stable ICU 2.1 */
+ UCHAR_LOGICAL_ORDER_EXCEPTION=21,
+ /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
+ Ll+Other_Lowercase @stable ICU 2.1 */
+ UCHAR_LOWERCASE=22,
+ /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
+ UCHAR_MATH=23,
+ /** Binary property Noncharacter_Code_Point.
+ Code points that are explicitly defined as illegal
+ for the encoding of characters. @stable ICU 2.1 */
+ UCHAR_NONCHARACTER_CODE_POINT=24,
+ /** Binary property Quotation_Mark. @stable ICU 2.1 */
+ UCHAR_QUOTATION_MARK=25,
+ /** Binary property Radical (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_RADICAL=26,
+ /** Binary property Soft_Dotted (new in Unicode 3.2).
+ Characters with a "soft dot", like i or j.
+ An accent placed on these characters causes
+ the dot to disappear. @stable ICU 2.1 */
+ UCHAR_SOFT_DOTTED=27,
+ /** Binary property Terminal_Punctuation.
+ Punctuation characters that generally mark
+ the end of textual units. @stable ICU 2.1 */
+ UCHAR_TERMINAL_PUNCTUATION=28,
+ /** Binary property Unified_Ideograph (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_UNIFIED_IDEOGRAPH=29,
+ /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
+ Lu+Other_Uppercase @stable ICU 2.1 */
+ UCHAR_UPPERCASE=30,
+ /** Binary property White_Space.
+ Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
+ Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
+ UCHAR_WHITE_SPACE=31,
+ /** Binary property XID_Continue.
+ ID_Continue modified to allow closure under
+ normalization forms NFKC and NFKD. @stable ICU 2.1 */
+ UCHAR_XID_CONTINUE=32,
+ /** Binary property XID_Start. ID_Start modified to allow
+ closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
+ UCHAR_XID_START=33,
+ /** Binary property Case_Sensitive. Either the source of a case
+ mapping or _in_ the target of a case mapping. Not the same as
+ the general category Cased_Letter. @stable ICU 2.6 */
+ UCHAR_CASE_SENSITIVE=34,
+ /** Binary property STerm (new in Unicode 4.0.1).
+ Sentence Terminal. Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ @stable ICU 3.0 */
+ UCHAR_S_TERM=35,
+ /** Binary property Variation_Selector (new in Unicode 4.0.1).
+ Indicates all those characters that qualify as Variation Selectors.
+ For details on the behavior of these characters,
+ see StandardizedVariants.html and 15.6 Variation Selectors.
+ @stable ICU 3.0 */
+ UCHAR_VARIATION_SELECTOR=36,
+ /** Binary property NFD_Inert.
+ ICU-specific property for characters that are inert under NFD,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFD_INERT=37,
+ /** Binary property NFKD_Inert.
+ ICU-specific property for characters that are inert under NFKD,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFKD_INERT=38,
+ /** Binary property NFC_Inert.
+ ICU-specific property for characters that are inert under NFC,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFC_INERT=39,
+ /** Binary property NFKC_Inert.
+ ICU-specific property for characters that are inert under NFKC,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFKC_INERT=40,
+ /** Binary Property Segment_Starter.
+ ICU-specific property for characters that are starters in terms of
+ Unicode normalization and combining character sequences.
+ They have ccc=0 and do not occur in non-initial position of the
+ canonical decomposition of any character
+ (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)).
+ ICU uses this property for segmenting a string for generating a set of
+ canonically equivalent strings, e.g. for canonical closure while
+ processing collation tailoring rules.
+ @stable ICU 3.0 */
+ UCHAR_SEGMENT_STARTER=41,
+ /** Binary property Pattern_Syntax (new in Unicode 4.1).
+ See UAX #31 Identifier and Pattern Syntax
+ (http://www.unicode.org/reports/tr31/)
+ @stable ICU 3.4 */
+ UCHAR_PATTERN_SYNTAX=42,
+ /** Binary property Pattern_White_Space (new in Unicode 4.1).
+ See UAX #31 Identifier and Pattern Syntax
+ (http://www.unicode.org/reports/tr31/)
+ @stable ICU 3.4 */
+ UCHAR_PATTERN_WHITE_SPACE=43,
+ /** Binary property alnum (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_ALNUM=44,
+ /** Binary property blank (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_BLANK=45,
+ /** Binary property graph (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_GRAPH=46,
+ /** Binary property print (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_PRINT=47,
+ /** Binary property xdigit (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_XDIGIT=48,
+ /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */
+ UCHAR_CASED=49,
+ /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */
+ UCHAR_CASE_IGNORABLE=50,
+ /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_LOWERCASED=51,
+ /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_UPPERCASED=52,
+ /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_TITLECASED=53,
+ /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_CASEFOLDED=54,
+ /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_CASEMAPPED=55,
+ /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
+ /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
+ UCHAR_BINARY_LIMIT=57,
+
+ /** Enumerated property Bidi_Class.
+ Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
+ UCHAR_BIDI_CLASS=0x1000,
+ /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+ UCHAR_INT_START=UCHAR_BIDI_CLASS,
+ /** Enumerated property Block.
+ Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
+ UCHAR_BLOCK=0x1001,
+ /** Enumerated property Canonical_Combining_Class.
+ Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
+ UCHAR_CANONICAL_COMBINING_CLASS=0x1002,
+ /** Enumerated property Decomposition_Type.
+ Returns UDecompositionType values. @stable ICU 2.2 */
+ UCHAR_DECOMPOSITION_TYPE=0x1003,
+ /** Enumerated property East_Asian_Width.
+ See http://www.unicode.org/reports/tr11/
+ Returns UEastAsianWidth values. @stable ICU 2.2 */
+ UCHAR_EAST_ASIAN_WIDTH=0x1004,
+ /** Enumerated property General_Category.
+ Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
+ UCHAR_GENERAL_CATEGORY=0x1005,
+ /** Enumerated property Joining_Group.
+ Returns UJoiningGroup values. @stable ICU 2.2 */
+ UCHAR_JOINING_GROUP=0x1006,
+ /** Enumerated property Joining_Type.
+ Returns UJoiningType values. @stable ICU 2.2 */
+ UCHAR_JOINING_TYPE=0x1007,
+ /** Enumerated property Line_Break.
+ Returns ULineBreak values. @stable ICU 2.2 */
+ UCHAR_LINE_BREAK=0x1008,
+ /** Enumerated property Numeric_Type.
+ Returns UNumericType values. @stable ICU 2.2 */
+ UCHAR_NUMERIC_TYPE=0x1009,
+ /** Enumerated property Script.
+ Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
+ UCHAR_SCRIPT=0x100A,
+ /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
+ Returns UHangulSyllableType values. @stable ICU 2.6 */
+ UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,
+ /** Enumerated property NFD_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFD_QUICK_CHECK=0x100C,
+ /** Enumerated property NFKD_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFKD_QUICK_CHECK=0x100D,
+ /** Enumerated property NFC_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFC_QUICK_CHECK=0x100E,
+ /** Enumerated property NFKC_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFKC_QUICK_CHECK=0x100F,
+ /** Enumerated property Lead_Canonical_Combining_Class.
+ ICU-specific property for the ccc of the first code point
+ of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
+ Useful for checking for canonically ordered text;
+ see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+ Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+ UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,
+ /** Enumerated property Trail_Canonical_Combining_Class.
+ ICU-specific property for the ccc of the last code point
+ of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
+ Useful for checking for canonically ordered text;
+ see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+ Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+ UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,
+ /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns UGraphemeClusterBreak values. @stable ICU 3.4 */
+ UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,
+ /** Enumerated property Sentence_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns USentenceBreak values. @stable ICU 3.4 */
+ UCHAR_SENTENCE_BREAK=0x1013,
+ /** Enumerated property Word_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns UWordBreakValues values. @stable ICU 3.4 */
+ UCHAR_WORD_BREAK=0x1014,
+ /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
+ Used in UAX #9: Unicode Bidirectional Algorithm
+ (http://www.unicode.org/reports/tr9/)
+ Returns UBidiPairedBracketType values. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
+ /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+ UCHAR_INT_LIMIT=0x1016,
+
+ /** Bitmask property General_Category_Mask.
+ This is the General_Category property returned as a bit mask.
+ When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
+ returns bit masks for UCharCategory values where exactly one bit is set.
+ When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
+ a multi-bit mask is used for sets of categories like "Letters".
+ Mask values should be cast to uint32_t.
+ @stable ICU 2.4 */
+ UCHAR_GENERAL_CATEGORY_MASK=0x2000,
+ /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
+ UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
+ /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */
+ UCHAR_MASK_LIMIT=0x2001,
+
+ /** Double property Numeric_Value.
+ Corresponds to u_getNumericValue. @stable ICU 2.4 */
+ UCHAR_NUMERIC_VALUE=0x3000,
+ /** First constant for double Unicode properties. @stable ICU 2.4 */
+ UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
+ /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */
+ UCHAR_DOUBLE_LIMIT=0x3001,
+
+ /** String property Age.
+ Corresponds to u_charAge. @stable ICU 2.4 */
+ UCHAR_AGE=0x4000,
+ /** First constant for string Unicode properties. @stable ICU 2.4 */
+ UCHAR_STRING_START=UCHAR_AGE,
+ /** String property Bidi_Mirroring_Glyph.
+ Corresponds to u_charMirror. @stable ICU 2.4 */
+ UCHAR_BIDI_MIRRORING_GLYPH=0x4001,
+ /** String property Case_Folding.
+ Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
+ UCHAR_CASE_FOLDING=0x4002,
+#ifndef U_HIDE_DEPRECATED_API
+ /** Deprecated string property ISO_Comment.
+ Corresponds to u_getISOComment. @deprecated ICU 49 */
+ UCHAR_ISO_COMMENT=0x4003,
+#endif /* U_HIDE_DEPRECATED_API */
+ /** String property Lowercase_Mapping.
+ Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
+ UCHAR_LOWERCASE_MAPPING=0x4004,
+ /** String property Name.
+ Corresponds to u_charName. @stable ICU 2.4 */
+ UCHAR_NAME=0x4005,
+ /** String property Simple_Case_Folding.
+ Corresponds to u_foldCase. @stable ICU 2.4 */
+ UCHAR_SIMPLE_CASE_FOLDING=0x4006,
+ /** String property Simple_Lowercase_Mapping.
+ Corresponds to u_tolower. @stable ICU 2.4 */
+ UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,
+ /** String property Simple_Titlecase_Mapping.
+ Corresponds to u_totitle. @stable ICU 2.4 */
+ UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,
+ /** String property Simple_Uppercase_Mapping.
+ Corresponds to u_toupper. @stable ICU 2.4 */
+ UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,
+ /** String property Titlecase_Mapping.
+ Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
+ UCHAR_TITLECASE_MAPPING=0x400A,
+#ifndef U_HIDE_DEPRECATED_API
+ /** String property Unicode_1_Name.
+ This property is of little practical value.
+ Beginning with ICU 49, ICU APIs return an empty string for this property.
+ Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
+ UCHAR_UNICODE_1_NAME=0x400B,
+#endif /* U_HIDE_DEPRECATED_API */
+ /** String property Uppercase_Mapping.
+ Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
+ UCHAR_UPPERCASE_MAPPING=0x400C,
+ /** String property Bidi_Paired_Bracket (new in Unicode 6.3).
+ Corresponds to u_getBidiPairedBracket. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET=0x400D,
+ /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
+ UCHAR_STRING_LIMIT=0x400E,
+
+ /** Miscellaneous property Script_Extensions (new in Unicode 6.0).
+ Some characters are commonly used in multiple scripts.
+ For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
+ @stable ICU 4.6 */
+ UCHAR_SCRIPT_EXTENSIONS=0x7000,
+ /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
+ UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
+ /** One more than the last constant for Unicode properties with unusual value types.
+ * @stable ICU 4.6 */
+ UCHAR_OTHER_PROPERTY_LIMIT=0x7001,
+ /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
+ UCHAR_INVALID_CODE = -1
+} UProperty;
+
+/**
+ * Data for enumerated Unicode general category types.
+ * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
+ * @stable ICU 2.0
+ */
+typedef enum UCharCategory
+{
+ /*
+ * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
+ * It matches pairs of lines like
+ * / ** <Unicode 2-letter General_Category value> comment... * /
+ * U_<[A-Z_]+> = <integer>,
+ */
+
+ /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
+ U_UNASSIGNED = 0,
+ /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
+ U_GENERAL_OTHER_TYPES = 0,
+ /** Lu @stable ICU 2.0 */
+ U_UPPERCASE_LETTER = 1,
+ /** Ll @stable ICU 2.0 */
+ U_LOWERCASE_LETTER = 2,
+ /** Lt @stable ICU 2.0 */
+ U_TITLECASE_LETTER = 3,
+ /** Lm @stable ICU 2.0 */
+ U_MODIFIER_LETTER = 4,
+ /** Lo @stable ICU 2.0 */
+ U_OTHER_LETTER = 5,
+ /** Mn @stable ICU 2.0 */
+ U_NON_SPACING_MARK = 6,
+ /** Me @stable ICU 2.0 */
+ U_ENCLOSING_MARK = 7,
+ /** Mc @stable ICU 2.0 */
+ U_COMBINING_SPACING_MARK = 8,
+ /** Nd @stable ICU 2.0 */
+ U_DECIMAL_DIGIT_NUMBER = 9,
+ /** Nl @stable ICU 2.0 */
+ U_LETTER_NUMBER = 10,
+ /** No @stable ICU 2.0 */
+ U_OTHER_NUMBER = 11,
+ /** Zs @stable ICU 2.0 */
+ U_SPACE_SEPARATOR = 12,
+ /** Zl @stable ICU 2.0 */
+ U_LINE_SEPARATOR = 13,
+ /** Zp @stable ICU 2.0 */
+ U_PARAGRAPH_SEPARATOR = 14,
+ /** Cc @stable ICU 2.0 */
+ U_CONTROL_CHAR = 15,
+ /** Cf @stable ICU 2.0 */
+ U_FORMAT_CHAR = 16,
+ /** Co @stable ICU 2.0 */
+ U_PRIVATE_USE_CHAR = 17,
+ /** Cs @stable ICU 2.0 */
+ U_SURROGATE = 18,
+ /** Pd @stable ICU 2.0 */
+ U_DASH_PUNCTUATION = 19,
+ /** Ps @stable ICU 2.0 */
+ U_START_PUNCTUATION = 20,
+ /** Pe @stable ICU 2.0 */
+ U_END_PUNCTUATION = 21,
+ /** Pc @stable ICU 2.0 */
+ U_CONNECTOR_PUNCTUATION = 22,
+ /** Po @stable ICU 2.0 */
+ U_OTHER_PUNCTUATION = 23,
+ /** Sm @stable ICU 2.0 */
+ U_MATH_SYMBOL = 24,
+ /** Sc @stable ICU 2.0 */
+ U_CURRENCY_SYMBOL = 25,
+ /** Sk @stable ICU 2.0 */
+ U_MODIFIER_SYMBOL = 26,
+ /** So @stable ICU 2.0 */
+ U_OTHER_SYMBOL = 27,
+ /** Pi @stable ICU 2.0 */
+ U_INITIAL_PUNCTUATION = 28,
+ /** Pf @stable ICU 2.0 */
+ U_FINAL_PUNCTUATION = 29,
+ /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */
+ U_CHAR_CATEGORY_COUNT
+} UCharCategory;
+
+/**
+ * U_GC_XX_MASK constants are bit flags corresponding to Unicode
+ * general category values.
+ * For each category, the nth bit is set if the numeric value of the
+ * corresponding UCharCategory constant is n.
+ *
+ * There are also some U_GC_Y_MASK constants for groups of general categories
+ * like L for all letter categories.
+ *
+ * @see u_charType
+ * @see U_GET_GC_MASK
+ * @see UCharCategory
+ * @stable ICU 2.1
+ */
+#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CS_MASK U_MASK(U_SURROGATE)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
+
+
+/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
+#define U_GC_L_MASK \
+ (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
+#define U_GC_LC_MASK \
+ (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
+
+/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
+#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
+
+/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
+#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
+#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
+
+/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */
+#define U_GC_C_MASK \
+ (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
+
+/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */
+#define U_GC_P_MASK \
+ (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
+ U_GC_PI_MASK|U_GC_PF_MASK)
+
+/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */
+#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
+
+/**
+ * This specifies the language directional property of a character set.
+ * @stable ICU 2.0
+ */
+typedef enum UCharDirection {
+ /*
+ * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
+ * It matches pairs of lines like
+ * / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
+ * U_<[A-Z_]+> = <integer>,
+ */
+
+ /** L @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT = 0,
+ /** R @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT = 1,
+ /** EN @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER = 2,
+ /** ES @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER_SEPARATOR = 3,
+ /** ET @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER_TERMINATOR = 4,
+ /** AN @stable ICU 2.0 */
+ U_ARABIC_NUMBER = 5,
+ /** CS @stable ICU 2.0 */
+ U_COMMON_NUMBER_SEPARATOR = 6,
+ /** B @stable ICU 2.0 */
+ U_BLOCK_SEPARATOR = 7,
+ /** S @stable ICU 2.0 */
+ U_SEGMENT_SEPARATOR = 8,
+ /** WS @stable ICU 2.0 */
+ U_WHITE_SPACE_NEUTRAL = 9,
+ /** ON @stable ICU 2.0 */
+ U_OTHER_NEUTRAL = 10,
+ /** LRE @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT_EMBEDDING = 11,
+ /** LRO @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT_OVERRIDE = 12,
+ /** AL @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_ARABIC = 13,
+ /** RLE @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_EMBEDDING = 14,
+ /** RLO @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_OVERRIDE = 15,
+ /** PDF @stable ICU 2.0 */
+ U_POP_DIRECTIONAL_FORMAT = 16,
+ /** NSM @stable ICU 2.0 */
+ U_DIR_NON_SPACING_MARK = 17,
+ /** BN @stable ICU 2.0 */
+ U_BOUNDARY_NEUTRAL = 18,
+ /** FSI @stable ICU 52 */
+ U_FIRST_STRONG_ISOLATE = 19,
+ /** LRI @stable ICU 52 */
+ U_LEFT_TO_RIGHT_ISOLATE = 20,
+ /** RLI @stable ICU 52 */
+ U_RIGHT_TO_LEFT_ISOLATE = 21,
+ /** PDI @stable ICU 52 */
+ U_POP_DIRECTIONAL_ISOLATE = 22,
+ /** @stable ICU 2.0 */
+ U_CHAR_DIRECTION_COUNT
+} UCharDirection;
+
+/**
+ * Bidi Paired Bracket Type constants.
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @stable ICU 52
+ */
+typedef enum UBidiPairedBracketType {
+ /*
+ * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
+ */
+
+ /** Not a paired bracket. @stable ICU 52 */
+ U_BPT_NONE,
+ /** Open paired bracket. @stable ICU 52 */
+ U_BPT_OPEN,
+ /** Close paired bracket. @stable ICU 52 */
+ U_BPT_CLOSE,
+ /** @stable ICU 52 */
+ U_BPT_COUNT /* 3 */
+} UBidiPairedBracketType;
+
+/**
+ * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
+ * @stable ICU 2.0
+ */
+enum UBlockCode {
+ /*
+ * Note: UBlockCode constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UBLOCK_<Unicode Block value name> = <integer>,
+ */
+
+ /** New No_Block value in Unicode 4. @stable ICU 2.6 */
+ UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BASIC_LATIN = 1, /*[0000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/
+
+ /**
+ * Unicode 3.2 renames this block to "Greek and Coptic".
+ * @stable ICU 2.0
+ */
+ UBLOCK_GREEK =8, /*[0370]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CYRILLIC =9, /*[0400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARMENIAN =10, /*[0530]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HEBREW =11, /*[0590]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC =12, /*[0600]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SYRIAC =13, /*[0700]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_THAANA =14, /*[0780]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_DEVANAGARI =15, /*[0900]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BENGALI =16, /*[0980]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GURMUKHI =17, /*[0A00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GUJARATI =18, /*[0A80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ORIYA =19, /*[0B00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TAMIL =20, /*[0B80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TELUGU =21, /*[0C00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANNADA =22, /*[0C80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MALAYALAM =23, /*[0D00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SINHALA =24, /*[0D80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_THAI =25, /*[0E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LAO =26, /*[0E80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TIBETAN =27, /*[0F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MYANMAR =28, /*[1000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GEORGIAN =29, /*[10A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_JAMO =30, /*[1100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ETHIOPIC =31, /*[1200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CHEROKEE =32, /*[13A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OGHAM =34, /*[1680]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_RUNIC =35, /*[16A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KHMER =36, /*[1780]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MONGOLIAN =37, /*[1800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
+
+ /**
+ * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
+ * @stable ICU 2.0
+ */
+ UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_NUMBER_FORMS =45, /*[2150]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARROWS =46, /*[2190]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOX_DRAWING =52, /*[2500]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_DINGBATS =56, /*[2700]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIRAGANA =62, /*[3040]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KATAKANA =63, /*[30A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOPOMOFO =64, /*[3100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANBUN =66, /*[3190]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_YI_SYLLABLES =72, /*[A000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_YI_RADICALS =73, /*[A490]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
+
+ /**
+ * Same as UBLOCK_PRIVATE_USE.
+ * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+ * and multiple code point ranges had this block.
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+ * adds separate blocks for the supplementary PUAs.
+ *
+ * @stable ICU 2.0
+ */
+ UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
+ /**
+ * Same as UBLOCK_PRIVATE_USE_AREA.
+ * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+ * and multiple code point ranges had this block.
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+ * adds separate blocks for the supplementary PUAs.
+ *
+ * @stable ICU 2.0
+ */
+ UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SPECIALS =86, /*[FFF0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/
+
+ /* New blocks in Unicode 3.1 */
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OLD_ITALIC = 88, /*[10300]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_GOTHIC = 89, /*[10330]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_DESERET = 90, /*[10400]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_TAGS = 96, /*[E0000]*/
+
+ /* New blocks in Unicode 3.2 */
+
+ /** @stable ICU 3.0 */
+ UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/
+ /**
+ * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
+ * @stable ICU 2.2
+ */
+ UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
+ /** @stable ICU 2.2 */
+ UBLOCK_TAGALOG = 98, /*[1700]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_HANUNOO = 99, /*[1720]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_BUHID = 100, /*[1740]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_TAGBANWA = 101, /*[1760]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
+
+ /* New blocks in Unicode 4 */
+
+ /** @stable ICU 2.6 */
+ UBLOCK_LIMBU = 111, /*[1900]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_TAI_LE = 112, /*[1950]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_UGARITIC = 120, /*[10380]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_SHAVIAN = 121, /*[10450]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_OSMANYA = 122, /*[10480]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/
+
+ /* New blocks in Unicode 4.1 */
+
+ /** @stable ICU 3.4 */
+ UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_BUGINESE = 129, /*[1A00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_CJK_STROKES = 130, /*[31C0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_COPTIC = 132, /*[2C80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_TIFINAGH = 144, /*[2D30]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
+
+ /* New blocks in Unicode 5.0 */
+
+ /** @stable ICU 3.6 */
+ UBLOCK_NKO = 146, /*[07C0]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_BALINESE = 147, /*[1B00]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_PHAGS_PA = 150, /*[A840]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_PHOENICIAN = 151, /*[10900]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_CUNEIFORM = 152, /*[12000]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
+
+ /* New blocks in Unicode 5.1 */
+
+ /** @stable ICU 4.0 */
+ UBLOCK_SUNDANESE = 155, /*[1B80]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_LEPCHA = 156, /*[1C00]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_OL_CHIKI = 157, /*[1C50]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_VAI = 159, /*[A500]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_SAURASHTRA = 161, /*[A880]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_KAYAH_LI = 162, /*[A900]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_REJANG = 163, /*[A930]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CHAM = 164, /*[AA00]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_LYCIAN = 167, /*[10280]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CARIAN = 168, /*[102A0]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_LYDIAN = 169, /*[10920]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
+
+ /* New blocks in Unicode 5.2 */
+
+ /** @stable ICU 4.4 */
+ UBLOCK_SAMARITAN = 172, /*[0800]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_TAI_THAM = 174, /*[1A20]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_LISU = 176, /*[A4D0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_BAMUM = 177, /*[A6A0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_JAVANESE = 181, /*[A980]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_TAI_VIET = 183, /*[AA80]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_AVESTAN = 188, /*[10B00]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_KAITHI = 193, /*[11080]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/
+
+ /* New blocks in Unicode 6.0 */
+
+ /** @stable ICU 4.6 */
+ UBLOCK_MANDAIC = 198, /*[0840]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_BATAK = 199, /*[1BC0]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_BRAHMI = 201, /*[11000]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_EMOTICONS = 206, /*[1F600]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/
+
+ /* New blocks in Unicode 6.1 */
+
+ /** @stable ICU 49 */
+ UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
+ /** @stable ICU 49 */
+ UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/
+ /** @stable ICU 49 */
+ UBLOCK_CHAKMA = 212, /*[11100]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
+ /** @stable ICU 49 */
+ UBLOCK_MIAO = 216, /*[16F00]*/
+ /** @stable ICU 49 */
+ UBLOCK_SHARADA = 217, /*[11180]*/
+ /** @stable ICU 49 */
+ UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
+ /** @stable ICU 49 */
+ UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/
+ /** @stable ICU 49 */
+ UBLOCK_TAKRI = 220, /*[11680]*/
+
+ /* New blocks in Unicode 7.0 */
+
+ /** @stable ICU 54 */
+ UBLOCK_BASSA_VAH = 221, /*[16AD0]*/
+ /** @stable ICU 54 */
+ UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/
+ /** @stable ICU 54 */
+ UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/
+ /** @stable ICU 54 */
+ UBLOCK_DUPLOYAN = 225, /*[1BC00]*/
+ /** @stable ICU 54 */
+ UBLOCK_ELBASAN = 226, /*[10500]*/
+ /** @stable ICU 54 */
+ UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/
+ /** @stable ICU 54 */
+ UBLOCK_GRANTHA = 228, /*[11300]*/
+ /** @stable ICU 54 */
+ UBLOCK_KHOJKI = 229, /*[11200]*/
+ /** @stable ICU 54 */
+ UBLOCK_KHUDAWADI = 230, /*[112B0]*/
+ /** @stable ICU 54 */
+ UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/
+ /** @stable ICU 54 */
+ UBLOCK_LINEAR_A = 232, /*[10600]*/
+ /** @stable ICU 54 */
+ UBLOCK_MAHAJANI = 233, /*[11150]*/
+ /** @stable ICU 54 */
+ UBLOCK_MANICHAEAN = 234, /*[10AC0]*/
+ /** @stable ICU 54 */
+ UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/
+ /** @stable ICU 54 */
+ UBLOCK_MODI = 236, /*[11600]*/
+ /** @stable ICU 54 */
+ UBLOCK_MRO = 237, /*[16A40]*/
+ /** @stable ICU 54 */
+ UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_NABATAEAN = 239, /*[10880]*/
+ /** @stable ICU 54 */
+ UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/
+ /** @stable ICU 54 */
+ UBLOCK_OLD_PERMIC = 241, /*[10350]*/
+ /** @stable ICU 54 */
+ UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/
+ /** @stable ICU 54 */
+ UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/
+ /** @stable ICU 54 */
+ UBLOCK_PALMYRENE = 244, /*[10860]*/
+ /** @stable ICU 54 */
+ UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/
+ /** @stable ICU 54 */
+ UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/
+ /** @stable ICU 54 */
+ UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/
+ /** @stable ICU 54 */
+ UBLOCK_SIDDHAM = 248, /*[11580]*/
+ /** @stable ICU 54 */
+ UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/
+ /** @stable ICU 54 */
+ UBLOCK_TIRHUTA = 251, /*[11480]*/
+ /** @stable ICU 54 */
+ UBLOCK_WARANG_CITI = 252, /*[118A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COUNT = 253,
+
+ /** @stable ICU 2.0 */
+ UBLOCK_INVALID_CODE=-1
+};
+
+/** @stable ICU 2.0 */
+typedef enum UBlockCode UBlockCode;
+
+/**
+ * East Asian Width constants.
+ *
+ * @see UCHAR_EAST_ASIAN_WIDTH
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+typedef enum UEastAsianWidth {
+ /*
+ * Note: UEastAsianWidth constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_EA_<Unicode East_Asian_Width value name>
+ */
+
+ U_EA_NEUTRAL, /*[N]*/
+ U_EA_AMBIGUOUS, /*[A]*/
+ U_EA_HALFWIDTH, /*[H]*/
+ U_EA_FULLWIDTH, /*[F]*/
+ U_EA_NARROW, /*[Na]*/
+ U_EA_WIDE, /*[W]*/
+ U_EA_COUNT
+} UEastAsianWidth;
+
+/**
+ * Selector constants for u_charName().
+ * u_charName() returns the "modern" name of a
+ * Unicode character; or the name that was defined in
+ * Unicode version 1.0, before the Unicode standard merged
+ * with ISO-10646; or an "extended" name that gives each
+ * Unicode code point a unique name.
+ *
+ * @see u_charName
+ * @stable ICU 2.0
+ */
+typedef enum UCharNameChoice {
+ /** Unicode character name (Name property). @stable ICU 2.0 */
+ U_UNICODE_CHAR_NAME,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * The Unicode_1_Name property value which is of little practical value.
+ * Beginning with ICU 49, ICU APIs return an empty string for this name choice.
+ * @deprecated ICU 49
+ */
+ U_UNICODE_10_CHAR_NAME,
+#endif /* U_HIDE_DEPRECATED_API */
+ /** Standard or synthetic character name. @stable ICU 2.0 */
+ U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
+ /** Corrected name from NameAliases.txt. @stable ICU 4.4 */
+ U_CHAR_NAME_ALIAS,
+ /** @stable ICU 2.0 */
+ U_CHAR_NAME_CHOICE_COUNT
+} UCharNameChoice;
+
+/**
+ * Selector constants for u_getPropertyName() and
+ * u_getPropertyValueName(). These selectors are used to choose which
+ * name is returned for a given property or value. All properties and
+ * values have a long name. Most have a short name, but some do not.
+ * Unicode allows for additional names, beyond the long and short
+ * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
+ * i=1, 2,...
+ *
+ * @see u_getPropertyName()
+ * @see u_getPropertyValueName()
+ * @stable ICU 2.4
+ */
+typedef enum UPropertyNameChoice {
+ U_SHORT_PROPERTY_NAME,
+ U_LONG_PROPERTY_NAME,
+ U_PROPERTY_NAME_CHOICE_COUNT
+} UPropertyNameChoice;
+
+/**
+ * Decomposition Type constants.
+ *
+ * @see UCHAR_DECOMPOSITION_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UDecompositionType {
+ /*
+ * Note: UDecompositionType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_DT_<Unicode Decomposition_Type value name>
+ */
+
+ U_DT_NONE, /*[none]*/
+ U_DT_CANONICAL, /*[can]*/
+ U_DT_COMPAT, /*[com]*/
+ U_DT_CIRCLE, /*[enc]*/
+ U_DT_FINAL, /*[fin]*/
+ U_DT_FONT, /*[font]*/
+ U_DT_FRACTION, /*[fra]*/
+ U_DT_INITIAL, /*[init]*/
+ U_DT_ISOLATED, /*[iso]*/
+ U_DT_MEDIAL, /*[med]*/
+ U_DT_NARROW, /*[nar]*/
+ U_DT_NOBREAK, /*[nb]*/
+ U_DT_SMALL, /*[sml]*/
+ U_DT_SQUARE, /*[sqr]*/
+ U_DT_SUB, /*[sub]*/
+ U_DT_SUPER, /*[sup]*/
+ U_DT_VERTICAL, /*[vert]*/
+ U_DT_WIDE, /*[wide]*/
+ U_DT_COUNT /* 18 */
+} UDecompositionType;
+
+/**
+ * Joining Type constants.
+ *
+ * @see UCHAR_JOINING_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningType {
+ /*
+ * Note: UJoiningType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_JT_<Unicode Joining_Type value name>
+ */
+
+ U_JT_NON_JOINING, /*[U]*/
+ U_JT_JOIN_CAUSING, /*[C]*/
+ U_JT_DUAL_JOINING, /*[D]*/
+ U_JT_LEFT_JOINING, /*[L]*/
+ U_JT_RIGHT_JOINING, /*[R]*/
+ U_JT_TRANSPARENT, /*[T]*/
+ U_JT_COUNT /* 6 */
+} UJoiningType;
+
+/**
+ * Joining Group constants.
+ *
+ * @see UCHAR_JOINING_GROUP
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningGroup {
+ /*
+ * Note: UJoiningGroup constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_JG_<Unicode Joining_Group value name>
+ */
+
+ U_JG_NO_JOINING_GROUP,
+ U_JG_AIN,
+ U_JG_ALAPH,
+ U_JG_ALEF,
+ U_JG_BEH,
+ U_JG_BETH,
+ U_JG_DAL,
+ U_JG_DALATH_RISH,
+ U_JG_E,
+ U_JG_FEH,
+ U_JG_FINAL_SEMKATH,
+ U_JG_GAF,
+ U_JG_GAMAL,
+ U_JG_HAH,
+ U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */
+ U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
+ U_JG_HE,
+ U_JG_HEH,
+ U_JG_HEH_GOAL,
+ U_JG_HETH,
+ U_JG_KAF,
+ U_JG_KAPH,
+ U_JG_KNOTTED_HEH,
+ U_JG_LAM,
+ U_JG_LAMADH,
+ U_JG_MEEM,
+ U_JG_MIM,
+ U_JG_NOON,
+ U_JG_NUN,
+ U_JG_PE,
+ U_JG_QAF,
+ U_JG_QAPH,
+ U_JG_REH,
+ U_JG_REVERSED_PE,
+ U_JG_SAD,
+ U_JG_SADHE,
+ U_JG_SEEN,
+ U_JG_SEMKATH,
+ U_JG_SHIN,
+ U_JG_SWASH_KAF,
+ U_JG_SYRIAC_WAW,
+ U_JG_TAH,
+ U_JG_TAW,
+ U_JG_TEH_MARBUTA,
+ U_JG_TETH,
+ U_JG_WAW,
+ U_JG_YEH,
+ U_JG_YEH_BARREE,
+ U_JG_YEH_WITH_TAIL,
+ U_JG_YUDH,
+ U_JG_YUDH_HE,
+ U_JG_ZAIN,
+ U_JG_FE, /**< @stable ICU 2.6 */
+ U_JG_KHAPH, /**< @stable ICU 2.6 */
+ U_JG_ZHAIN, /**< @stable ICU 2.6 */
+ U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */
+ U_JG_FARSI_YEH, /**< @stable ICU 4.4 */
+ U_JG_NYA, /**< @stable ICU 4.4 */
+ U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */
+ U_JG_MANICHAEAN_ALEPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_AYIN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_BETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_DALETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_DHAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_FIVE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_GIMEL, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_HETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_HUNDRED, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_KAPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_LAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_MEM, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_NUN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_ONE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_PE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_QOPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_RESH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_SADHE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_SAMEKH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TAW, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TEN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_THAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TWENTY, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_WAW, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_YODH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_ZAYIN, /**< @stable ICU 54 */
+ U_JG_STRAIGHT_WAW, /**< @stable ICU 54 */
+ U_JG_COUNT
+} UJoiningGroup;
+
+/**
+ * Grapheme Cluster Break constants.
+ *
+ * @see UCHAR_GRAPHEME_CLUSTER_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UGraphemeClusterBreak {
+ /*
+ * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_GCB_<Unicode Grapheme_Cluster_Break value name>
+ */
+
+ U_GCB_OTHER = 0, /*[XX]*/
+ U_GCB_CONTROL = 1, /*[CN]*/
+ U_GCB_CR = 2, /*[CR]*/
+ U_GCB_EXTEND = 3, /*[EX]*/
+ U_GCB_L = 4, /*[L]*/
+ U_GCB_LF = 5, /*[LF]*/
+ U_GCB_LV = 6, /*[LV]*/
+ U_GCB_LVT = 7, /*[LVT]*/
+ U_GCB_T = 8, /*[T]*/
+ U_GCB_V = 9, /*[V]*/
+ U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+ U_GCB_PREPEND = 11, /*[PP]*/
+ U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ U_GCB_COUNT = 13
+} UGraphemeClusterBreak;
+
+/**
+ * Word Break constants.
+ * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
+ *
+ * @see UCHAR_WORD_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UWordBreakValues {
+ /*
+ * Note: UWordBreakValues constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_WB_<Unicode Word_Break value name>
+ */
+
+ U_WB_OTHER = 0, /*[XX]*/
+ U_WB_ALETTER = 1, /*[LE]*/
+ U_WB_FORMAT = 2, /*[FO]*/
+ U_WB_KATAKANA = 3, /*[KA]*/
+ U_WB_MIDLETTER = 4, /*[ML]*/
+ U_WB_MIDNUM = 5, /*[MN]*/
+ U_WB_NUMERIC = 6, /*[NU]*/
+ U_WB_EXTENDNUMLET = 7, /*[EX]*/
+ U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+ U_WB_EXTEND = 9, /*[Extend]*/
+ U_WB_LF = 10, /*[LF]*/
+ U_WB_MIDNUMLET =11, /*[MB]*/
+ U_WB_NEWLINE =12, /*[NL]*/
+ U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
+ U_WB_SINGLE_QUOTE = 15, /*[SQ]*/
+ U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/
+ U_WB_COUNT = 17
+} UWordBreakValues;
+
+/**
+ * Sentence Break constants.
+ *
+ * @see UCHAR_SENTENCE_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum USentenceBreak {
+ /*
+ * Note: USentenceBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_SB_<Unicode Sentence_Break value name>
+ */
+
+ U_SB_OTHER = 0, /*[XX]*/
+ U_SB_ATERM = 1, /*[AT]*/
+ U_SB_CLOSE = 2, /*[CL]*/
+ U_SB_FORMAT = 3, /*[FO]*/
+ U_SB_LOWER = 4, /*[LO]*/
+ U_SB_NUMERIC = 5, /*[NU]*/
+ U_SB_OLETTER = 6, /*[LE]*/
+ U_SB_SEP = 7, /*[SE]*/
+ U_SB_SP = 8, /*[SP]*/
+ U_SB_STERM = 9, /*[ST]*/
+ U_SB_UPPER = 10, /*[UP]*/
+ U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+ U_SB_EXTEND = 12, /*[EX]*/
+ U_SB_LF = 13, /*[LF]*/
+ U_SB_SCONTINUE = 14, /*[SC]*/
+ U_SB_COUNT = 15
+} USentenceBreak;
+
+/**
+ * Line Break constants.
+ *
+ * @see UCHAR_LINE_BREAK
+ * @stable ICU 2.2
+ */
+typedef enum ULineBreak {
+ /*
+ * Note: ULineBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_LB_<Unicode Line_Break value name>
+ */
+
+ U_LB_UNKNOWN = 0, /*[XX]*/
+ U_LB_AMBIGUOUS = 1, /*[AI]*/
+ U_LB_ALPHABETIC = 2, /*[AL]*/
+ U_LB_BREAK_BOTH = 3, /*[B2]*/
+ U_LB_BREAK_AFTER = 4, /*[BA]*/
+ U_LB_BREAK_BEFORE = 5, /*[BB]*/
+ U_LB_MANDATORY_BREAK = 6, /*[BK]*/
+ U_LB_CONTINGENT_BREAK = 7, /*[CB]*/
+ U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
+ U_LB_COMBINING_MARK = 9, /*[CM]*/
+ U_LB_CARRIAGE_RETURN = 10, /*[CR]*/
+ U_LB_EXCLAMATION = 11, /*[EX]*/
+ U_LB_GLUE = 12, /*[GL]*/
+ U_LB_HYPHEN = 13, /*[HY]*/
+ U_LB_IDEOGRAPHIC = 14, /*[ID]*/
+ /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
+ U_LB_INSEPARABLE = 15, /*[IN]*/
+ U_LB_INSEPERABLE = U_LB_INSEPARABLE,
+ U_LB_INFIX_NUMERIC = 16, /*[IS]*/
+ U_LB_LINE_FEED = 17, /*[LF]*/
+ U_LB_NONSTARTER = 18, /*[NS]*/
+ U_LB_NUMERIC = 19, /*[NU]*/
+ U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/
+ U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/
+ U_LB_PREFIX_NUMERIC = 22, /*[PR]*/
+ U_LB_QUOTATION = 23, /*[QU]*/
+ U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/
+ U_LB_SURROGATE = 25, /*[SG]*/
+ U_LB_SPACE = 26, /*[SP]*/
+ U_LB_BREAK_SYMBOLS = 27, /*[SY]*/
+ U_LB_ZWSPACE = 28, /*[ZW]*/
+ U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
+ U_LB_WORD_JOINER = 30, /*[WJ]*/
+ U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
+ U_LB_H3 = 32, /*[H3]*/
+ U_LB_JL = 33, /*[JL]*/
+ U_LB_JT = 34, /*[JT]*/
+ U_LB_JV = 35, /*[JV]*/
+ U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
+ U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
+ U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
+ U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ U_LB_COUNT = 40
+} ULineBreak;
+
+/**
+ * Numeric Type constants.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UNumericType {
+ /*
+ * Note: UNumericType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_NT_<Unicode Numeric_Type value name>
+ */
+
+ U_NT_NONE, /*[None]*/
+ U_NT_DECIMAL, /*[de]*/
+ U_NT_DIGIT, /*[di]*/
+ U_NT_NUMERIC, /*[nu]*/
+ U_NT_COUNT
+} UNumericType;
+
+/**
+ * Hangul Syllable Type constants.
+ *
+ * @see UCHAR_HANGUL_SYLLABLE_TYPE
+ * @stable ICU 2.6
+ */
+typedef enum UHangulSyllableType {
+ /*
+ * Note: UHangulSyllableType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_HST_<Unicode Hangul_Syllable_Type value name>
+ */
+
+ U_HST_NOT_APPLICABLE, /*[NA]*/
+ U_HST_LEADING_JAMO, /*[L]*/
+ U_HST_VOWEL_JAMO, /*[V]*/
+ U_HST_TRAILING_JAMO, /*[T]*/
+ U_HST_LV_SYLLABLE, /*[LV]*/
+ U_HST_LVT_SYLLABLE, /*[LVT]*/
+ U_HST_COUNT
+} UHangulSyllableType;
+
+/**
+ * Check a binary Unicode property for a code point.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.
+ * @return TRUE or FALSE according to the binary Unicode property value for c.
+ * Also FALSE if 'which' is out of bounds or if the Unicode version
+ * does not have data for the property at all, or not for this code point.
+ *
+ * @see UProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_hasBinaryProperty(UChar32 c, UProperty which);
+
+/**
+ * Check if a code point has the Alphabetic Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
+ * This is different from u_isalpha!
+ * @param c Code point to test
+ * @return true if the code point has the Alphabetic Unicode property, false otherwise
+ *
+ * @see UCHAR_ALPHABETIC
+ * @see u_isalpha
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUAlphabetic(UChar32 c);
+
+/**
+ * Check if a code point has the Lowercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
+ * This is different from u_islower!
+ * @param c Code point to test
+ * @return true if the code point has the Lowercase Unicode property, false otherwise
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_islower
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isULowercase(UChar32 c);
+
+/**
+ * Check if a code point has the Uppercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
+ * This is different from u_isupper!
+ * @param c Code point to test
+ * @return true if the code point has the Uppercase Unicode property, false otherwise
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_isupper
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUUppercase(UChar32 c);
+
+/**
+ * Check if a code point has the White_Space Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
+ * This is different from both u_isspace and u_isWhitespace!
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c Code point to test
+ * @return true if the code point has the White_Space Unicode property, false otherwise.
+ *
+ * @see UCHAR_WHITE_SPACE
+ * @see u_isWhitespace
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUWhiteSpace(UChar32 c);
+
+/**
+ * Get the property value for an enumerated or integer Unicode property for a code point.
+ * Also returns binary and mask property values.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Sample usage:
+ * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
+ * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * @return Numeric value that is directly the property value or,
+ * for enumerated properties, corresponds to the numeric value of the enumerated
+ * constant of the respective property value enumeration type
+ * (cast to enum type if necessary).
+ * Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties.
+ * Returns a bit-mask for mask properties.
+ * Returns 0 if 'which' is out of bounds or if the Unicode version
+ * does not have data for the property at all, or not for this code point.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyMinValue
+ * @see u_getIntPropertyMaxValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyValue(UChar32 c, UProperty which);
+
+/**
+ * Get the minimum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMaxValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
+ * 0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyMinValue(UProperty which);
+
+/**
+ * Get the maximum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMinValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * Examples for min/max values (for Unicode 3.2):
+ *
+ * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
+ * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
+ * - UCHAR_IDEOGRAPHIC: 0/1 (FALSE/TRUE)
+ *
+ * For undefined UProperty constant values, min/max values will be 0/-1.
+ *
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
+ * <=0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyMaxValue(UProperty which);
+
+/**
+ * Get the numeric value for a Unicode code point as defined in the
+ * Unicode Character Database.
+ *
+ * A "double" return type is necessary because
+ * some numeric values are fractions, negative, or too large for int32_t.
+ *
+ * For characters without any numeric values in the Unicode Character Database,
+ * this function will return U_NO_NUMERIC_VALUE.
+ * Note: This is different from the Unicode Standard which specifies NaN as the default value.
+ * (NaN is not available on all platforms.)
+ *
+ * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
+ * also supports negative values, large values, and fractions,
+ * while Java's getNumericValue() returns values 10..35 for ASCII letters.
+ *
+ * @param c Code point to get the numeric value for.
+ * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
+ *
+ * @see U_NO_NUMERIC_VALUE
+ * @stable ICU 2.2
+ */
+U_STABLE double U_EXPORT2
+u_getNumericValue(UChar32 c);
+
+/**
+ * Special value that is returned by u_getNumericValue when
+ * no numeric value is defined for a code point.
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.2
+ */
+#define U_NO_NUMERIC_VALUE ((double)-123456789.)
+
+/**
+ * Determines whether the specified code point has the general category "Ll"
+ * (lowercase letter).
+ *
+ * Same as java.lang.Character.isLowerCase().
+ *
+ * This misses some characters that are also lowercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_LOWERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Ll lowercase letter
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_isupper
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_islower(UChar32 c);
+
+/**
+ * Determines whether the specified code point has the general category "Lu"
+ * (uppercase letter).
+ *
+ * Same as java.lang.Character.isUpperCase().
+ *
+ * This misses some characters that are also uppercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_UPPERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Lu uppercase letter
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_islower
+ * @see u_istitle
+ * @see u_tolower
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isupper(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a titlecase letter.
+ * True for general category "Lt" (titlecase letter).
+ *
+ * Same as java.lang.Character.isTitleCase().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Lt titlecase letter
+ *
+ * @see u_isupper
+ * @see u_islower
+ * @see u_totitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_istitle(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a digit character according to Java.
+ * True for characters with general category "Nd" (decimal digit numbers).
+ * Beginning with Unicode 4, this is the same as
+ * testing for the Numeric_Type of Decimal.
+ *
+ * Same as java.lang.Character.isDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a digit character according to Character.isDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a letter character.
+ * True for general categories "L" (letters).
+ *
+ * Same as java.lang.Character.isLetter().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a letter character
+ *
+ * @see u_isdigit
+ * @see u_isalnum
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isalpha(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an alphanumeric character
+ * (letter or digit) according to Java.
+ * True for characters with general categories
+ * "L" (letters) and "Nd" (decimal digit numbers).
+ *
+ * Same as java.lang.Character.isLetterOrDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isalnum(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a hexadecimal digit.
+ * This is equivalent to u_digit(c, 16)>=0.
+ * True for characters with general category "Nd" (decimal digit numbers)
+ * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
+ * (That is, for letters with code points
+ * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
+ *
+ * In order to narrow the definition of hexadecimal digits to only ASCII
+ * characters, use (c<=0x7f && u_isxdigit(c)).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a hexadecimal digit
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isxdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a punctuation character.
+ * True for characters with general categories "P" (punctuation).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a punctuation character
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_ispunct(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "graphic" character
+ * (printable, excluding spaces).
+ * TRUE for all characters except those with general categories
+ * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),
+ * "Cn" (unassigned), and "Z" (separators).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a "graphic" character
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isgraph(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "blank" or "horizontal space",
+ * a character that visibly separates words on a line.
+ * The following are equivalent definitions:
+ *
+ * TRUE for Unicode White_Space characters except for "vertical space controls"
+ * where "vertical space controls" are the following characters:
+ * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
+ *
+ * same as
+ *
+ * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators)
+ * except Zero Width Space (ZWSP, U+200B).
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a "blank"
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isblank(UChar32 c);
+
+/**
+ * Determines whether the specified code point is "defined",
+ * which usually means that it is assigned a character.
+ * True for general categories other than "Cn" (other, not assigned),
+ * i.e., true for all code points mentioned in UnicodeData.txt.
+ *
+ * Note that non-character code points (e.g., U+FDD0) are not "defined"
+ * (they are Cn), but surrogate code points are "defined" (Cs).
+ *
+ * Same as java.lang.Character.isDefined().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is assigned a character
+ *
+ * @see u_isdigit
+ * @see u_isalpha
+ * @see u_isalnum
+ * @see u_isupper
+ * @see u_islower
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isdefined(UChar32 c);
+
+/**
+ * Determines if the specified character is a space character or not.
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the character to be tested
+ * @return true if the character is a space character; false otherwise.
+ *
+ * @see u_isJavaSpaceChar
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isspace(UChar32 c);
+
+/**
+ * Determine if the specified code point is a space character according to Java.
+ * True for characters with general categories "Z" (separators),
+ * which does not include control codes (e.g., TAB or Line Feed).
+ *
+ * Same as java.lang.Character.isSpaceChar().
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a space character according to Character.isSpaceChar()
+ *
+ * @see u_isspace
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaSpaceChar(UChar32 c);
+
+/**
+ * Determines if the specified code point is a whitespace character according to Java/ICU.
+ * A character is considered to be a Java whitespace character if and only
+ * if it satisfies one of the following criteria:
+ *
+ * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
+ * also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
+ * - It is U+0009 HORIZONTAL TABULATION.
+ * - It is U+000A LINE FEED.
+ * - It is U+000B VERTICAL TABULATION.
+ * - It is U+000C FORM FEED.
+ * - It is U+000D CARRIAGE RETURN.
+ * - It is U+001C FILE SEPARATOR.
+ * - It is U+001D GROUP SEPARATOR.
+ * - It is U+001E RECORD SEPARATOR.
+ * - It is U+001F UNIT SEPARATOR.
+ *
+ * This API tries to sync with the semantics of Java's
+ * java.lang.Character.isWhitespace(), but it may not return
+ * the exact same results because of the Unicode version
+ * difference.
+ *
+ * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
+ * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
+ * See http://www.unicode.org/versions/Unicode4.0.1/
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a whitespace character according to Java/ICU
+ *
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isWhitespace(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a control character
+ * (as defined by this function).
+ * A control character is one of the following:
+ * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
+ * - U_CONTROL_CHAR (Cc)
+ * - U_FORMAT_CHAR (Cf)
+ * - U_LINE_SEPARATOR (Zl)
+ * - U_PARAGRAPH_SEPARATOR (Zp)
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a control character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isprint
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_iscntrl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an ISO control code.
+ * True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
+ *
+ * Same as java.lang.Character.isISOControl().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an ISO control code
+ *
+ * @see u_iscntrl
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isISOControl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a printable character.
+ * True for general categories <em>other</em> than "C" (controls).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a printable character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_iscntrl
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isprint(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a base character.
+ * True for general categories "L" (letters), "N" (numbers),
+ * "Mc" (spacing combining marks), and "Me" (enclosing marks).
+ *
+ * Note that this is different from the Unicode definition in
+ * chapter 3.5, conformance clause D13,
+ * which defines base characters to be all characters (not Cn)
+ * that do not graphically combine with preceding characters (M)
+ * and that are neither control (Cc) or format (Cf) characters.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a base character according to this function
+ *
+ * @see u_isalpha
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isbase(UChar32 c);
+
+/**
+ * Returns the bidirectional category value for the code point,
+ * which is used in the Unicode bidirectional algorithm
+ * (UAX #9 http://www.unicode.org/reports/tr9/).
+ * Note that some <em>unassigned</em> code points have bidi values
+ * of R or AL because they are in blocks that are reserved
+ * for Right-To-Left scripts.
+ *
+ * Same as java.lang.Character.getDirectionality()
+ *
+ * @param c the code point to be tested
+ * @return the bidirectional category (UCharDirection) value
+ *
+ * @see UCharDirection
+ * @stable ICU 2.0
+ */
+U_STABLE UCharDirection U_EXPORT2
+u_charDirection(UChar32 c);
+
+/**
+ * Determines whether the code point has the Bidi_Mirrored property.
+ * This property is set for characters that are commonly used in
+ * Right-To-Left contexts and need to be displayed with a "mirrored"
+ * glyph.
+ *
+ * Same as java.lang.Character.isMirrored().
+ * Same as UCHAR_BIDI_MIRRORED
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the character has the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isMirrored(UChar32 c);
+
+/**
+ * Maps the specified character to a "mirror-image" character.
+ * For characters with the Bidi_Mirrored property, implementations
+ * sometimes need a "poor man's" mapping to another Unicode
+ * character (code point) such that the default glyph may serve
+ * as the mirror-image of the default glyph of the specified
+ * character. This is useful for text conversion to and from
+ * codepages with visual order, and for displays without glyph
+ * selection capabilities.
+ *
+ * @param c the code point to be mapped
+ * @return another Unicode code point that may serve as a mirror-image
+ * substitute, or c itself if there is no such mapping or c
+ * does not have the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @see u_isMirrored
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_charMirror(UChar32 c);
+
+/**
+ * Maps the specified character to its paired bracket character.
+ * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().
+ * Otherwise c itself is returned.
+ * See http://www.unicode.org/reports/tr9/
+ *
+ * @param c the code point to be mapped
+ * @return the paired bracket code point,
+ * or c itself if there is no such mapping
+ * (Bidi_Paired_Bracket_Type=None)
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @see u_charMirror
+ * @stable ICU 52
+ */
+U_STABLE UChar32 U_EXPORT2
+u_getBidiPairedBracket(UChar32 c);
+
+/**
+ * Returns the general category value for the code point.
+ *
+ * Same as java.lang.Character.getType().
+ *
+ * @param c the code point to be tested
+ * @return the general category (UCharCategory) value
+ *
+ * @see UCharCategory
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+u_charType(UChar32 c);
+
+/**
+ * Get a single-bit bit set for the general category of a character.
+ * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.
+ * Same as U_MASK(u_charType(c)).
+ *
+ * @param c the code point to be tested
+ * @return a single-bit mask corresponding to the general category (UCharCategory) value
+ *
+ * @see u_charType
+ * @see UCharCategory
+ * @see U_GC_CN_MASK
+ * @stable ICU 2.1
+ */
+#define U_GET_GC_MASK(c) U_MASK(u_charType(c))
+
+/**
+ * Callback from u_enumCharTypes(), is called for each contiguous range
+ * of code points c (where start<=c<limit)
+ * with the same Unicode general category ("character type").
+ *
+ * The callback function can stop the enumeration by returning FALSE.
+ *
+ * @param context an opaque pointer, as passed into utrie_enum()
+ * @param start the first code point in a contiguous range with value
+ * @param limit one past the last code point in a contiguous range with value
+ * @param type the general category for all code points in [start..limit[
+ * @return FALSE to stop the enumeration
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see u_enumCharTypes
+ */
+typedef UBool U_CALLCONV
+UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
+
+/**
+ * Enumerate efficiently all code points with their Unicode general categories.
+ *
+ * This is useful for building data structures (e.g., UnicodeSet's),
+ * for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
+ *
+ * For each contiguous range of code points with a given general category ("character type"),
+ * the UCharEnumTypeRange function is called.
+ * Adjacent ranges have different types.
+ * The Unicode Standard guarantees that the numeric value of the type is 0..31.
+ *
+ * @param enumRange a pointer to a function that is called for each contiguous range
+ * of code points with the same general category
+ * @param context an opaque pointer that is passed on to the callback function
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see UCharEnumTypeRange
+ */
+U_STABLE void U_EXPORT2
+u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns the combining class of the code point as specified in UnicodeData.txt.
+ *
+ * @param c the code point of the character
+ * @return the combining class of the character
+ * @stable ICU 2.0
+ */
+U_STABLE uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c);
+
+#endif
+
+/**
+ * Returns the decimal digit value of a decimal digit character.
+ * Such characters have the general category "Nd" (decimal digit numbers)
+ * and a Numeric_Type of Decimal.
+ *
+ * Unlike ICU releases before 2.6, no digit values are returned for any
+ * Han characters because Han number characters are often used with a special
+ * Chinese-style number format (with characters for powers of 10 in between)
+ * instead of in decimal-positional notation.
+ * Unicode 4 explicitly assigns Han number characters the Numeric_Type
+ * Numeric instead of Decimal.
+ * See Jitterbug 1483 for more details.
+ *
+ * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()
+ * for complete numeric Unicode properties.
+ *
+ * @param c the code point for which to get the decimal digit value
+ * @return the decimal digit value of c,
+ * or -1 if c is not a decimal digit character
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_charDigitValue(UChar32 c);
+
+/**
+ * Returns the Unicode allocation block that contains the character.
+ *
+ * @param c the code point to be tested
+ * @return the block value (UBlockCode) for c
+ *
+ * @see UBlockCode
+ * @stable ICU 2.0
+ */
+U_STABLE UBlockCode U_EXPORT2
+ublock_getCode(UChar32 c);
+
+/**
+ * Retrieve the name of a Unicode character.
+ * Depending on <code>nameChoice</code>, the character name written
+ * into the buffer is the "modern" name or the name that was defined
+ * in Unicode version 1.0.
+ * The name contains only "invariant" characters
+ * like A-Z, 0-9, space, and '-'.
+ * Unicode 1.0 names are only retrieved if they are different from the modern
+ * names and if the data file contains the data for them. gennames may or may
+ * not be called with a command line option to include 1.0 names in unames.dat.
+ *
+ * @param code The character (code point) for which to get the name.
+ * It must be <code>0<=code<=0x10ffff</code>.
+ * @param nameChoice Selector for which name to get.
+ * @param buffer Destination address for copying the name.
+ * The name will always be zero-terminated.
+ * If there is no name, then the buffer will be set to the empty string.
+ * @param bufferLength <code>==sizeof(buffer)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
+ * returns.
+ * @return The length of the name, or 0 if there is no name for this character.
+ * If the bufferLength is less than or equal to the length, then the buffer
+ * contains the truncated name and the returned length indicates the full
+ * length of the name.
+ * The length does not include the zero-termination.
+ *
+ * @see UCharNameChoice
+ * @see u_charFromName
+ * @see u_enumCharNames
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+ char *buffer, int32_t bufferLength,
+ UErrorCode *pErrorCode);
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Returns an empty string.
+ * Used to return the ISO 10646 comment for a character.
+ * The Unicode ISO_Comment property is deprecated and has no values.
+ *
+ * @param c The character (code point) for which to get the ISO comment.
+ * It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the comment.
+ * The comment will be zero-terminated if possible.
+ * If there is no comment, then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
+ * returns.
+ * @return 0
+ *
+ * @deprecated ICU 49
+ */
+U_DEPRECATED int32_t U_EXPORT2
+u_getISOComment(UChar32 c,
+ char *dest, int32_t destCapacity,
+ UErrorCode *pErrorCode);
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Find a Unicode character by its name and return its code point value.
+ * The name is matched exactly and completely.
+ * If the name does not correspond to a code point, <i>pErrorCode</i>
+ * is set to <code>U_INVALID_CHAR_FOUND</code>.
+ * A Unicode 1.0 name is matched only if it differs from the modern name.
+ * Unicode names are all uppercase. Extended names are lowercase followed
+ * by an uppercase hexadecimal number, and within angle brackets.
+ *
+ * @param nameChoice Selector for which name to match.
+ * @param name The name to match.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ * @return The Unicode value of the code point with the given name,
+ * or an undefined value if there is no such code point.
+ *
+ * @see UCharNameChoice
+ * @see u_charName
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+U_STABLE UChar32 U_EXPORT2
+u_charFromName(UCharNameChoice nameChoice,
+ const char *name,
+ UErrorCode *pErrorCode);
+
+/**
+ * Type of a callback function for u_enumCharNames() that gets called
+ * for each Unicode character with the code point value and
+ * the character name.
+ * If such a function returns FALSE, then the enumeration is stopped.
+ *
+ * @param context The context pointer that was passed to u_enumCharNames().
+ * @param code The Unicode code point for the character with this name.
+ * @param nameChoice Selector for which kind of names is enumerated.
+ * @param name The character's name, zero-terminated.
+ * @param length The length of the name.
+ * @return TRUE if the enumeration should continue, FALSE to stop it.
+ *
+ * @see UCharNameChoice
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
+ UChar32 code,
+ UCharNameChoice nameChoice,
+ const char *name,
+ int32_t length);
+
+/**
+ * Enumerate all assigned Unicode characters between the start and limit
+ * code points (start inclusive, limit exclusive) and call a function
+ * for each, passing the code point value and the character name.
+ * For Unicode 1.0 names, only those are enumerated that differ from the
+ * modern names.
+ *
+ * @param start The first code point in the enumeration range.
+ * @param limit One more than the last code point in the enumeration range
+ * (the first one after the range).
+ * @param fn The function that is to be called for each character name.
+ * @param context An arbitrary pointer that is passed to the function.
+ * @param nameChoice Selector for which kind of names to enumerate.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ *
+ * @see UCharNameChoice
+ * @see UEnumCharNamesFn
+ * @see u_charName
+ * @see u_charFromName
+ * @stable ICU 1.7
+ */
+U_STABLE void U_EXPORT2
+u_enumCharNames(UChar32 start, UChar32 limit,
+ UEnumCharNamesFn *fn,
+ void *context,
+ UCharNameChoice nameChoice,
+ UErrorCode *pErrorCode);
+
+/**
+ * Return the Unicode name for a given property, as given in the
+ * Unicode database file PropertyAliases.txt.
+ *
+ * In addition, this function maps the property
+ * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
+ * "General_Category_Mask". These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param property UProperty selector other than UCHAR_INVALID_CODE.
+ * If out of range, NULL is returned.
+ *
+ * @param nameChoice selector for which name to get. If out of range,
+ * NULL is returned. All properties have a long name. Most
+ * have a short name, but some do not. Unicode allows for
+ * additional names; if present these will be returned by
+ * U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+ *
+ * @return a pointer to the name, or NULL if either the
+ * property or the nameChoice is out of range. If a given
+ * nameChoice returns NULL, then all larger values of
+ * nameChoice will return NULL, with one exception: if NULL is
+ * returned for U_SHORT_PROPERTY_NAME, then
+ * U_LONG_PROPERTY_NAME (and higher) may still return a
+ * non-NULL value. The returned pointer is valid until
+ * u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+u_getPropertyName(UProperty property,
+ UPropertyNameChoice nameChoice);
+
+/**
+ * Return the UProperty enum for a given property name, as specified
+ * in the Unicode database file PropertyAliases.txt. Short, long, and
+ * any other variants are recognized.
+ *
+ * In addition, this function maps the synthetic names "gcm" /
+ * "General_Category_Mask" to the property
+ * UCHAR_GENERAL_CATEGORY_MASK. These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param alias the property name to be matched. The name is compared
+ * using "loose matching" as described in PropertyAliases.txt.
+ *
+ * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
+ * does not match any property.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_STABLE UProperty U_EXPORT2
+u_getPropertyEnum(const char* alias);
+
+/**
+ * Return the Unicode name for a given property value, as given in the
+ * Unicode database file PropertyValueAliases.txt.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt can only be
+ * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * If out of range, NULL is returned.
+ *
+ * @param value selector for a value for the given property. If out
+ * of range, NULL is returned. In general, valid values range
+ * from 0 up to some maximum. There are a few exceptions:
+ * (1.) UCHAR_BLOCK values begin at the non-zero value
+ * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS
+ * values are not contiguous and range from 0..240. (3.)
+ * UCHAR_GENERAL_CATEGORY_MASK values are not values of
+ * UCharCategory, but rather mask values produced by
+ * U_GET_GC_MASK(). This allows grouped categories such as
+ * [:L:] to be represented. Mask values range
+ * non-contiguously from 1..U_GC_P_MASK.
+ *
+ * @param nameChoice selector for which name to get. If out of range,
+ * NULL is returned. All values have a long name. Most have
+ * a short name, but some do not. Unicode allows for
+ * additional names; if present these will be returned by
+ * U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+
+ * @return a pointer to the name, or NULL if either the
+ * property or the nameChoice is out of range. If a given
+ * nameChoice returns NULL, then all larger values of
+ * nameChoice will return NULL, with one exception: if NULL is
+ * returned for U_SHORT_PROPERTY_NAME, then
+ * U_LONG_PROPERTY_NAME (and higher) may still return a
+ * non-NULL value. The returned pointer is valid until
+ * u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+u_getPropertyValueName(UProperty property,
+ int32_t value,
+ UPropertyNameChoice nameChoice);
+
+/**
+ * Return the property value integer for a given value name, as
+ * specified in the Unicode database file PropertyValueAliases.txt.
+ * Short, long, and any other variants are recognized.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt will only be
+ * recognized with UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * If out of range, UCHAR_INVALID_CODE is returned.
+ *
+ * @param alias the value name to be matched. The name is compared
+ * using "loose matching" as described in
+ * PropertyValueAliases.txt.
+ *
+ * @return a value integer or UCHAR_INVALID_CODE if the given name
+ * does not match any value of the given property, or if the
+ * property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values
+ * are not values of UCharCategory, but rather mask values
+ * produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+u_getPropertyValueEnum(UProperty property,
+ const char* alias);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in an identifier according to Unicode
+ * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
+ * True for characters with general categories "L" (letters) and "Nl" (letter numbers).
+ *
+ * Same as java.lang.Character.isUnicodeIdentifierStart().
+ * Same as UCHAR_ID_START
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may start an identifier
+ *
+ * @see UCHAR_ID_START
+ * @see u_isalpha
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in an identifier according to Java.
+ * True for characters with general categories "L" (letters),
+ * "Nl" (letter numbers), "Nd" (decimal digits),
+ * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and
+ * u_isIDIgnorable(c).
+ *
+ * Same as java.lang.Character.isUnicodeIdentifierPart().
+ * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE)
+ * except that Unicode recommends to ignore Cf which is less than
+ * u_isIDIgnorable(c).
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may occur in an identifier according to Java
+ *
+ * @see UCHAR_ID_CONTINUE
+ * @see u_isIDStart
+ * @see u_isIDIgnorable
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDPart(UChar32 c);
+
+/**
+ * Determines if the specified character should be regarded
+ * as an ignorable character in an identifier,
+ * according to Java.
+ * True for characters with general category "Cf" (format controls) as well as
+ * non-whitespace ISO controls
+ * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F).
+ *
+ * Same as java.lang.Character.isIdentifierIgnorable().
+ *
+ * Note that Unicode just recommends to ignore Cf (format controls).
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is ignorable in identifiers according to Java
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isIDStart
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDIgnorable(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in a Java identifier.
+ * In addition to u_isIDStart(c), true for characters with
+ * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
+ *
+ * Same as java.lang.Character.isJavaIdentifierStart().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may start a Java identifier
+ *
+ * @see u_isJavaIDPart
+ * @see u_isalpha
+ * @see u_isIDStart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in a Java identifier.
+ * In addition to u_isIDPart(c), true for characters with
+ * general category "Sc" (currency symbols).
+ *
+ * Same as java.lang.Character.isJavaIdentifierPart().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may occur in a Java identifier
+ *
+ * @see u_isIDIgnorable
+ * @see u_isJavaIDStart
+ * @see u_isalpha
+ * @see u_isdigit
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaIDPart(UChar32 c);
+
+/**
+ * The given character is mapped to its lowercase equivalent according to
+ * UnicodeData.txt; if the character has no lowercase equivalent, the character
+ * itself is returned.
+ *
+ * Same as java.lang.Character.toLowerCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Lowercase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_tolower(UChar32 c);
+
+/**
+ * The given character is mapped to its uppercase equivalent according to UnicodeData.txt;
+ * if the character has no uppercase equivalent, the character itself is
+ * returned.
+ *
+ * Same as java.lang.Character.toUpperCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Uppercase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_toupper(UChar32 c);
+
+/**
+ * The given character is mapped to its titlecase equivalent
+ * according to UnicodeData.txt;
+ * if none is defined, the character itself is returned.
+ *
+ * Same as java.lang.Character.toTitleCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Titlecase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_totitle(UChar32 c);
+
+/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
+#define U_FOLD_CASE_DEFAULT 0
+
+/**
+ * Option value for case folding:
+ *
+ * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
+ * and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
+ * are to be included for default mappings and
+ * excluded for the Turkic-specific mappings.
+ *
+ * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
+
+/**
+ * The given character is mapped to its case folding equivalent according to
+ * UnicodeData.txt and CaseFolding.txt;
+ * if the character has no case folding equivalent, the character
+ * itself is returned.
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return the Simple_Case_Folding of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_foldCase(UChar32 c, uint32_t options);
+
+/**
+ * Returns the decimal digit value of the code point in the
+ * specified radix.
+ *
+ * If the radix is not in the range <code>2<=radix<=36</code> or if the
+ * value of <code>c</code> is not a valid digit in the specified
+ * radix, <code>-1</code> is returned. A character is a valid digit
+ * if at least one of the following is true:
+ * <ul>
+ * <li>The character has a decimal digit value.
+ * Such characters have the general category "Nd" (decimal digit numbers)
+ * and a Numeric_Type of Decimal.
+ * In this case the value is the character's decimal digit value.</li>
+ * <li>The character is one of the uppercase Latin letters
+ * <code>'A'</code> through <code>'Z'</code>.
+ * In this case the value is <code>c-'A'+10</code>.</li>
+ * <li>The character is one of the lowercase Latin letters
+ * <code>'a'</code> through <code>'z'</code>.
+ * In this case the value is <code>ch-'a'+10</code>.</li>
+ * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)
+ * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)
+ * are recognized.</li>
+ * </ul>
+ *
+ * Same as java.lang.Character.digit().
+ *
+ * @param ch the code point to be tested.
+ * @param radix the radix.
+ * @return the numeric value represented by the character in the
+ * specified radix,
+ * or -1 if there is no value or if the value exceeds the radix.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @see u_forDigit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_digit(UChar32 ch, int8_t radix);
+
+/**
+ * Determines the character representation for a specific digit in
+ * the specified radix. If the value of <code>radix</code> is not a
+ * valid radix, or the value of <code>digit</code> is not a valid
+ * digit in the specified radix, the null character
+ * (<code>U+0000</code>) is returned.
+ * <p>
+ * The <code>radix</code> argument is valid if it is greater than or
+ * equal to 2 and less than or equal to 36.
+ * The <code>digit</code> argument is valid if
+ * <code>0 <= digit < radix</code>.
+ * <p>
+ * If the digit is less than 10, then
+ * <code>'0' + digit</code> is returned. Otherwise, the value
+ * <code>'a' + digit - 10</code> is returned.
+ *
+ * Same as java.lang.Character.forDigit().
+ *
+ * @param digit the number to convert to a character.
+ * @param radix the radix.
+ * @return the <code>char</code> representation of the specified digit
+ * in the specified radix.
+ *
+ * @see u_digit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_forDigit(int32_t digit, int8_t radix);
+
+/**
+ * Get the "age" of the code point.
+ * The "age" is the Unicode version when the code point was first
+ * designated (as a non-character or for Private Use)
+ * or assigned a character.
+ * This can be useful to avoid emitting code points to receiving
+ * processes that do not accept newer characters.
+ * The data is from the UCD file DerivedAge.txt.
+ *
+ * @param c The code point.
+ * @param versionArray The Unicode version number array, to be filled in.
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+u_charAge(UChar32 c, UVersionInfo versionArray);
+
+/**
+ * Gets the Unicode version information.
+ * The version array is filled in with the version information
+ * for the Unicode standard that is currently used by ICU.
+ * For example, Unicode version 3.1.1 is represented as an array with
+ * the values { 3, 1, 1, 0 }.
+ *
+ * @param versionArray an output array that will be filled in with
+ * the Unicode version number
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_getUnicodeVersion(UVersionInfo versionArray);
+
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Get the FC_NFKC_Closure property string for a character.
+ * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
+ * or for "FNC": http://www.unicode.org/reports/tr15/
+ *
+ * @param c The character (code point) for which to get the FC_NFKC_Closure string.
+ * It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the string.
+ * The string will be zero-terminated if possible.
+ * If there is no FC_NFKC_Closure string,
+ * then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable.
+ * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
+ * If the destCapacity is less than or equal to the length, then the buffer
+ * contains the truncated name and the returned length indicates the full
+ * length of the name.
+ * The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
+
+#endif
+
+
+U_CDECL_END
+
+#endif /*_UCHAR*/
+/*eof*/
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uclean.h b/src/core/basetypes/icu-ucsdet/include/unicode/uclean.h
new file mode 100644
index 00000000..d9a1e539
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uclean.h
@@ -0,0 +1,258 @@
+/*
+******************************************************************************
+* Copyright (C) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* file name: uclean.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#ifndef __UCLEAN_H__
+#define __UCLEAN_H__
+
+#include "unicode/utypes.h"
+/**
+ * \file
+ * \brief C API: Initialize and clean up ICU
+ */
+
+/**
+ * Initialize ICU.
+ *
+ * Use of this function is optional. It is OK to simply use ICU
+ * services and functions without first having initialized
+ * ICU by calling u_init().
+ *
+ * u_init() will attempt to load some part of ICU's data, and is
+ * useful as a test for configuration or installation problems that
+ * leave the ICU data inaccessible. A successful invocation of u_init()
+ * does not, however, guarantee that all ICU data is accessible.
+ *
+ * Multiple calls to u_init() cause no harm, aside from the small amount
+ * of time required.
+ *
+ * In old versions of ICU, u_init() was required in multi-threaded applications
+ * to ensure the thread safety of ICU. u_init() is no longer needed for this purpose.
+ *
+ * @param status An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * An Error will be returned if some required part of ICU data can not
+ * be loaded or initialized.
+ * The function returns immediately if the input error code indicates a
+ * failure, as usual.
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+u_init(UErrorCode *status);
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * Clean up the system resources, such as allocated memory or open files,
+ * used in all ICU libraries. This will free/delete all memory owned by the
+ * ICU libraries, and return them to their original load state. All open ICU
+ * items (collators, resource bundles, converters, etc.) must be closed before
+ * calling this function, otherwise ICU may not free its allocated memory
+ * (e.g. close your converters and resource bundles before calling this
+ * function). Generally, this function should be called once just before
+ * an application exits. For applications that dynamically load and unload
+ * the ICU libraries (relatively uncommon), u_cleanup() should be called
+ * just before the library unload.
+ * <p>
+ * u_cleanup() also clears any ICU heap functions, mutex functions or
+ * trace functions that may have been set for the process.
+ * This has the effect of restoring ICU to its initial condition, before
+ * any of these override functions were installed. Refer to
+ * u_setMemoryFunctions(), u_setMutexFunctions and
+ * utrace_setFunctions(). If ICU is to be reinitialized after after
+ * calling u_cleanup(), these runtime override functions will need to
+ * be set up again if they are still required.
+ * <p>
+ * u_cleanup() is not thread safe. All other threads should stop using ICU
+ * before calling this function.
+ * <p>
+ * Any open ICU items will be left in an undefined state by u_cleanup(),
+ * and any subsequent attempt to use such an item will give unpredictable
+ * results.
+ * <p>
+ * After calling u_cleanup(), an application may continue to use ICU by
+ * calling u_init(). An application must invoke u_init() first from one single
+ * thread before allowing other threads call u_init(). All threads existing
+ * at the time of the first thread's call to u_init() must also call
+ * u_init() themselves before continuing with other ICU operations.
+ * <p>
+ * The use of u_cleanup() just before an application terminates is optional,
+ * but it should be called only once for performance reasons. The primary
+ * benefit is to eliminate reports of memory or resource leaks originating
+ * in ICU code from the results generated by heap analysis tools.
+ * <p>
+ * <strong>Use this function with great care!</strong>
+ * </p>
+ *
+ * @stable ICU 2.0
+ * @system
+ */
+U_STABLE void U_EXPORT2
+u_cleanup(void);
+
+
+/**
+ * Pointer type for a user supplied memory allocation function.
+ * @param context user supplied value, obtained from from u_setMemoryFunctions().
+ * @param size The number of bytes to be allocated
+ * @return Pointer to the newly allocated memory, or NULL if the allocation failed.
+ * @stable ICU 2.8
+ * @system
+ */
+typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
+/**
+ * Pointer type for a user supplied memory re-allocation function.
+ * @param context user supplied value, obtained from from u_setMemoryFunctions().
+ * @param size The number of bytes to be allocated
+ * @return Pointer to the newly allocated memory, or NULL if the allocation failed.
+ * @stable ICU 2.8
+ * @system
+ */
+typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size);
+/**
+ * Pointer type for a user supplied memory free function. Behavior should be
+ * similar the standard C library free().
+ * @param context user supplied value, obtained from from u_setMemoryFunctions().
+ * @param mem Pointer to the memory block to be resized
+ * @param size The new size for the block
+ * @return Pointer to the resized memory block, or NULL if the resizing failed.
+ * @stable ICU 2.8
+ * @system
+ */
+typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem);
+
+/**
+ * Set the functions that ICU will use for memory allocation.
+ * Use of this function is optional; by default (without this function), ICU will
+ * use the standard C library malloc() and free() functions.
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the memory functions each time they
+ * are called.
+ * @param a Pointer to a user-supplied malloc function.
+ * @param r Pointer to a user-supplied realloc function.
+ * @param f Pointer to a user-supplied free function.
+ * @param status Receives error values.
+ * @stable ICU 2.8
+ * @system
+ */
+U_STABLE void U_EXPORT2
+u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f,
+ UErrorCode *status);
+
+
+#ifndef U_HIDE_DEPRECATED_API
+/*********************************************************************************
+ *
+ * Deprecated Functions
+ *
+ * The following functions for user supplied mutexes are no longer supported.
+ * Any attempt to use them will return a U_UNSUPPORTED_ERROR.
+ *
+ **********************************************************************************/
+
+/**
+ * An opaque pointer type that represents an ICU mutex.
+ * For user-implemented mutexes, the value will typically point to a
+ * struct or object that implements the mutex.
+ * @deprecated ICU 52. This type is no longer supported.
+ * @system
+ */
+typedef void *UMTX;
+
+/**
+ * Function Pointer type for a user supplied mutex initialization function.
+ * The user-supplied function will be called by ICU whenever ICU needs to create a
+ * new mutex. The function implementation should create a mutex, and store a pointer
+ * to something that uniquely identifies the mutex into the UMTX that is supplied
+ * as a paramter.
+ * @param context user supplied value, obtained from from u_setMutexFunctions().
+ * @param mutex Receives a pointer that identifies the new mutex.
+ * The mutex init function must set the UMTX to a non-null value.
+ * Subsequent calls by ICU to lock, unlock, or destroy a mutex will
+ * identify the mutex by the UMTX value.
+ * @param status Error status. Report errors back to ICU by setting this variable
+ * with an error code.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCode* status);
+
+
+/**
+ * Function Pointer type for a user supplied mutex functions.
+ * One of the user-supplied functions with this signature will be called by ICU
+ * whenever ICU needs to lock, unlock, or destroy a mutex.
+ * @param context user supplied value, obtained from from u_setMutexFunctions().
+ * @param mutex specify the mutex on which to operate.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+typedef void U_CALLCONV UMtxFn (const void *context, UMTX *mutex);
+
+
+/**
+ * Set the functions that ICU will use for mutex operations
+ * Use of this function is optional; by default (without this function), ICU will
+ * directly access system functions for mutex operations
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the user-supplied mutex functions each time they
+ * are called.
+ * @param init Pointer to a mutex initialization function. Must be non-null.
+ * @param destroy Pointer to the mutex destroy function. Must be non-null.
+ * @param lock pointer to the mutex lock function. Must be non-null.
+ * @param unlock Pointer to the mutex unlock function. Must be non-null.
+ * @param status Receives error values.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+U_DEPRECATED void U_EXPORT2
+u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock,
+ UErrorCode *status);
+
+
+/**
+ * Pointer type for a user supplied atomic increment or decrement function.
+ * @param context user supplied value, obtained from from u_setAtomicIncDecFunctions().
+ * @param p Pointer to a 32 bit int to be incremented or decremented
+ * @return The value of the variable after the inc or dec operation.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p);
+
+/**
+ * Set the functions that ICU will use for atomic increment and decrement of int32_t values.
+ * Use of this function is optional; by default (without this function), ICU will
+ * use its own internal implementation of atomic increment/decrement.
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the increment and decrement functions each time they
+ * are called. This function can only be called
+ * @param inc Pointer to a function to do an atomic increment operation. Must be non-null.
+ * @param dec Pointer to a function to do an atomic decrement operation. Must be non-null.
+ * @param status Receives error values.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+U_DEPRECATED void U_EXPORT2
+u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec,
+ UErrorCode *status);
+
+#endif /* U_HIDE_DEPRECATED_API */
+#endif /* U_HIDE_SYSTEM_API */
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ucnv.h b/src/core/basetypes/icu-ucsdet/include/unicode/ucnv.h
new file mode 100644
index 00000000..564656c2
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ucnv.h
@@ -0,0 +1,2036 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ * ucnv.h:
+ * External APIs for the ICU's codeset conversion library
+ * Bertrand A. Damiba
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 04/04/99 helena Fixed internal header inclusion.
+ * 05/11/00 helena Added setFallback and usesFallback APIs.
+ * 06/29/2000 helena Major rewrite of the callback APIs.
+ * 12/07/2000 srl Update of documentation
+ */
+
+/**
+ * \file
+ * \brief C API: Character conversion
+ *
+ * <h2>Character Conversion C API</h2>
+ *
+ * <p>This API is used to convert codepage or character encoded data to and
+ * from UTF-16. You can open a converter with {@link ucnv_open() }. With that
+ * converter, you can get its properties, set options, convert your data and
+ * close the converter.</p>
+ *
+ * <p>Since many software programs recogize different converter names for
+ * different types of converters, there are other functions in this API to
+ * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
+ * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
+ * more frequently used alias functions to get this information.</p>
+ *
+ * <p>When a converter encounters an illegal, irregular, invalid or unmappable character
+ * its default behavior is to use a substitution character to replace the
+ * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() }
+ * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines
+ * many other callback actions that can be used instead of a character substitution.</p>
+ *
+ * <p>More information about this API can be found in our
+ * <a href="http://icu-project.org/userguide/conversion.html">User's
+ * Guide</a>.</p>
+ */
+
+#ifndef UCNV_H
+#define UCNV_H
+
+#include "unicode/ucnv_err.h"
+#include "unicode/uenum.h"
+#include "unicode/localpointer.h"
+
+#ifndef __USET_H__
+
+/**
+ * USet is the C API type for Unicode sets.
+ * It is forward-declared here to avoid including the header file if related
+ * conversion APIs are not used.
+ * See unicode/uset.h
+ *
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.6
+ */
+struct USet;
+/** @stable ICU 2.6 */
+typedef struct USet USet;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+U_CDECL_BEGIN
+
+/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
+/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
+
+/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define UCNV_SI 0x0F
+/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define UCNV_SO 0x0E
+
+/**
+ * Enum for specifying basic types of converters
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** @stable ICU 2.0 */
+ UCNV_UNSUPPORTED_CONVERTER = -1,
+ /** @stable ICU 2.0 */
+ UCNV_SBCS = 0,
+ /** @stable ICU 2.0 */
+ UCNV_DBCS = 1,
+ /** @stable ICU 2.0 */
+ UCNV_MBCS = 2,
+ /** @stable ICU 2.0 */
+ UCNV_LATIN_1 = 3,
+ /** @stable ICU 2.0 */
+ UCNV_UTF8 = 4,
+ /** @stable ICU 2.0 */
+ UCNV_UTF16_BigEndian = 5,
+ /** @stable ICU 2.0 */
+ UCNV_UTF16_LittleEndian = 6,
+ /** @stable ICU 2.0 */
+ UCNV_UTF32_BigEndian = 7,
+ /** @stable ICU 2.0 */
+ UCNV_UTF32_LittleEndian = 8,
+ /** @stable ICU 2.0 */
+ UCNV_EBCDIC_STATEFUL = 9,
+ /** @stable ICU 2.0 */
+ UCNV_ISO_2022 = 10,
+
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_1 = 11,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_2,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_3,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_4,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_5,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_6,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_8,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_11,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_16,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_17,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_18,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_19,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_LAST = UCNV_LMBCS_19,
+ /** @stable ICU 2.0 */
+ UCNV_HZ,
+ /** @stable ICU 2.0 */
+ UCNV_SCSU,
+ /** @stable ICU 2.0 */
+ UCNV_ISCII,
+ /** @stable ICU 2.0 */
+ UCNV_US_ASCII,
+ /** @stable ICU 2.0 */
+ UCNV_UTF7,
+ /** @stable ICU 2.2 */
+ UCNV_BOCU1,
+ /** @stable ICU 2.2 */
+ UCNV_UTF16,
+ /** @stable ICU 2.2 */
+ UCNV_UTF32,
+ /** @stable ICU 2.2 */
+ UCNV_CESU8,
+ /** @stable ICU 2.4 */
+ UCNV_IMAP_MAILBOX,
+ /** @stable ICU 4.8 */
+ UCNV_COMPOUND_TEXT,
+
+ /* Number of converter types for which we have conversion routines. */
+ UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
+} UConverterType;
+
+/**
+ * Enum for specifying which platform a converter ID refers to.
+ * The use of platform/CCSID is not recommended. See ucnv_openCCSID().
+ *
+ * @see ucnv_getPlatform
+ * @see ucnv_openCCSID
+ * @see ucnv_getCCSID
+ * @stable ICU 2.0
+ */
+typedef enum {
+ UCNV_UNKNOWN = -1,
+ UCNV_IBM = 0
+} UConverterPlatform;
+
+/**
+ * Function pointer for error callback in the codepage to unicode direction.
+ * Called when an error has occured in conversion to unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param pErrorCode ICU error code in/out parameter.
+ * For converter callback functions, set to a conversion error
+ * before the call, and the callback may reset it to U_ZERO_ERROR.
+ * @see ucnv_setToUCallBack
+ * @see UConverterToUnicodeArgs
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterToUCallback) (
+ const void* context,
+ UConverterToUnicodeArgs *args,
+ const char *codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode *pErrorCode);
+
+/**
+ * Function pointer for error callback in the unicode to codepage direction.
+ * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param pErrorCode ICU error code in/out parameter.
+ * For converter callback functions, set to a conversion error
+ * before the call, and the callback may reset it to U_ZERO_ERROR.
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterFromUCallback) (
+ const void* context,
+ UConverterFromUnicodeArgs *args,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+/**
+ * Character that separates converter names from options and options from each other.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_CHAR ','
+
+/**
+ * String version of UCNV_OPTION_SEP_CHAR.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_STRING ","
+
+/**
+ * Character that separates a converter option from its value.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_CHAR '='
+
+/**
+ * String version of UCNV_VALUE_SEP_CHAR.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_STRING "="
+
+/**
+ * Converter option for specifying a locale.
+ * For example, ucnv_open("SCSU,locale=ja", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_LOCALE_OPTION_STRING ",locale="
+
+/**
+ * Converter option for specifying a version selector (0..9) for some converters.
+ * For example,
+ * \code
+ * ucnv_open("UTF-7,version=1", &errorCode);
+ * \endcode
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_VERSION_OPTION_STRING ",version="
+
+/**
+ * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
+ * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on
+ * S/390 (z/OS) Unix System Services (Open Edition).
+ * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"
+
+/**
+ * Do a fuzzy compare of two converter/alias names.
+ * The comparison is case-insensitive, ignores leading zeroes if they are not
+ * followed by further digits, and ignores all but letters and digits.
+ * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
+ * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * @param name1 a converter name or alias, zero-terminated
+ * @param name2 a converter name or alias, zero-terminated
+ * @return 0 if the names match, or a negative value if the name1
+ * lexically precedes name2, or a positive value if the name1
+ * lexically follows name2.
+ * @stable ICU 2.0
+ */
+U_STABLE int U_EXPORT2
+ucnv_compareNames(const char *name1, const char *name2);
+
+
+/**
+ * Creates a UConverter object with the name of a coded character set specified as a C string.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * leading zeroes and all non-alphanumeric characters.
+ * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
+ * (See also ucnv_compareNames().)
+ * If <code>NULL</code> is passed for the converter name, it will create one with the
+ * getDefaultName return value.
+ *
+ * <p>A converter name for ICU 1.5 and above may contain options
+ * like a locale specification to control the specific behavior of
+ * the newly instantiated converter.
+ * The meaning of the options depends on the particular converter.
+ * If an option is not defined for or recognized by a given converter, then it is ignored.</p>
+ *
+ * <p>Options are appended to the converter name string, with a
+ * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and
+ * also between adjacent options.</p>
+ *
+ * <p>If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p>
+ *
+ * <p>The conversion behavior and names can vary between platforms. ICU may
+ * convert some characters differently from other platforms. Details on this topic
+ * are in the <a href="http://icu-project.org/userguide/conversion.html">User's
+ * Guide</a>. Aliases starting with a "cp" prefix have no specific meaning
+ * other than its an alias starting with the letters "cp". Please do not
+ * associate any meaning to these aliases.</p>
+ *
+ * \snippet samples/ucnv/convsamp.cpp ucnv_open
+ *
+ * @param converterName Name of the coded character set table.
+ * This may have options appended to the string.
+ * IANA alias character set names, IBM CCSIDs starting with "ibm-",
+ * Windows codepage numbers starting with "windows-" are frequently
+ * used for this parameter. See ucnv_getAvailableName and
+ * ucnv_getAlias for a complete list that is available.
+ * If this parameter is NULL, the default converter will be used.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @see ucnv_getAvailableName
+ * @see ucnv_getAlias
+ * @see ucnv_getDefaultName
+ * @see ucnv_close
+ * @see ucnv_compareNames
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_open(const char *converterName, UErrorCode *err);
+
+
+/**
+ * Creates a Unicode converter with the names specified as unicode string.
+ * The name should be limited to the ASCII-7 alphanumerics range.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * leading zeroes and all non-alphanumeric characters.
+ * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
+ * (See also ucnv_compareNames().)
+ * If <TT>NULL</TT> is passed for the converter name, it will create
+ * one with the ucnv_getDefaultName() return value.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * <p>See ucnv_open for the complete details</p>
+ * @param name Name of the UConverter table in a zero terminated
+ * Unicode string
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR,
+ * U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an
+ * error occured
+ * @see ucnv_open
+ * @see ucnv_openCCSID
+ * @see ucnv_close
+ * @see ucnv_compareNames
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_openU(const UChar *name,
+ UErrorCode *err);
+
+/**
+ * Creates a UConverter object from a CCSID number and platform pair.
+ * Note that the usefulness of this function is limited to platforms with numeric
+ * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for
+ * encodings.
+ *
+ * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related.
+ * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and
+ * for some Unicode conversion tables there are multiple CCSIDs.
+ * Some "alternate" Unicode conversion tables are provided by the
+ * IBM CDRA conversion table registry.
+ * The most prominent example of a systematic modification of conversion tables that is
+ * not provided in the form of conversion table files in the repository is
+ * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all
+ * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.
+ *
+ * Only IBM default conversion tables are accessible with ucnv_openCCSID().
+ * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated
+ * with that CCSID.
+ *
+ * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.
+ *
+ * In summary, the use of CCSIDs and the associated API functions is not recommended.
+ *
+ * In order to open a converter with the default IBM CDRA Unicode conversion table,
+ * you can use this function or use the prefix "ibm-":
+ * \code
+ * char name[20];
+ * sprintf(name, "ibm-%hu", ccsid);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter with the IBM S/390 Unix System Services variant
+ * of a Unicode/EBCDIC conversion table,
+ * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING:
+ * \code
+ * char name[20];
+ * sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter from a Microsoft codepage number, use the prefix "cp":
+ * \code
+ * char name[20];
+ * sprintf(name, "cp%hu", codepageID);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * @param codepage codepage number to create
+ * @param platform the platform in which the codepage number exists
+ * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error
+ * occured.
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_close
+ * @see ucnv_getCCSID
+ * @see ucnv_getPlatform
+ * @see UConverterPlatform
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_openCCSID(int32_t codepage,
+ UConverterPlatform platform,
+ UErrorCode * err);
+
+/**
+ * <p>Creates a UConverter object specified from a packageName and a converterName.</p>
+ *
+ * <p>The packageName and converterName must point to an ICU udata object, as defined by
+ * <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent.
+ * Typically, packageName will refer to a (.dat) file, or to a package registered with
+ * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.</p>
+ *
+ * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
+ * stored in the converter cache or the alias table. The only way to open further converters
+ * is call this function multiple times, or use the ucnv_safeClone() function to clone a
+ * 'master' converter.</p>
+ *
+ * <p>A future version of ICU may add alias table lookups and/or caching
+ * to this function.</p>
+ *
+ * <p>Example Use:
+ * <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
+ * </p>
+ *
+ * @param packageName name of the package (equivalent to 'path' in udata_open() call)
+ * @param converterName name of the data item to be used, without suffix.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
+ * @see udata_open
+ * @see ucnv_open
+ * @see ucnv_safeClone
+ * @see ucnv_close
+ * @stable ICU 2.2
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
+
+/**
+ * Thread safe converter cloning operation.
+ * For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
+ * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space.
+ * If the buffer size is sufficient, then the clone will use the stack buffer;
+ * otherwise, it will be allocated, and *pBufferSize will indicate
+ * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.)
+ *
+ * You must ucnv_close() the clone in any case.
+ *
+ * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not)
+ * then *pBufferSize will be changed to a sufficient size
+ * for cloning this converter,
+ * without actually cloning the converter ("pure pre-flighting").
+ *
+ * If *pBufferSize is greater than zero but not large enough for a stack-based
+ * clone, then the converter is cloned using newly allocated memory
+ * and *pBufferSize is changed to the necessary size.
+ *
+ * If the converter clone fits into the stack buffer but the stack buffer is not
+ * sufficiently aligned for the clone, then the clone will use an
+ * adjusted pointer and use an accordingly smaller buffer size.
+ *
+ * @param cnv converter to be cloned
+ * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
+ * user allocated space for the new clone. If NULL new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
+ * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
+ * pointer to size of allocated space.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
+ * is used if any allocations were necessary.
+ * However, it is better to check if *pBufferSize grew for checking for
+ * allocations because warning codes can be overridden by subsequent
+ * function calls.
+ * @return pointer to the new clone
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter * U_EXPORT2
+ucnv_safeClone(const UConverter *cnv,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/**
+ * \def U_CNV_SAFECLONE_BUFFERSIZE
+ * Definition of a buffer size that is designed to be large enough for
+ * converters to be cloned with ucnv_safeClone().
+ * @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer.
+ */
+#define U_CNV_SAFECLONE_BUFFERSIZE 1024
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Deletes the unicode converter and releases resources associated
+ * with just this instance.
+ * Does not free up shared converter tables.
+ *
+ * @param converter the converter object to be deleted
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_close(UConverter * converter);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUConverterPointer
+ * "Smart pointer" class, closes a UConverter via ucnv_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterPointer, UConverter, ucnv_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Fills in the output parameter, subChars, with the substitution characters
+ * as multiple bytes.
+ * If ucnv_setSubstString() set a Unicode string because the converter is
+ * stateful, then subChars will be an empty string.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the subsitution characters
+ * @param len on input the capacity of subChars, on output the number
+ * of bytes copied to it
+ * @param err the outgoing error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @see ucnv_setSubstString
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getSubstChars(const UConverter *converter,
+ char *subChars,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Sets the substitution chars when converting from unicode to a codepage. The
+ * substitution is specified as a string of 1-4 bytes, and may contain
+ * <TT>NULL</TT> bytes.
+ * The subChars must represent a single character. The caller needs to know the
+ * byte sequence of a valid character in the converter's charset.
+ * For some converters, for example some ISO 2022 variants, only single-byte
+ * substitution characters may be supported.
+ * The newer ucnv_setSubstString() function relaxes these limitations.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the substitution character byte sequence we want set
+ * @param len the number of bytes in subChars
+ * @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
+ * len is bigger than the maximum number of bytes allowed in subchars
+ * @see ucnv_setSubstString
+ * @see ucnv_getSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setSubstChars(UConverter *converter,
+ const char *subChars,
+ int8_t len,
+ UErrorCode *err);
+
+/**
+ * Set a substitution string for converting from Unicode to a charset.
+ * The caller need not know the charset byte sequence for each charset.
+ *
+ * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence
+ * for a single character, this function takes a Unicode string with
+ * zero, one or more characters, and immediately verifies that the string can be
+ * converted to the charset.
+ * If not, or if the result is too long (more than 32 bytes as of ICU 3.6),
+ * then the function returns with an error accordingly.
+ *
+ * Also unlike ucnv_setSubstChars(), this function works for stateful charsets
+ * by converting on the fly at the point of substitution rather than setting
+ * a fixed byte sequence.
+ *
+ * @param cnv The UConverter object.
+ * @param s The Unicode string.
+ * @param length The number of UChars in s, or -1 for a NUL-terminated string.
+ * @param err Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ *
+ * @see ucnv_setSubstChars
+ * @see ucnv_getSubstChars
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_setSubstString(UConverter *cnv,
+ const UChar *s,
+ int32_t length,
+ UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errBytes, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errBytes the codepage bytes which were in error
+ * @param len on input the capacity of errBytes, on output the number of
+ * bytes which were copied to it
+ * @param err the error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getInvalidChars(const UConverter *converter,
+ char *errBytes,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errChars, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errUChars the UChars which were in error
+ * @param len on input the capacity of errUChars, on output the number of
+ * UChars which were copied to it
+ * @param err the error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getInvalidUChars(const UConverter *converter,
+ UChar *errUChars,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Resets the state of a converter to the default state. This is used
+ * in the case of an error, to restart a conversion from a known default state.
+ * It will also empty the internal output buffers.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_reset(UConverter *converter);
+
+/**
+ * Resets the to-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion to
+ * Unicode to a known default state. It will also empty the internal
+ * output buffers used for the conversion to Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_resetToUnicode(UConverter *converter);
+
+/**
+ * Resets the from-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion from
+ * Unicode to a known default state. It will also empty the internal output
+ * buffers used for the conversion from Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_resetFromUnicode(UConverter *converter);
+
+/**
+ * Returns the maximum number of bytes that are output per UChar in conversion
+ * from Unicode using this converter.
+ * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
+ * to calculate the size of a target buffer for conversion from Unicode.
+ *
+ * Note: Before ICU 2.8, this function did not return reliable numbers for
+ * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
+ *
+ * This number may not be the same as the maximum number of bytes per
+ * "conversion unit". In other words, it may not be the intuitively expected
+ * number of bytes per character that would be published for a charset,
+ * and may not fulfill any other purpose than the allocation of an output
+ * buffer of guaranteed sufficient size for a given input length and converter.
+ *
+ * Examples for special cases that are taken into account:
+ * - Supplementary code points may convert to more bytes than BMP code points.
+ * This function returns bytes per UChar (UTF-16 code unit), not per
+ * Unicode code point, for efficient buffer allocation.
+ * - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
+ * - When m input UChars are converted to n output bytes, then the maximum m/n
+ * is taken into account.
+ *
+ * The number returned here does not take into account
+ * (see UCNV_GET_MAX_BYTES_FOR_STRING):
+ * - callbacks which output more than one charset character sequence per call,
+ * like escape callbacks
+ * - initial and final non-character bytes that are output by some converters
+ * (automatic BOMs, initial escape sequence, final SI, etc.)
+ *
+ * Examples for returned values:
+ * - SBCS charsets: 1
+ * - Shift-JIS: 2
+ * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
+ * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
+ * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
+ * - ISO-2022: 3 (always outputs UTF-8)
+ * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
+ * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
+ *
+ * @param converter The Unicode converter.
+ * @return The maximum number of bytes per UChar (16 bit code unit)
+ * that are output by ucnv_fromUnicode(),
+ * to be used together with UCNV_GET_MAX_BYTES_FOR_STRING
+ * for buffer allocation.
+ *
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @see ucnv_getMinCharSize
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+ucnv_getMaxCharSize(const UConverter *converter);
+
+/**
+ * Calculates the size of a buffer for conversion from Unicode to a charset.
+ * The calculated size is guaranteed to be sufficient for this conversion.
+ *
+ * It takes into account initial and final non-character bytes that are output
+ * by some converters.
+ * It does not take into account callbacks which output more than one charset
+ * character sequence per call, like escape callbacks.
+ * The default (substitution) callback only outputs one charset character sequence.
+ *
+ * @param length Number of UChars to be converted.
+ * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
+ * that will be used.
+ * @return Size of a buffer that will be large enough to hold the output bytes of
+ * converting length UChars with the converter that returned the maxCharSize.
+ *
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.8
+ */
+#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
+ (((int32_t)(length)+10)*(int32_t)(maxCharSize))
+
+/**
+ * Returns the minimum byte length (per codepoint) for characters in this codepage.
+ * This is usually either 1 or 2.
+ * @param converter the Unicode converter
+ * @return the minimum number of bytes per codepoint allowed by this particular converter
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+ucnv_getMinCharSize(const UConverter *converter);
+
+/**
+ * Returns the display name of the converter passed in based on the Locale
+ * passed in. If the locale contains no display name, the internal ASCII
+ * name will be filled in.
+ *
+ * @param converter the Unicode converter.
+ * @param displayLocale is the specific Locale we want to localised for
+ * @param displayName user provided buffer to be filled in
+ * @param displayNameCapacity size of displayName Buffer
+ * @param err error status code
+ * @return displayNameLength number of UChar needed in displayName
+ * @see ucnv_getName
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_getDisplayName(const UConverter *converter,
+ const char *displayLocale,
+ UChar *displayName,
+ int32_t displayNameCapacity,
+ UErrorCode *err);
+
+/**
+ * Gets the internal, canonical name of the converter (zero-terminated).
+ * The lifetime of the returned string will be that of the converter
+ * passed to this function.
+ * @param converter the Unicode converter
+ * @param err UErrorCode status
+ * @return the internal name of the converter
+ * @see ucnv_getDisplayName
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getName(const UConverter *converter, UErrorCode *err);
+
+/**
+ * Gets a codepage number associated with the converter. This is not guaranteed
+ * to be the one used to create the converter. Some converters do not represent
+ * platform registered codepages and return zero for the codepage number.
+ * The error code fill-in parameter indicates if the codepage number
+ * is available.
+ * Does not check if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ *
+ * Important: The use of CCSIDs is not recommended because it is limited
+ * to only two platforms in principle and only one (UCNV_IBM) in the current
+ * ICU converter API.
+ * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely.
+ * For more details see ucnv_openCCSID().
+ *
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return If any error occurrs, -1 will be returned otherwise, the codepage number
+ * will be returned
+ * @see ucnv_openCCSID
+ * @see ucnv_getPlatform
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_getCCSID(const UConverter *converter,
+ UErrorCode *err);
+
+/**
+ * Gets a codepage platform associated with the converter. Currently,
+ * only <TT>UCNV_IBM</TT> will be returned.
+ * Does not test if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return The codepage platform
+ * @stable ICU 2.0
+ */
+U_STABLE UConverterPlatform U_EXPORT2
+ucnv_getPlatform(const UConverter *converter,
+ UErrorCode *err);
+
+/**
+ * Gets the type of the converter
+ * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022,
+ * EBCDIC_STATEFUL, LATIN_1
+ * @param converter a valid, opened converter
+ * @return the type of the converter
+ * @stable ICU 2.0
+ */
+U_STABLE UConverterType U_EXPORT2
+ucnv_getType(const UConverter * converter);
+
+/**
+ * Gets the "starter" (lead) bytes for converters of type MBCS.
+ * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
+ * is not MBCS. Fills in an array of type UBool, with the value of the byte
+ * as offset to the array. For example, if (starters[0x20] == TRUE) at return,
+ * it means that the byte 0x20 is a starter byte in this converter.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter a valid, opened converter of type MBCS
+ * @param starters an array of size 256 to be filled in
+ * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the
+ * converter is not a type which can return starters.
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getStarters(const UConverter* converter,
+ UBool starters[256],
+ UErrorCode* err);
+
+
+/**
+ * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.6
+ */
+typedef enum UConverterUnicodeSet {
+ /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
+ UCNV_ROUNDTRIP_SET,
+ /** Select the set of Unicode code points with roundtrip or fallback mappings. @stable ICU 4.0 */
+ UCNV_ROUNDTRIP_AND_FALLBACK_SET,
+ /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */
+ UCNV_SET_COUNT
+} UConverterUnicodeSet;
+
+
+/**
+ * Returns the set of Unicode code points that can be converted by an ICU converter.
+ *
+ * Returns one of several kinds of set:
+ *
+ * 1. UCNV_ROUNDTRIP_SET
+ *
+ * The set of all Unicode code points that can be roundtrip-converted
+ * (converted without any data loss) with the converter (ucnv_fromUnicode()).
+ * This set will not include code points that have fallback mappings
+ * or are only the result of reverse fallback mappings.
+ * This set will also not include PUA code points with fallbacks, although
+ * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback().
+ * See UTR #22 "Character Mapping Markup Language"
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * This is useful for example for
+ * - checking that a string or document can be roundtrip-converted with a converter,
+ * without/before actually performing the conversion
+ * - testing if a converter can be used for text for typical text for a certain locale,
+ * by comparing its roundtrip set with the set of ExemplarCharacters from
+ * ICU's locale data or other sources
+ *
+ * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
+ *
+ * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
+ * when fallbacks are turned on (see ucnv_setFallback()).
+ * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
+ *
+ * In the future, there may be more UConverterUnicodeSet choices to select
+ * sets with different properties.
+ *
+ * @param cnv The converter for which a set is requested.
+ * @param setFillIn A valid USet *. It will be cleared by this function before
+ * the converter's specific set is filled into the USet.
+ * @param whichSet A UConverterUnicodeSet selector;
+ * currently UCNV_ROUNDTRIP_SET is the only supported value.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ *
+ * @see UConverterUnicodeSet
+ * @see uset_open
+ * @see uset_close
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_getUnicodeSet(const UConverter *cnv,
+ USet *setFillIn,
+ UConverterUnicodeSet whichSet,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the current calback function used by the converter when an illegal
+ * or invalid codepage sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getToUCallBack (const UConverter * converter,
+ UConverterToUCallback *action,
+ const void **context);
+
+/**
+ * Gets the current callback function used by the converter when illegal
+ * or invalid Unicode sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getFromUCallBack (const UConverter * converter,
+ UConverterFromUCallback *action,
+ const void **context);
+
+/**
+ * Changes the callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new toUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setToUCallBack (UConverter * converter,
+ UConverterToUCallback newAction,
+ const void* newContext,
+ UConverterToUCallback *oldAction,
+ const void** oldContext,
+ UErrorCode * err);
+
+/**
+ * Changes the current callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new fromUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getFromUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setFromUCallBack (UConverter * converter,
+ UConverterFromUCallback newAction,
+ const void *newContext,
+ UConverterFromUCallback *oldAction,
+ const void **oldContext,
+ UErrorCode * err);
+
+/**
+ * Converts an array of unicode characters to an array of codepage
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last UChar consumed.
+ *
+ * Target similarly starts out pointer at the first available byte in the output
+ * buffer, and ends up pointing after the last byte written to the output.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converters' internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ * codepage characters to. Output : points to after the last codepage character copied
+ * to <TT>target</TT>.
+ * @param targetLimit the pointer just after last of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source Unicode character buffer.
+ * @param sourceLimit the pointer just after the last of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until
+ * the source buffer is consumed.
+ * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_fromUnicode (UConverter * converter,
+ char **target,
+ const char *targetLimit,
+ const UChar ** source,
+ const UChar * sourceLimit,
+ int32_t* offsets,
+ UBool flush,
+ UErrorCode * err);
+
+/**
+ * Converts a buffer of codepage bytes into an array of unicode UChars
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last byte of source consumed.
+ *
+ * Target similarly starts out pointer at the first available UChar in the output
+ * buffer, and ends up pointing after the last UChar written to the output.
+ * It does NOT necessarily keep UChar sequences together.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converters' internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ * UChars into. Output : points to after the last UChar copied.
+ * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source codepage buffer.
+ * @param sourceLimit the pointer to the byte after the end of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until
+ * the source buffer is consumed.
+ * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setFromUCallBack
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_toUnicode(UConverter *converter,
+ UChar **target,
+ const UChar *targetLimit,
+ const char **source,
+ const char *sourceLimit,
+ int32_t *offsets,
+ UBool flush,
+ UErrorCode *err);
+
+/**
+ * Convert the Unicode string into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_fromUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
+ *
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
+ * @param src the input Unicode string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param pErrorCode normal ICU error code;
+ * common error codes that may be set by this function include
+ * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @see ucnv_fromUnicode
+ * @see ucnv_convert
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromUChars(UConverter *cnv,
+ char *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert the codepage string into a Unicode string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_toUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * 2*srcLength (each char may be converted into a surrogate pair).
+ *
+ * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
+ * @param src the input codepage string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of UChars available at dest
+ * @param pErrorCode normal ICU error code;
+ * common error codes that may be set by this function include
+ * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @see ucnv_toUnicode
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toUChars(UConverter *cnv,
+ UChar *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a codepage buffer into Unicode one character at a time.
+ * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
+ *
+ * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
+ * - Faster for small amounts of data, for most converters, e.g.,
+ * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
+ * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
+ * it uses ucnv_toUnicode() internally.)
+ * - Convenient.
+ *
+ * Limitations compared to ucnv_toUnicode():
+ * - Always assumes flush=TRUE.
+ * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
+ * that is, for where the input is supplied in multiple buffers,
+ * because ucnv_getNextUChar() will assume the end of the input at the end
+ * of the first buffer.
+ * - Does not provide offset output.
+ *
+ * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
+ * ucnv_getNextUChar() uses the current state of the converter
+ * (unlike ucnv_toUChars() which always resets first).
+ * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
+ * stopped in the middle of a character sequence (with flush=FALSE),
+ * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
+ * internally until the next character boundary.
+ * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
+ * start at a character boundary.)
+ *
+ * Instead of using ucnv_getNextUChar(), it is recommended
+ * to convert using ucnv_toUnicode() or ucnv_toUChars()
+ * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
+ * or a C++ CharacterIterator or similar.
+ * This allows streaming conversion and offset output, for example.
+ *
+ * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
+ * There are two different kinds of codepages that provide mappings for surrogate characters:
+ * <ul>
+ * <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
+ * code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
+ * Each valid sequence will result in exactly one returned code point.
+ * If a sequence results in a single surrogate, then that will be returned
+ * by itself, even if a neighboring sequence encodes the matching surrogate.</li>
+ * <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
+ * including surrogates. Code points in supplementary planes are represented with
+ * two sequences, each encoding a surrogate.
+ * For these codepages, matching pairs of surrogates will be combined into single
+ * code points for returning from this function.
+ * (Note that SCSU is actually a mix of these codepage types.)</li>
+ * </ul></p>
+ *
+ * @param converter an open UConverter
+ * @param source the address of a pointer to the codepage buffer, will be
+ * updated to point after the bytes consumed in the conversion call.
+ * @param sourceLimit points to the end of the input buffer
+ * @param err fills in error status (see ucnv_toUnicode)
+ * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input
+ * is empty or does not convert to any output (e.g.: pure state-change
+ * codes SI/SO, escape sequences for ISO 2022,
+ * or if the callback did not output anything, ...).
+ * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
+ * the "buffer" is the return code. However, there might be subsequent output
+ * stored in the converter object
+ * that will be returned in following calls to this function.
+ * @return a UChar32 resulting from the partial conversion of source
+ * @see ucnv_toUnicode
+ * @see ucnv_toUChars
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+ucnv_getNextUChar(UConverter * converter,
+ const char **source,
+ const char * sourceLimit,
+ UErrorCode * err);
+
+/**
+ * Convert from one external charset to another using two existing UConverters.
+ * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
+ * are used, "pivoting" through 16-bit Unicode.
+ *
+ * Important: For streaming conversion (multiple function calls for successive
+ * parts of a text stream), the caller must provide a pivot buffer explicitly,
+ * and must preserve the pivot buffer and associated pointers from one
+ * call to another. (The buffer may be moved if its contents and the relative
+ * pointer positions are preserved.)
+ *
+ * There is a similar function, ucnv_convert(),
+ * which has the following limitations:
+ * - it takes charset names, not converter objects, so that
+ * - two converters are opened for each call
+ * - only single-string conversion is possible, not streaming operation
+ * - it does not provide enough information to find out,
+ * in case of failure, whether the toUnicode or
+ * the fromUnicode conversion failed
+ *
+ * By contrast, ucnv_convertEx()
+ * - takes UConverter parameters instead of charset names
+ * - fully exposes the pivot buffer for streaming conversion and complete error handling
+ *
+ * ucnv_convertEx() also provides further convenience:
+ * - an option to reset the converters at the beginning
+ * (if reset==TRUE, see parameters;
+ * also sets *pivotTarget=*pivotSource=pivotStart)
+ * - allow NUL-terminated input
+ * (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ * (if sourceLimit==NULL, see parameters)
+ * - terminate with a NUL on output
+ * (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ * the target buffer
+ * - the pivot buffer can be provided internally;
+ * possible only for whole-string conversion, not streaming conversion;
+ * in this case, the caller will not be able to get details about where an
+ * error occurred
+ * (if pivotStart==NULL, see below)
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * - a conversion error occurred
+ * (other U_FAILURE(), see description of pErrorCode)
+ *
+ * Limitation compared to the direct use of
+ * ucnv_fromUnicode() and ucnv_toUnicode():
+ * ucnv_convertEx() does not provide offset information.
+ *
+ * Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
+ * ucnv_convertEx() does not support preflighting directly.
+ *
+ * Sample code for converting a single string from
+ * one external charset to UTF-8, ignoring the location of errors:
+ *
+ * \code
+ * int32_t
+ * myToUTF8(UConverter *cnv,
+ * const char *s, int32_t length,
+ * char *u8, int32_t capacity,
+ * UErrorCode *pErrorCode) {
+ * UConverter *utf8Cnv;
+ * char *target;
+ *
+ * if(U_FAILURE(*pErrorCode)) {
+ * return 0;
+ * }
+ *
+ * utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
+ * if(U_FAILURE(*pErrorCode)) {
+ * return 0;
+ * }
+ *
+ * if(length<0) {
+ * length=strlen(s);
+ * }
+ * target=u8;
+ * ucnv_convertEx(utf8Cnv, cnv,
+ * &target, u8+capacity,
+ * &s, s+length,
+ * NULL, NULL, NULL, NULL,
+ * TRUE, TRUE,
+ * pErrorCode);
+ *
+ * myReleaseCachedUTF8Converter(utf8Cnv);
+ *
+ * // return the output string length, but without preflighting
+ * return (int32_t)(target-u8);
+ * }
+ * \endcode
+ *
+ * @param targetCnv Output converter, used to convert from the UTF-16 pivot
+ * to the target using ucnv_fromUnicode().
+ * @param sourceCnv Input converter, used to convert from the source to
+ * the UTF-16 pivot using ucnv_toUnicode().
+ * @param target I/O parameter, same as for ucnv_fromUChars().
+ * Input: *target points to the beginning of the target buffer.
+ * Output: *target points to the first unit after the last char written.
+ * @param targetLimit Pointer to the first unit after the target buffer.
+ * @param source I/O parameter, same as for ucnv_toUChars().
+ * Input: *source points to the beginning of the source buffer.
+ * Output: *source points to the first unit after the last char read.
+ * @param sourceLimit Pointer to the first unit after the source buffer.
+ * @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
+ * then an internal buffer is used and the other pivot
+ * arguments are ignored and can be NULL as well.
+ * @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for
+ * conversion from the pivot buffer to the target buffer.
+ * @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for
+ * conversion from the source buffer to the pivot buffer.
+ * It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
+ * and pivotStart<pivotLimit (unless pivotStart==NULL).
+ * @param pivotLimit Pointer to the first unit after the pivot buffer.
+ * @param reset If TRUE, then ucnv_resetToUnicode(sourceCnv) and
+ * ucnv_resetFromUnicode(targetCnv) are called, and the
+ * pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
+ * @param flush If true, indicates the end of the input.
+ * Passed directly to ucnv_toUnicode(), and carried over to
+ * ucnv_fromUnicode() when the source is empty as well.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
+ * because overflows into the pivot buffer are handled internally.
+ * Other conversion errors are from the source-to-pivot
+ * conversion if *pivotSource==pivotStart, otherwise from
+ * the pivot-to-target conversion.
+ *
+ * @see ucnv_convert
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
+ char **target, const char *targetLimit,
+ const char **source, const char *sourceLimit,
+ UChar *pivotStart, UChar **pivotSource,
+ UChar **pivotTarget, const UChar *pivotLimit,
+ UBool reset, UBool flush,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, two converters are opened according to the name arguments,
+ * then the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(), then the converters are closed again.
+ *
+ * This is a convenience function, not an efficient way to convert a lot of text:
+ * ucnv_convert()
+ * - takes charset names, not converter objects, so that
+ * - two converters are opened for each call
+ * - only single-string conversion is possible, not streaming operation
+ * - does not provide enough information to find out,
+ * in case of failure, whether the toUnicode or
+ * the fromUnicode conversion failed
+ * - allows NUL-terminated input
+ * (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ * (if sourceLength==-1, see parameters)
+ * - terminate with a NUL on output
+ * (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ * the target buffer
+ * - a pivot buffer is provided internally
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * and either the target buffer is terminated with a single NUL byte
+ * or the error code is set to U_STRING_NOT_TERMINATED_WARNING
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * and the full output string length is returned ("preflighting")
+ * - a conversion error occurred
+ * (other U_FAILURE(), see description of pErrorCode)
+ *
+ * @param toConverterName The name of the converter that is used to convert
+ * from the UTF-16 pivot buffer to the target.
+ * @param fromConverterName The name of the converter that is used to convert
+ * from the source to the UTF-16 pivot buffer.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes, or -1 for NUL-terminated input.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_convertEx
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_convert(const char *toConverterName,
+ const char *fromConverterName,
+ char *target,
+ int32_t targetCapacity,
+ const char *source,
+ int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses the cnv converter parameter.
+ * The pivot-to-target conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param algorithmicType UConverterType constant identifying the desired target
+ * charset as a purely algorithmic converter.
+ * Those are converters for Unicode charsets like
+ * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ * as well as US-ASCII and ISO-8859-1.
+ * @param cnv The converter that is used to convert
+ * from the source to the UTF-16 pivot buffer.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toAlgorithmic(UConverterType algorithmicType,
+ UConverter *cnv,
+ char *target, int32_t targetCapacity,
+ const char *source, int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ * The pivot-to-target conversion uses the cnv converter parameter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param cnv The converter that is used to convert
+ * from the UTF-16 pivot buffer to the target.
+ * @param algorithmicType UConverterType constant identifying the desired source
+ * charset as a purely algorithmic converter.
+ * Those are converters for Unicode charsets like
+ * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ * as well as US-ASCII and ISO-8859-1.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromAlgorithmic(UConverter *cnv,
+ UConverterType algorithmicType,
+ char *target, int32_t targetCapacity,
+ const char *source, int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Frees up memory occupied by unused, cached converter shared data.
+ *
+ * @return the number of cached converters successfully deleted
+ * @see ucnv_close
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_flushCache(void);
+
+/**
+ * Returns the number of available converters, as per the alias file.
+ *
+ * @return the number of available converters
+ * @see ucnv_getAvailableName
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_countAvailable(void);
+
+/**
+ * Gets the canonical converter name of the specified converter from a list of
+ * all available converters contaied in the alias file. All converters
+ * in this list can be opened.
+ *
+ * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>)
+ * @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
+ * @see ucnv_countAvailable
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+ucnv_getAvailableName(int32_t n);
+
+/**
+ * Returns a UEnumeration to enumerate all of the canonical converter
+ * names, as per the alias file, regardless of the ability to open each
+ * converter.
+ *
+ * @return A UEnumeration object for getting all the recognized canonical
+ * converter names.
+ * @see ucnv_getAvailableName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.4
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucnv_openAllNames(UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of aliases for a given converter or alias name.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * This method only enumerates the listed entries in the alias file.
+ * @param alias alias name
+ * @param pErrorCode error status
+ * @return number of names on alias list for given alias
+ * @stable ICU 2.0
+ */
+U_STABLE uint16_t U_EXPORT2
+ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
+
+/**
+ * Gives the name of the alias at given index of alias list.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param n index in alias list
+ * @param pErrorCode result of operation
+ * @return returns the name of the alias at given index
+ * @see ucnv_countAliases
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Fill-up the list of alias names for the given alias.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param aliases fill-in list, aliases is a pointer to an array of
+ * <code>ucnv_countAliases()</code> string-pointers
+ * (<code>const char *</code>) that will be filled in.
+ * The strings themselves are owned by the library.
+ * @param pErrorCode result of operation
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
+
+/**
+ * Return a new UEnumeration object for enumerating all the
+ * alias names for a given converter that are recognized by a standard.
+ * This method only enumerates the listed entries in the alias file.
+ * The convrtrs.txt file can be modified to change the results of
+ * this function.
+ * The first result in this list is the same result given by
+ * <code>ucnv_getStandardName</code>, which is the default alias for
+ * the specified standard name. The returned object must be closed with
+ * <code>uenum_close</code> when you are done with the object.
+ *
+ * @param convName original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ * are such standards
+ * @param pErrorCode The error code
+ * @return A UEnumeration object for getting all aliases that are recognized
+ * by a standard. If any of the parameters are invalid, NULL
+ * is returned.
+ * @see ucnv_getStandardName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.2
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucnv_openStandardNames(const char *convName,
+ const char *standard,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of standards associated to converter names.
+ * @return number of standards
+ * @stable ICU 2.0
+ */
+U_STABLE uint16_t U_EXPORT2
+ucnv_countStandards(void);
+
+/**
+ * Gives the name of the standard at given index of standard list.
+ * @param n index in standard list
+ * @param pErrorCode result of operation
+ * @return returns the name of the standard at given index. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Returns a standard name for a given converter name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("conv", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"alias2"</b>
+ *
+ * @param name original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ * are such standards
+ * @param pErrorCode result of operation
+ * @return returns the standard converter name;
+ * if a standard converter name cannot be determined,
+ * then <code>NULL</code> is returned. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * This function will return the internal canonical converter name of the
+ * tagged alias. This is the opposite of ucnv_openStandardNames, which
+ * returns the tagged alias given the canonical name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("alias1", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"conv"</b>
+ *
+ * @return returns the canonical converter name;
+ * if a standard or alias name cannot be determined,
+ * then <code>NULL</code> is returned. The returned string is
+ * owned by the library.
+ * @see ucnv_getStandardName
+ * @stable ICU 2.4
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * Returns the current default converter name. If you want to open
+ * a default converter, you do not need to use this function.
+ * It is faster if you pass a NULL argument to ucnv_open the
+ * default converter.
+ *
+ * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
+ * always returns "UTF-8".
+ *
+ * @return returns the current default converter name.
+ * Storage owned by the library
+ * @see ucnv_setDefaultName
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getDefaultName(void);
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * This function is not thread safe. DO NOT call this function when ANY ICU
+ * function is being used from more than one thread! This function sets the
+ * current default converter name. If this function needs to be called, it
+ * should be called during application initialization. Most of the time, the
+ * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
+ * is sufficient for your application.
+ *
+ * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
+ * does nothing.
+ *
+ * @param name the converter name to be the default (must be known by ICU).
+ * @see ucnv_getDefaultName
+ * @system
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setDefaultName(const char *name);
+#endif /* U_HIDE_SYSTEM_API */
+
+/**
+ * Fixes the backslash character mismapping. For example, in SJIS, the backslash
+ * character in the ASCII portion is also used to represent the yen currency sign.
+ * When mapping from Unicode character 0x005C, it's unclear whether to map the
+ * character back to yen or backslash in SJIS. This function will take the input
+ * buffer and replace all the yen sign characters with backslash. This is necessary
+ * when the user tries to open a file with the input buffer on Windows.
+ * This function will test the converter to see whether such mapping is
+ * required. You can sometimes avoid using this function by using the correct version
+ * of Shift-JIS.
+ *
+ * @param cnv The converter representing the target codepage.
+ * @param source the input buffer to be fixed
+ * @param sourceLen the length of the input buffer
+ * @see ucnv_isAmbiguous
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
+
+/**
+ * Determines if the converter contains ambiguous mappings of the same
+ * character or not.
+ * @param cnv the converter to be tested
+ * @return TRUE if the converter contains ambiguous mapping of the same
+ * character, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_isAmbiguous(const UConverter *cnv);
+
+/**
+ * Sets the converter to use fallback mappings or not.
+ * Regardless of this flag, the converter will always use
+ * fallbacks from Unicode Private Use code points, as well as
+ * reverse fallbacks (to Unicode).
+ * For details see ".ucm File Format"
+ * in the Conversion Data chapter of the ICU User Guide:
+ * http://www.icu-project.org/userguide/conversion-data.html#ucmformat
+ *
+ * @param cnv The converter to set the fallback mapping usage on.
+ * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback
+ * mapping, FALSE otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_usesFallback
+ */
+U_STABLE void U_EXPORT2
+ucnv_setFallback(UConverter *cnv, UBool usesFallback);
+
+/**
+ * Determines if the converter uses fallback mappings or not.
+ * This flag has restrictions, see ucnv_setFallback().
+ *
+ * @param cnv The converter to be tested
+ * @return TRUE if the converter uses fallback, FALSE otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_setFallback
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_usesFallback(const UConverter *cnv);
+
+/**
+ * Detects Unicode signature byte sequences at the start of the byte stream
+ * and returns the charset name of the indicated Unicode charset.
+ * NULL is returned when no Unicode signature is recognized.
+ * The number of bytes in the signature is output as well.
+ *
+ * The caller can ucnv_open() a converter using the charset name.
+ * The first code unit (UChar) from the start of the stream will be U+FEFF
+ * (the Unicode BOM/signature character) and can usually be ignored.
+ *
+ * For most Unicode charsets it is also possible to ignore the indicated
+ * number of initial stream bytes and start converting after them.
+ * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
+ * this will not work. Therefore, it is best to ignore the first output UChar
+ * instead of the input signature bytes.
+ * <p>
+ * Usage:
+ * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature
+ *
+ * @param source The source string in which the signature should be detected.
+ * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte.
+ * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature
+ * of the detected UTF. 0 if not detected.
+ * Can be a NULL pointer.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The name of the encoding detected. NULL if encoding is not detected.
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+ucnv_detectUnicodeSignature(const char* source,
+ int32_t sourceLength,
+ int32_t *signatureLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the number of UChars held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @param cnv The converter in which the input is held
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The number of UChars in the state. -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);
+
+/**
+ * Returns the number of chars held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @param cnv The converter in which the input is held as internal state
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The number of chars in the state. -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);
+
+/**
+ * Returns whether or not the charset of the converter has a fixed number of bytes
+ * per charset character.
+ * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS.
+ * Another example is UTF-32 which is always 4 bytes per character.
+ * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit
+ * but a UTF-32 converter encodes each code point with 4 bytes.
+ * Note: This method is not intended to be used to determine whether the charset has a
+ * fixed ratio of bytes to Unicode codes <i>units</i> for any particular Unicode encoding form.
+ * FALSE is returned with the UErrorCode if error occurs or cnv is NULL.
+ * @param cnv The converter to be tested
+ * @param status ICU error code in/out paramter
+ * @return TRUE if the converter is fixed-width
+ * @stable ICU 4.8
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status);
+
+#endif
+
+#endif
+/*_UCNV*/
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ucnv_err.h b/src/core/basetypes/icu-ucsdet/include/unicode/ucnv_err.h
new file mode 100644
index 00000000..e092e95f
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ucnv_err.h
@@ -0,0 +1,463 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2009, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ *
+ *
+ * ucnv_err.h:
+ */
+
+/**
+ * \file
+ * \brief C UConverter predefined error callbacks
+ *
+ * <h2>Error Behaviour Functions</h2>
+ * Defines some error behaviour functions called by ucnv_{from,to}Unicode
+ * These are provided as part of ICU and many are stable, but they
+ * can also be considered only as an example of what can be done with
+ * callbacks. You may of course write your own.
+ *
+ * If you want to write your own, you may also find the functions from
+ * ucnv_cb.h useful when writing your own callbacks.
+ *
+ * These functions, although public, should NEVER be called directly.
+ * They should be used as parameters to the ucnv_setFromUCallback
+ * and ucnv_setToUCallback functions, to set the behaviour of a converter
+ * when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
+ *
+ * usage example: 'STOP' doesn't need any context, but newContext
+ * could be set to something other than 'NULL' if needed. The available
+ * contexts in this header can modify the default behavior of the callback.
+ *
+ * \code
+ * UErrorCode err = U_ZERO_ERROR;
+ * UConverter *myConverter = ucnv_open("ibm-949", &err);
+ * const void *oldContext;
+ * UConverterFromUCallback oldAction;
+ *
+ *
+ * if (U_SUCCESS(err))
+ * {
+ * ucnv_setFromUCallBack(myConverter,
+ * UCNV_FROM_U_CALLBACK_STOP,
+ * NULL,
+ * &oldAction,
+ * &oldContext,
+ * &status);
+ * }
+ * \endcode
+ *
+ * The code above tells "myConverter" to stop when it encounters an
+ * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
+ * and ucnv_setToUCallBack would need to be called in order to change
+ * that behavior too.
+ *
+ * Here is an example with a context:
+ *
+ * \code
+ * UErrorCode err = U_ZERO_ERROR;
+ * UConverter *myConverter = ucnv_open("ibm-949", &err);
+ * const void *oldContext;
+ * UConverterFromUCallback oldAction;
+ *
+ *
+ * if (U_SUCCESS(err))
+ * {
+ * ucnv_setToUCallBack(myConverter,
+ * UCNV_TO_U_CALLBACK_SUBSTITUTE,
+ * UCNV_SUB_STOP_ON_ILLEGAL,
+ * &oldAction,
+ * &oldContext,
+ * &status);
+ * }
+ * \endcode
+ *
+ * The code above tells "myConverter" to stop when it encounters an
+ * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ * Codepage -> Unicode. Any unmapped and legal characters will be
+ * substituted to be the default substitution character.
+ */
+
+#ifndef UCNV_ERR_H
+#define UCNV_ERR_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+/** Forward declaring the UConverter structure. @stable ICU 2.0 */
+struct UConverter;
+
+/** @stable ICU 2.0 */
+typedef struct UConverter UConverter;
+
+/**
+ * FROM_U, TO_U context options for sub callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SUB_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U, TO_U context options for skip callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_ICU NULL
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_JAVA "J"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+ * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_C "C"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_DEC "D"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_HEX "X"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_UNICODE "U"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
+ * a backslash, 1..6 hex digits, and a space)
+ * @stable ICU 4.0
+ */
+#define UCNV_ESCAPE_CSS2 "S"
+
+/**
+ * The process condition code to be used with the callbacks.
+ * Codes which are greater than UCNV_IRREGULAR should be
+ * passed on to any chained callbacks.
+ * @stable ICU 2.0
+ */
+typedef enum {
+ UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
+ The error code U_INVALID_CHAR_FOUND will be set. */
+ UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
+ \\x81\\x2E is illegal in SJIS because \\x2E
+ is not a valid trail byte for the \\x81
+ lead byte.
+ Also, starting with Unicode 3.0.1, non-shortest byte sequences
+ in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
+ are also illegal, not just irregular.
+ The error code U_ILLEGAL_CHAR_FOUND will be set. */
+ UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
+ the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
+ are irregular UTF-8 byte sequences for single surrogate
+ code points.
+ The error code U_INVALID_CHAR_FOUND will be set. */
+ UCNV_RESET = 3, /**< The callback is called with this reason when a
+ 'reset' has occured. Callback should reset all
+ state. */
+ UCNV_CLOSE = 4, /**< Called when the converter is closed. The
+ callback should release any allocated memory.*/
+ UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
+ converter. the pointer available as the
+ 'context' is an alias to the original converters'
+ context pointer. If the context must be owned
+ by the new converter, the callback must clone
+ the data and call ucnv_setFromUCallback
+ (or setToUCallback) with the correct pointer.
+ @stable ICU 2.2
+ */
+} UConverterCallbackReason;
+
+
+/**
+ * The structure for the fromUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+ uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
+ UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
+ UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+ const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */
+ const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
+ char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
+ const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+} UConverterFromUnicodeArgs;
+
+
+/**
+ * The structure for the toUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+ uint16_t size; /**< The size of this struct @stable ICU 2.0 */
+ UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
+ UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+ const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */
+ const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
+ UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
+ const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+} UConverterToUnicodeArgs;
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSINGED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Skips any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * current substitution string for the converter. This is the default
+ * callback.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal codepoints
+ *
+ * @param context The function currently recognizes the callback options:
+ * <ul>
+ * <li>UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE").
+ * In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * %UD84D%UDC56</li>
+ * <li>UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
+ * In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \\uD84D\\uDC56</li>
+ * <li>UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
+ * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \\U00023456</li>
+ * <li>UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal
+ * representation in the format \htmlonly&amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly.
+ * In the Event the converter doesn't support the characters {&amp;,#}[0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * &amp;#144470; and Zero padding is ignored.</li>
+ * <li>UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal
+ * representation in the format \htmlonly&amp;#xXXXX; e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly.
+ * In the Event the converter doesn't support the characters {&,#,x}[0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \htmlonly&amp;#x23456;\endhtmlonly</li>
+ * </ul>
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSINGED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Skips any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * Unicode substitution character, U+FFFD.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal bytes
+ * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
+ *
+ * @param context This function currently recognizes the callback options:
+ * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
+ * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+#endif
+
+#endif
+
+/*UCNV_ERR_H*/
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uconfig.h b/src/core/basetypes/icu-ucsdet/include/unicode/uconfig.h
new file mode 100644
index 00000000..ed073b63
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uconfig.h
@@ -0,0 +1,430 @@
+/*
+**********************************************************************
+* Copyright (C) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: uconfig.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002sep19
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UCONFIG_H__
+#define __UCONFIG_H__
+
+
+/*!
+ * \file
+ * \brief User-configurable settings
+ *
+ * Miscellaneous switches:
+ *
+ * A number of macros affect a variety of minor aspects of ICU.
+ * Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
+ * and moved here to make them easier to find.
+ *
+ * Switches for excluding parts of ICU library code modules:
+ *
+ * Changing these macros allows building partial, smaller libraries for special purposes.
+ * By default, all modules are built.
+ * The switches are fairly coarse, controlling large modules.
+ * Basic services cannot be turned off.
+ *
+ * Building with any of these options does not guarantee that the
+ * ICU build process will completely work. It is recommended that
+ * the ICU libraries and data be built using the normal build.
+ * At that time you should remove the data used by those services.
+ * After building the ICU data library, you should rebuild the ICU
+ * libraries with these switches customized to your needs.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
+ * prior to determining default settings for uconfig variables.
+ *
+ * @internal ICU 4.0
+ */
+#if defined(UCONFIG_USE_LOCAL)
+#include "uconfig_local.h"
+#endif
+
+/**
+ * \def U_DEBUG
+ * Determines whether to include debugging code.
+ * Automatically set on Windows, but most compilers do not have
+ * related predefined macros.
+ * @internal
+ */
+#ifdef U_DEBUG
+ /* Use the predefined value. */
+#elif defined(_DEBUG)
+ /*
+ * _DEBUG is defined by Visual Studio debug compilation.
+ * Do *not* test for its NDEBUG macro: It is an orthogonal macro
+ * which disables assert().
+ */
+# define U_DEBUG 1
+# else
+# define U_DEBUG 0
+#endif
+
+/**
+ * Determines wheter to enable auto cleanup of libraries.
+ * @internal
+ */
+#ifndef UCLN_NO_AUTO_CLEANUP
+#define UCLN_NO_AUTO_CLEANUP 1
+#endif
+
+/**
+ * \def U_DISABLE_RENAMING
+ * Determines whether to disable renaming or not.
+ * @internal
+ */
+#ifndef U_DISABLE_RENAMING
+#define U_DISABLE_RENAMING 0
+#endif
+
+/**
+ * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+ * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
+ * utypes.h includes those headers if this macro is defined to 0.
+ * Otherwise, each those headers must be included explicitly when using one of their macros.
+ * Defaults to 0 for backward compatibility, except inside ICU.
+ * @stable ICU 49
+ */
+#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+ /* Use the predefined value. */
+#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
+ defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
+ defined(U_TOOLUTIL_IMPLEMENTATION)
+# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
+#else
+# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
+#endif
+
+/**
+ * \def U_OVERRIDE_CXX_ALLOCATION
+ * Determines whether to override new and delete.
+ * ICU is normally built such that all of its C++ classes, via their UMemory base,
+ * override operators new and delete to use its internal, customizable,
+ * non-exception-throwing memory allocation functions. (Default value 1 for this macro.)
+ *
+ * This is especially important when the application and its libraries use multiple heaps.
+ * For example, on Windows, this allows the ICU DLL to be used by
+ * applications that statically link the C Runtime library.
+ *
+ * @stable ICU 2.2
+ */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+
+/**
+ * \def U_ENABLE_TRACING
+ * Determines whether to enable tracing.
+ * @internal
+ */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 0
+#endif
+
+/**
+ * \def U_ENABLE_DYLOAD
+ * Whether to enable Dynamic loading in ICU.
+ * @internal
+ */
+#ifndef U_ENABLE_DYLOAD
+#define U_ENABLE_DYLOAD 1
+#endif
+
+/**
+ * \def U_CHECK_DYLOAD
+ * Whether to test Dynamic loading as an OS capability.
+ * @internal
+ */
+#ifndef U_CHECK_DYLOAD
+#define U_CHECK_DYLOAD 1
+#endif
+
+
+/**
+ * \def U_DEFAULT_SHOW_DRAFT
+ * Do we allow ICU users to use the draft APIs by default?
+ * @internal
+ */
+#ifndef U_DEFAULT_SHOW_DRAFT
+#define U_DEFAULT_SHOW_DRAFT 1
+#endif
+
+/*===========================================================================*/
+/* Custom icu entry point renaming */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_LIB_SUFFIX
+ * 1 if a custom library suffix is set.
+ * @internal
+ */
+#ifdef U_HAVE_LIB_SUFFIX
+ /* Use the predefined value. */
+#elif defined(U_LIB_SUFFIX_C_NAME)
+# define U_HAVE_LIB_SUFFIX 1
+#endif
+
+/**
+ * \def U_LIB_SUFFIX_C_NAME_STRING
+ * Defines the library suffix as a string with C syntax.
+ * @internal
+ */
+#ifdef U_LIB_SUFFIX_C_NAME_STRING
+ /* Use the predefined value. */
+#elif defined(U_LIB_SUFFIX_C_NAME)
+# define CONVERT_TO_STRING(s) #s
+# define U_LIB_SUFFIX_C_NAME_STRING CONVERT_TO_STRING(U_LIB_SUFFIX_C_NAME)
+#else
+# define U_LIB_SUFFIX_C_NAME_STRING ""
+#endif
+
+/* common/i18n library switches --------------------------------------------- */
+
+/**
+ * \def UCONFIG_ONLY_COLLATION
+ * This switch turns off modules that are not needed for collation.
+ *
+ * It does not turn off legacy conversion because that is necessary
+ * for ICU to work on EBCDIC platforms (for the default converter).
+ * If you want "only collation" and do not build for EBCDIC,
+ * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_ONLY_COLLATION
+# define UCONFIG_ONLY_COLLATION 0
+#endif
+
+#if UCONFIG_ONLY_COLLATION
+ /* common library */
+# define UCONFIG_NO_BREAK_ITERATION 1
+# define UCONFIG_NO_IDNA 1
+
+ /* i18n library */
+# if UCONFIG_NO_COLLATION
+# error Contradictory collation switches in uconfig.h.
+# endif
+# define UCONFIG_NO_FORMATTING 1
+# define UCONFIG_NO_TRANSLITERATION 1
+# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
+#endif
+
+/* common library switches -------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_FILE_IO
+ * This switch turns off all file access in the common library
+ * where file access is only used for data loading.
+ * ICU data must then be provided in the form of a data DLL (or with an
+ * equivalent way to link to the data residing in an executable,
+ * as in building a combined library with both the common library's code and
+ * the data), or via udata_setCommonData().
+ * Application data must be provided via udata_setAppData() or by using
+ * "open" functions that take pointers to data, for example ucol_openBinary().
+ *
+ * File access is not used at all in the i18n library.
+ *
+ * File access cannot be turned off for the icuio library or for the ICU
+ * test suites and ICU tools.
+ *
+ * @stable ICU 3.6
+ */
+#ifndef UCONFIG_NO_FILE_IO
+# define UCONFIG_NO_FILE_IO 0
+#endif
+
+#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR)
+# error Contradictory file io switches in uconfig.h.
+#endif
+
+/**
+ * \def UCONFIG_NO_CONVERSION
+ * ICU will not completely build with this switch turned on.
+ * This switch turns off all converters.
+ *
+ * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
+ * in utypes.h if char* strings in your environment are always in UTF-8.
+ *
+ * @stable ICU 3.2
+ * @see U_CHARSET_IS_UTF8
+ */
+#ifndef UCONFIG_NO_CONVERSION
+# define UCONFIG_NO_CONVERSION 0
+#endif
+
+#if UCONFIG_NO_CONVERSION
+# define UCONFIG_NO_LEGACY_CONVERSION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_LEGACY_CONVERSION
+ * This switch turns off all converters except for
+ * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
+ * - US-ASCII
+ * - ISO-8859-1
+ *
+ * Turning off legacy conversion is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_LEGACY_CONVERSION
+# define UCONFIG_NO_LEGACY_CONVERSION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_NORMALIZATION
+ * This switch turns off normalization.
+ * It implies turning off several other services as well, for example
+ * collation and IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_NORMALIZATION
+# define UCONFIG_NO_NORMALIZATION 0
+#elif UCONFIG_NO_NORMALIZATION
+ /* common library */
+ /* ICU 50 CJK dictionary BreakIterator uses normalization */
+# define UCONFIG_NO_BREAK_ITERATION 1
+ /* IDNA (UTS #46) is implemented via normalization */
+# define UCONFIG_NO_IDNA 1
+
+ /* i18n library */
+# if UCONFIG_ONLY_COLLATION
+# error Contradictory collation switches in uconfig.h.
+# endif
+# define UCONFIG_NO_COLLATION 1
+# define UCONFIG_NO_TRANSLITERATION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_BREAK_ITERATION
+ * This switch turns off break iteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_BREAK_ITERATION
+# define UCONFIG_NO_BREAK_ITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_IDNA
+ * This switch turns off IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_IDNA
+# define UCONFIG_NO_IDNA 0
+#endif
+
+/**
+ * \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+ * Determines the default UMessagePatternApostropheMode.
+ * See the documentation for that enum.
+ *
+ * @stable ICU 4.8
+ */
+#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
+#endif
+
+/* i18n library switches ---------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_COLLATION
+ * This switch turns off collation and collation-based string search.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_COLLATION
+# define UCONFIG_NO_COLLATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_FORMATTING
+ * This switch turns off formatting and calendar/timezone services.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_FORMATTING
+# define UCONFIG_NO_FORMATTING 0
+#endif
+
+/**
+ * \def UCONFIG_NO_TRANSLITERATION
+ * This switch turns off transliteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_TRANSLITERATION
+# define UCONFIG_NO_TRANSLITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_REGULAR_EXPRESSIONS
+ * This switch turns off regular expressions.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
+# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
+#endif
+
+/**
+ * \def UCONFIG_NO_SERVICE
+ * This switch turns off service registration.
+ *
+ * @stable ICU 3.2
+ */
+#ifndef UCONFIG_NO_SERVICE
+# define UCONFIG_NO_SERVICE 0
+#endif
+
+/**
+ * \def UCONFIG_HAVE_PARSEALLINPUT
+ * This switch turns on the "parse all input" attribute. Binary incompatible.
+ *
+ * @internal
+ */
+#ifndef UCONFIG_HAVE_PARSEALLINPUT
+# define UCONFIG_HAVE_PARSEALLINPUT 1
+#endif
+
+
+/**
+ * \def UCONFIG_FORMAT_FASTPATHS_49
+ * This switch turns on other formatting fastpaths. Binary incompatible in object DecimalFormat and DecimalFormatSymbols
+ *
+ * @internal
+ */
+#ifndef UCONFIG_FORMAT_FASTPATHS_49
+# define UCONFIG_FORMAT_FASTPATHS_49 1
+#endif
+
+/**
+ * \def UCONFIG_NO_FILTERED_BREAK_ITERATION
+ * This switch turns off filtered break iteration code.
+ *
+ * @internal
+ */
+#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION
+# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0
+
+
+
+#endif
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ucsdet.h b/src/core/basetypes/icu-ucsdet/include/unicode/ucsdet.h
new file mode 100644
index 00000000..d3a297be
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ucsdet.h
@@ -0,0 +1,413 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2013, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ * file name: ucsdet.h
+ * encoding: US-ASCII
+ * indentation:4
+ *
+ * created on: 2005Aug04
+ * created by: Andy Heninger
+ *
+ * ICU Character Set Detection, API for C
+ *
+ * Draft version 18 Oct 2005
+ *
+ */
+
+#ifndef __UCSDET_H
+#define __UCSDET_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/localpointer.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Charset Detection API
+ *
+ * This API provides a facility for detecting the
+ * charset or encoding of character data in an unknown text format.
+ * The input data can be from an array of bytes.
+ * <p>
+ * Character set detection is at best an imprecise operation. The detection
+ * process will attempt to identify the charset that best matches the characteristics
+ * of the byte data, but the process is partly statistical in nature, and
+ * the results can not be guaranteed to always be correct.
+ * <p>
+ * For best accuracy in charset detection, the input data should be primarily
+ * in a single language, and a minimum of a few hundred bytes worth of plain text
+ * in the language are needed. The detection process will attempt to
+ * ignore html or xml style markup that could otherwise obscure the content.
+ */
+
+
+struct UCharsetDetector;
+/**
+ * Structure representing a charset detector
+ * @stable ICU 3.6
+ */
+typedef struct UCharsetDetector UCharsetDetector;
+
+struct UCharsetMatch;
+/**
+ * Opaque structure representing a match that was identified
+ * from a charset detection operation.
+ * @stable ICU 3.6
+ */
+typedef struct UCharsetMatch UCharsetMatch;
+
+/**
+ * Open a charset detector.
+ *
+ * @param status Any error conditions occurring during the open
+ * operation are reported back in this variable.
+ * @return the newly opened charset detector.
+ * @stable ICU 3.6
+ */
+U_STABLE UCharsetDetector * U_EXPORT2
+ucsdet_open(UErrorCode *status);
+
+/**
+ * Close a charset detector. All storage and any other resources
+ * owned by this charset detector will be released. Failure to
+ * close a charset detector when finished with it can result in
+ * memory leaks in the application.
+ *
+ * @param ucsd The charset detector to be closed.
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucsdet_close(UCharsetDetector *ucsd);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCharsetDetectorPointer
+ * "Smart pointer" class, closes a UCharsetDetector via ucsdet_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCharsetDetectorPointer, UCharsetDetector, ucsdet_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Set the input byte data whose charset is to detected.
+ *
+ * Ownership of the input text byte array remains with the caller.
+ * The input string must not be altered or deleted until the charset
+ * detector is either closed or reset to refer to different input text.
+ *
+ * @param ucsd the charset detector to be used.
+ * @param textIn the input text of unknown encoding. .
+ * @param len the length of the input text, or -1 if the text
+ * is NUL terminated.
+ * @param status any error conditions are reported back in this variable.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
+
+
+/** Set the declared encoding for charset detection.
+ * The declared encoding of an input text is an encoding obtained
+ * by the user from an http header or xml declaration or similar source that
+ * can be provided as an additional hint to the charset detector.
+ *
+ * How and whether the declared encoding will be used during the
+ * detection process is TBD.
+ *
+ * @param ucsd the charset detector to be used.
+ * @param encoding an encoding for the current data obtained from
+ * a header or declaration or other source outside
+ * of the byte data itself.
+ * @param length the length of the encoding name, or -1 if the name string
+ * is NUL terminated.
+ * @param status any error conditions are reported back in this variable.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
+
+
+/**
+ * Return the charset that best matches the supplied input data.
+ *
+ * Note though, that because the detection
+ * only looks at the start of the input data,
+ * there is a possibility that the returned charset will fail to handle
+ * the full set of input data.
+ * <p>
+ * The returned UCharsetMatch object is owned by the UCharsetDetector.
+ * It will remain valid until the detector input is reset, or until
+ * the detector is closed.
+ * <p>
+ * The function will fail if
+ * <ul>
+ * <li>no charset appears to match the data.</li>
+ * <li>no input text has been provided</li>
+ * </ul>
+ *
+ * @param ucsd the charset detector to be used.
+ * @param status any error conditions are reported back in this variable.
+ * @return a UCharsetMatch representing the best matching charset,
+ * or NULL if no charset matches the byte data.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE const UCharsetMatch * U_EXPORT2
+ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status);
+
+
+/**
+ * Find all charset matches that appear to be consistent with the input,
+ * returning an array of results. The results are ordered with the
+ * best quality match first.
+ *
+ * Because the detection only looks at a limited amount of the
+ * input byte data, some of the returned charsets may fail to handle
+ * the all of input data.
+ * <p>
+ * The returned UCharsetMatch objects are owned by the UCharsetDetector.
+ * They will remain valid until the detector is closed or modified
+ *
+ * <p>
+ * Return an error if
+ * <ul>
+ * <li>no charsets appear to match the input data.</li>
+ * <li>no input text has been provided</li>
+ * </ul>
+ *
+ * @param ucsd the charset detector to be used.
+ * @param matchesFound pointer to a variable that will be set to the
+ * number of charsets identified that are consistent with
+ * the input data. Output only.
+ * @param status any error conditions are reported back in this variable.
+ * @return A pointer to an array of pointers to UCharSetMatch objects.
+ * This array, and the UCharSetMatch instances to which it refers,
+ * are owned by the UCharsetDetector, and will remain valid until
+ * the detector is closed or modified.
+ * @stable ICU 3.6
+ */
+U_STABLE const UCharsetMatch ** U_EXPORT2
+ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
+
+
+
+/**
+ * Get the name of the charset represented by a UCharsetMatch.
+ *
+ * The storage for the returned name string is owned by the
+ * UCharsetMatch, and will remain valid while the UCharsetMatch
+ * is valid.
+ *
+ * The name returned is suitable for use with the ICU conversion APIs.
+ *
+ * @param ucsm The charset match object.
+ * @param status Any error conditions are reported back in this variable.
+ * @return The name of the matching charset.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status);
+
+/**
+ * Get a confidence number for the quality of the match of the byte
+ * data with the charset. Confidence numbers range from zero to 100,
+ * with 100 representing complete confidence and zero representing
+ * no confidence.
+ *
+ * The confidence values are somewhat arbitrary. They define an
+ * an ordering within the results for any single detection operation
+ * but are not generally comparable between the results for different input.
+ *
+ * A confidence value of ten does have a general meaning - it is used
+ * for charsets that can represent the input data, but for which there
+ * is no other indication that suggests that the charset is the correct one.
+ * Pure 7 bit ASCII data, for example, is compatible with a
+ * great many charsets, most of which will appear as possible matches
+ * with a confidence of 10.
+ *
+ * @param ucsm The charset match object.
+ * @param status Any error conditions are reported back in this variable.
+ * @return A confidence number for the charset match.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status);
+
+/**
+ * Get the RFC 3066 code for the language of the input data.
+ *
+ * The Charset Detection service is intended primarily for detecting
+ * charsets, not language. For some, but not all, charsets, a language is
+ * identified as a byproduct of the detection process, and that is what
+ * is returned by this function.
+ *
+ * CAUTION:
+ * 1. Language information is not available for input data encoded in
+ * all charsets. In particular, no language is identified
+ * for UTF-8 input data.
+ *
+ * 2. Closely related languages may sometimes be confused.
+ *
+ * If more accurate language detection is required, a linguistic
+ * analysis package should be used.
+ *
+ * The storage for the returned name string is owned by the
+ * UCharsetMatch, and will remain valid while the UCharsetMatch
+ * is valid.
+ *
+ * @param ucsm The charset match object.
+ * @param status Any error conditions are reported back in this variable.
+ * @return The RFC 3066 code for the language of the input data, or
+ * an empty string if the language could not be determined.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status);
+
+
+/**
+ * Get the entire input text as a UChar string, placing it into
+ * a caller-supplied buffer. A terminating
+ * NUL character will be appended to the buffer if space is available.
+ *
+ * The number of UChars in the output string, not including the terminating
+ * NUL, is returned.
+ *
+ * If the supplied buffer is smaller than required to hold the output,
+ * the contents of the buffer are undefined. The full output string length
+ * (in UChars) is returned as always, and can be used to allocate a buffer
+ * of the correct size.
+ *
+ *
+ * @param ucsm The charset match object.
+ * @param buf A UChar buffer to be filled with the converted text data.
+ * @param cap The capacity of the buffer in UChars.
+ * @param status Any error conditions are reported back in this variable.
+ * @return The number of UChars in the output string.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucsdet_getUChars(const UCharsetMatch *ucsm,
+ UChar *buf, int32_t cap, UErrorCode *status);
+
+
+
+/**
+ * Get an iterator over the set of all detectable charsets -
+ * over the charsets that are known to the charset detection
+ * service.
+ *
+ * The returned UEnumeration provides access to the names of
+ * the charsets.
+ *
+ * <p>
+ * The state of the Charset detector that is passed in does not
+ * affect the result of this function, but requiring a valid, open
+ * charset detector as a parameter insures that the charset detection
+ * service has been safely initialized and that the required detection
+ * data is available.
+ *
+ * <p>
+ * <b>Note:</b> Multiple different charset encodings in a same family may use
+ * a single shared name in this implementation. For example, this method returns
+ * an array including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252"
+ * (Windows Latin 1). However, actual detection result could be "windows-1252"
+ * when the input data matches Latin 1 code points with any points only available
+ * in "windows-1252".
+ *
+ * @param ucsd a Charset detector.
+ * @param status Any error conditions are reported back in this variable.
+ * @return an iterator providing access to the detectable charset names.
+ * @stable ICU 3.6
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status);
+
+/**
+ * Test whether input filtering is enabled for this charset detector.
+ * Input filtering removes text that appears to be HTML or xml
+ * markup from the input before applying the code page detection
+ * heuristics.
+ *
+ * @param ucsd The charset detector to check.
+ * @return TRUE if filtering is enabled.
+ * @stable ICU 3.6
+ */
+
+U_STABLE UBool U_EXPORT2
+ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
+
+
+/**
+ * Enable filtering of input text. If filtering is enabled,
+ * text within angle brackets ("<" and ">") will be removed
+ * before detection, which will remove most HTML or xml markup.
+ *
+ * @param ucsd the charset detector to be modified.
+ * @param filter <code>true</code> to enable input text filtering.
+ * @return The previous setting.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE UBool U_EXPORT2
+ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Get an iterator over the set of detectable charsets -
+ * over the charsets that are enabled by the specified charset detector.
+ *
+ * The returned UEnumeration provides access to the names of
+ * the charsets.
+ *
+ * @param ucsd a Charset detector.
+ * @param status Any error conditions are reported back in this variable.
+ * @return an iterator providing access to the detectable charset names by
+ * the specified charset detector.
+ * @internal
+ */
+U_INTERNAL UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status);
+
+/**
+ * Enable or disable individual charset encoding.
+ * A name of charset encoding must be included in the names returned by
+ * {@link #getAllDetectableCharsets()}.
+ *
+ * @param ucsd a Charset detector.
+ * @param encoding encoding the name of charset encoding.
+ * @param enabled <code>TRUE</code> to enable, or <code>FALSE</code> to disable the
+ * charset encoding.
+ * @param status receives the return status. When the name of charset encoding
+ * is not supported, U_ILLEGAL_ARGUMENT_ERROR is set.
+ * @internal
+ */
+U_INTERNAL void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
+
+#endif
+#endif /* __UCSDET_H */
+
+
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/udata.h b/src/core/basetypes/icu-ucsdet/include/unicode/udata.h
new file mode 100644
index 00000000..29e46630
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/udata.h
@@ -0,0 +1,430 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: udata.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999oct25
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UDATA_H__
+#define __UDATA_H__
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ * \brief C API: Data loading interface
+ *
+ * <h2>Information about data loading interface</h2>
+ *
+ * This API is used to find and efficiently load data for ICU and applications
+ * using ICU. It provides an abstract interface that specifies a data type and
+ * name to find and load the data. Normally this API is used by other ICU APIs
+ * to load required data out of the ICU data library, but it can be used to
+ * load data out of other places.
+ *
+ * See the User Guide Data Management chapter.
+ */
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Character used to separate package names from tree names
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR '-'
+
+/**
+ * String used to separate package names from tree names
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR_STRING "-"
+
+/**
+ * Character used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_CHAR '/'
+
+/**
+ * String used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_STRING "/"
+
+/**
+ * Alias for standard ICU data
+ * @internal ICU 3.0
+ */
+#define U_ICUDATA_ALIAS "ICUDATA"
+
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * UDataInfo contains the properties about the requested data.
+ * This is meta data.
+ *
+ * <p>This structure may grow in the future, indicated by the
+ * <code>size</code> field.</p>
+ *
+ * <p>ICU data must be at least 8-aligned, and should be 16-aligned.
+ * The UDataInfo struct begins 4 bytes after the start of the data item,
+ * so it is 4-aligned.
+ *
+ * <p>The platform data property fields help determine if a data
+ * file can be efficiently used on a given machine.
+ * The particular fields are of importance only if the data
+ * is affected by the properties - if there is integer data
+ * with word sizes > 1 byte, char* text, or UChar* text.</p>
+ *
+ * <p>The implementation for the <code>udata_open[Choice]()</code>
+ * functions may reject data based on the value in <code>isBigEndian</code>.
+ * No other field is used by the <code>udata</code> API implementation.</p>
+ *
+ * <p>The <code>dataFormat</code> may be used to identify
+ * the kind of data, e.g. a converter table.</p>
+ *
+ * <p>The <code>formatVersion</code> field should be used to
+ * make sure that the format can be interpreted.
+ * It may be a good idea to check only for the one or two highest
+ * of the version elements to allow the data memory to
+ * get more or somewhat rearranged contents, for as long
+ * as the using code can still interpret the older contents.</p>
+ *
+ * <p>The <code>dataVersion</code> field is intended to be a
+ * common place to store the source version of the data;
+ * for data from the Unicode character database, this could
+ * reflect the Unicode version.</p>
+ *
+ * @stable ICU 2.0
+ */
+typedef struct {
+ /** sizeof(UDataInfo)
+ * @stable ICU 2.0 */
+ uint16_t size;
+
+ /** unused, set to 0
+ * @stable ICU 2.0*/
+ uint16_t reservedWord;
+
+ /* platform data properties */
+ /** 0 for little-endian machine, 1 for big-endian
+ * @stable ICU 2.0 */
+ uint8_t isBigEndian;
+
+ /** see U_CHARSET_FAMILY values in utypes.h
+ * @stable ICU 2.0*/
+ uint8_t charsetFamily;
+
+ /** sizeof(UChar), one of { 1, 2, 4 }
+ * @stable ICU 2.0*/
+ uint8_t sizeofUChar;
+
+ /** unused, set to 0
+ * @stable ICU 2.0*/
+ uint8_t reservedByte;
+
+ /** data format identifier
+ * @stable ICU 2.0*/
+ uint8_t dataFormat[4];
+
+ /** versions: [0] major [1] minor [2] milli [3] micro
+ * @stable ICU 2.0*/
+ uint8_t formatVersion[4];
+
+ /** versions: [0] major [1] minor [2] milli [3] micro
+ * @stable ICU 2.0*/
+ uint8_t dataVersion[4];
+} UDataInfo;
+
+/* API for reading data -----------------------------------------------------*/
+
+/**
+ * Forward declaration of the data memory type.
+ * @stable ICU 2.0
+ */
+typedef struct UDataMemory UDataMemory;
+
+/**
+ * Callback function for udata_openChoice().
+ * @param context parameter passed into <code>udata_openChoice()</code>.
+ * @param type The type of the data as passed into <code>udata_openChoice()</code>.
+ * It may be <code>NULL</code>.
+ * @param name The name of the data as passed into <code>udata_openChoice()</code>.
+ * @param pInfo A pointer to the <code>UDataInfo</code> structure
+ * of data that has been loaded and will be returned
+ * by <code>udata_openChoice()</code> if this function
+ * returns <code>TRUE</code>.
+ * @return TRUE if the current data memory is acceptable
+ * @stable ICU 2.0
+ */
+typedef UBool U_CALLCONV
+UDataMemoryIsAcceptable(void *context,
+ const char *type, const char *name,
+ const UDataInfo *pInfo);
+
+
+/**
+ * Convenience function.
+ * This function works the same as <code>udata_openChoice</code>
+ * except that any data that matches the type and name
+ * is assumed to be acceptable.
+ * @param path Specifies an absolute path and/or a basename for the
+ * finding of the data in the file system.
+ * <code>NULL</code> for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ * For example, resource bundles are loaded with type "res",
+ * conversion tables with type "cnv".
+ * This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * @return A pointer (handle) to a data memory object, or <code>NULL</code>
+ * if an error occurs. Call <code>udata_getMemory()</code>
+ * to get a pointer to the actual data.
+ *
+ * @see udata_openChoice
+ * @stable ICU 2.0
+ */
+U_STABLE UDataMemory * U_EXPORT2
+udata_open(const char *path, const char *type, const char *name,
+ UErrorCode *pErrorCode);
+
+/**
+ * Data loading function.
+ * This function is used to find and load efficiently data for
+ * ICU and applications using ICU.
+ * It provides an abstract interface that allows to specify a data
+ * type and name to find and load the data.
+ *
+ * <p>The implementation depends on platform properties and user preferences
+ * and may involve loading shared libraries (DLLs), mapping
+ * files into memory, or fopen()/fread() files.
+ * It may also involve using static memory or database queries etc.
+ * Several or all data items may be combined into one entity
+ * (DLL, memory-mappable file).</p>
+ *
+ * <p>The data is always preceded by a header that includes
+ * a <code>UDataInfo</code> structure.
+ * The caller's <code>isAcceptable()</code> function is called to make
+ * sure that the data is useful. It may be called several times if it
+ * rejects the data and there is more than one location with data
+ * matching the type and name.</p>
+ *
+ * <p>If <code>path==NULL</code>, then ICU data is loaded.
+ * Otherwise, it is separated into a basename and a basename-less directory string.
+ * The basename is used as the data package name, and the directory is
+ * logically prepended to the ICU data directory string.</p>
+ *
+ * <p>For details about ICU data loading see the User Guide
+ * Data Management chapter. (http://icu-project.org/userguide/icudata.html)</p>
+ *
+ * @param path Specifies an absolute path and/or a basename for the
+ * finding of the data in the file system.
+ * <code>NULL</code> for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ * For example, resource bundles are loaded with type "res",
+ * conversion tables with type "cnv".
+ * This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param isAcceptable This function is called to verify that loaded data
+ * is useful for the client code. If it returns FALSE
+ * for all data items, then <code>udata_openChoice()</code>
+ * will return with an error.
+ * @param context Arbitrary parameter to be passed into isAcceptable.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * @return A pointer (handle) to a data memory object, or <code>NULL</code>
+ * if an error occurs. Call <code>udata_getMemory()</code>
+ * to get a pointer to the actual data.
+ * @stable ICU 2.0
+ */
+U_STABLE UDataMemory * U_EXPORT2
+udata_openChoice(const char *path, const char *type, const char *name,
+ UDataMemoryIsAcceptable *isAcceptable, void *context,
+ UErrorCode *pErrorCode);
+
+/**
+ * Close the data memory.
+ * This function must be called to allow the system to
+ * release resources associated with this data memory.
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_close(UDataMemory *pData);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUDataMemoryPointer
+ * "Smart pointer" class, closes a UDataMemory via udata_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Get the pointer to the actual data inside the data memory.
+ * The data is read-only.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ *
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_STABLE const void * U_EXPORT2
+udata_getMemory(UDataMemory *pData);
+
+/**
+ * Get the information from the data memory header.
+ * This allows to get access to the header containing
+ * platform data properties etc. which is not part of
+ * the data itself and can therefore not be accessed
+ * via the pointer that <code>udata_getMemory()</code> returns.
+ *
+ * @param pData pointer to the data memory object
+ * @param pInfo pointer to a UDataInfo object;
+ * its <code>size</code> field must be set correctly,
+ * typically to <code>sizeof(UDataInfo)</code>.
+ *
+ * <code>*pInfo</code> will be filled with the UDataInfo structure
+ * in the data memory object. If this structure is smaller than
+ * <code>pInfo->size</code>, then the <code>size</code> will be
+ * adjusted and only part of the structure will be filled.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
+
+/**
+ * This function bypasses the normal ICU data loading process and
+ * allows you to force ICU's system data to come out of a user-specified
+ * area in memory.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ * See http://userguide.icu-project.org/icudata
+ *
+ * The format of this data is that of the icu common data file, as is
+ * generated by the pkgdata tool with mode=common or mode=dll.
+ * You can read in a whole common mode file and pass the address to the start of the
+ * data, or (with the appropriate link options) pass in the pointer to
+ * the data that has been loaded from a dll by the operating system,
+ * as shown in this code:
+ *
+ * extern const char U_IMPORT U_ICUDATA_ENTRY_POINT [];
+ * // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
+ * UErrorCode status = U_ZERO_ERROR;
+ *
+ * udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
+ *
+ * It is important that the declaration be as above. The entry point
+ * must not be declared as an extern void*.
+ *
+ * Starting with ICU 4.4, it is possible to set several data packages,
+ * one per call to this function.
+ * udata_open() will look for data in the multiple data packages in the order
+ * in which they were set.
+ * The position of the linked-in or default-name ICU .data package in the
+ * search list depends on when the first data item is loaded that is not contained
+ * in the already explicitly set packages.
+ * If data was loaded implicitly before the first call to this function
+ * (for example, via opening a converter, constructing a UnicodeString
+ * from default-codepage data, using formatting or collation APIs, etc.),
+ * then the default data will be first in the list.
+ *
+ * This function has no effect on application (non ICU) data. See udata_setAppData()
+ * for similar functionality for application data.
+ *
+ * @param data pointer to ICU common data
+ * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_setCommonData(const void *data, UErrorCode *err);
+
+
+/**
+ * This function bypasses the normal ICU data loading process for application-specific
+ * data and allows you to force the it to come out of a user-specified
+ * pointer.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ * See http://userguide.icu-project.org/icudata
+ *
+ * The format of this data is that of the icu common data file, like 'icudt26l.dat'
+ * or the corresponding shared library (DLL) file.
+ * The application must read in or otherwise construct an image of the data and then
+ * pass the address of it to this function.
+ *
+ *
+ * Warning: setAppData will set a U_USING_DEFAULT_WARNING code if
+ * data with the specifed path that has already been opened, or
+ * if setAppData with the same path has already been called.
+ * Any such calls to setAppData will have no effect.
+ *
+ *
+ * @param packageName the package name by which the application will refer
+ * to (open) this data
+ * @param data pointer to the data
+ * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
+ * @see udata_setCommonData
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
+
+/**
+ * Possible settings for udata_setFileAccess()
+ * @see udata_setFileAccess
+ * @stable ICU 3.4
+ */
+typedef enum UDataFileAccess {
+ /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */
+ UDATA_FILES_FIRST,
+ /** An alias for the default access mode. @stable ICU 3.4 */
+ UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
+ /** ICU only loads data from packages, not from single files. @stable ICU 3.4 */
+ UDATA_ONLY_PACKAGES,
+ /** ICU loads data from packages first, and only from single files
+ if the data cannot be found in a package. @stable ICU 3.4 */
+ UDATA_PACKAGES_FIRST,
+ /** ICU does not access the file system for data loading. @stable ICU 3.4 */
+ UDATA_NO_FILES,
+ /** Number of real UDataFileAccess values. @stable ICU 3.4 */
+ UDATA_FILE_ACCESS_COUNT
+} UDataFileAccess;
+
+/**
+ * This function may be called to control how ICU loads data. It must be called
+ * before any ICU data is loaded, including application data loaded with
+ * ures/ResourceBundle or udata APIs. This function is not multithread safe.
+ * The results of calling it while other threads are loading data are undefined.
+ * @param access The type of file access to be used
+ * @param status Error code.
+ * @see UDataFileAccess
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
+
+U_CDECL_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uenum.h b/src/core/basetypes/icu-ucsdet/include/unicode/uenum.h
new file mode 100644
index 00000000..5408ec5a
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uenum.h
@@ -0,0 +1,206 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uenum.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2002jul08
+* created by: Vladimir Weinstein
+*/
+
+#ifndef __UENUM_H
+#define __UENUM_H
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/strenum.h"
+#endif
+
+/**
+ * \file
+ * \brief C API: String Enumeration
+ */
+
+/**
+ * An enumeration object.
+ * For usage in C programs.
+ * @stable ICU 2.2
+ */
+struct UEnumeration;
+/** structure representing an enumeration object instance @stable ICU 2.2 */
+typedef struct UEnumeration UEnumeration;
+
+/**
+ * Disposes of resources in use by the iterator. If en is NULL,
+ * does nothing. After this call, any char* or UChar* pointer
+ * returned by uenum_unext() or uenum_next() is invalid.
+ * @param en UEnumeration structure pointer
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uenum_close(UEnumeration* en);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUEnumerationPointer
+ * "Smart pointer" class, closes a UEnumeration via uenum_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns the number of elements that the iterator traverses. If
+ * the iterator is out-of-sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR.
+ * This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched (depending
+ * on the type of data being traversed). Use with caution and only
+ * when necessary.
+ * @param en UEnumeration structure pointer
+ * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
+ * iterator is out of sync.
+ * @return number of elements in the iterator
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+uenum_count(UEnumeration* en, UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list. If there are
+ * no more elements, returns NULL. If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned. If the native service string is a char* string,
+ * it is converted to UChar* with the invariant converter.
+ * The result is terminated by (UChar)0.
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ * (not including the terminating \\0).
+ * If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service.
+ * @return a pointer to the string. The string will be
+ * zero-terminated. The return pointer is owned by this iterator
+ * and must not be deleted by the caller. The pointer is valid
+ * until the next call to any uenum_... method, including
+ * uenum_next() or uenum_unext(). When all strings have been
+ * traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_STABLE const UChar* U_EXPORT2
+uenum_unext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list. If there are
+ * no more elements, returns NULL. If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned. If the native service string is a UChar*
+ * string, it is converted to char* with the invariant converter.
+ * The result is terminated by (char)0. If the conversion fails
+ * (because a character cannot be converted) then status is set to
+ * U_INVARIANT_CONVERSION_ERROR and the return value is undefined
+ * (but non-NULL).
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ * (not including the terminating \\0).
+ * If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service. Set to
+ * U_INVARIANT_CONVERSION_ERROR if the underlying native string is
+ * UChar* and conversion to char* with the invariant converter
+ * fails. This error pertains only to current string, so iteration
+ * might be able to continue successfully.
+ * @return a pointer to the string. The string will be
+ * zero-terminated. The return pointer is owned by this iterator
+ * and must not be deleted by the caller. The pointer is valid
+ * until the next call to any uenum_... method, including
+ * uenum_next() or uenum_unext(). When all strings have been
+ * traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_STABLE const char* U_EXPORT2
+uenum_next(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Resets the iterator to the current list of service IDs. This
+ * re-establishes sync with the service and rewinds the iterator
+ * to start at the first element.
+ * @param en the iterator object
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service.
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uenum_reset(UEnumeration* en, UErrorCode* status);
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * Given a StringEnumeration, wrap it in a UEnumeration. The
+ * StringEnumeration is adopted; after this call, the caller must not
+ * delete it (regardless of error status).
+ * @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration.
+ * @param ec the error code.
+ * @return a UEnumeration wrapping the adopted StringEnumeration.
+ * @stable ICU 4.2
+ */
+U_STABLE UEnumeration* U_EXPORT2
+uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec);
+
+#endif
+
+/**
+ * Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null.
+ * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
+ * \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
+ * @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller.
+ * @param count length of the array
+ * @param ec error code
+ * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
+ * @see uenum_close
+ * @stable ICU 50
+ */
+U_STABLE UEnumeration* U_EXPORT2
+uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
+ UErrorCode* ec);
+
+/* Note: next function is not hidden as draft, as it is used internally (it was formerly an internal function). */
+
+/**
+ * Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null.
+ * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
+ * \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
+ * @param strings array of char* strings (each null terminated). All storage is owned by the caller.
+ * @param count length of the array
+ * @param ec error code
+ * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory
+ * @see uenum_close
+ * @stable ICU 50
+ */
+U_STABLE UEnumeration* U_EXPORT2
+uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
+ UErrorCode* ec);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uiter.h b/src/core/basetypes/icu-ucsdet/include/unicode/uiter.h
new file mode 100644
index 00000000..0cdb8ffb
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uiter.h
@@ -0,0 +1,707 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2011 International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uiter.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jan18
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UITER_H__
+#define __UITER_H__
+
+/**
+ * \file
+ * \brief C API: Unicode Character Iteration
+ *
+ * @see UCharIterator
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+ U_NAMESPACE_BEGIN
+
+ class CharacterIterator;
+ class Replaceable;
+
+ U_NAMESPACE_END
+#endif
+
+U_CDECL_BEGIN
+
+struct UCharIterator;
+typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+
+/**
+ * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
+ * @see UCharIteratorMove
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef enum UCharIteratorOrigin {
+ UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
+} UCharIteratorOrigin;
+
+/** Constants for UCharIterator. @stable ICU 2.6 */
+enum {
+ /**
+ * Constant value that may be returned by UCharIteratorMove
+ * indicating that the final UTF-16 index is not known, but that the move succeeded.
+ * This can occur when moving relative to limit or length, or
+ * when moving relative to the current index after a setState()
+ * when the current UTF-16 index is not known.
+ *
+ * It would be very inefficient to have to count from the beginning of the text
+ * just to get the current/limit/length index after moving relative to it.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ *
+ * @stable ICU 2.6
+ */
+ UITER_UNKNOWN_INDEX=-2
+};
+
+
+/**
+ * Constant for UCharIterator getState() indicating an error or
+ * an unknown state.
+ * Returned by uiter_getState()/UCharIteratorGetState
+ * when an error occurs.
+ * Also, some UCharIterator implementations may not be able to return
+ * a valid state for each position. This will be clearly documented
+ * for each such iterator (none of the public ones here).
+ *
+ * @stable ICU 2.6
+ */
+#define UITER_NO_STATE ((uint32_t)0xffffffff)
+
+/**
+ * Function type declaration for UCharIterator.getIndex().
+ *
+ * Gets the current position, or the start or limit of the
+ * iteration range.
+ *
+ * This function may perform slowly for UITER_CURRENT after setState() was called,
+ * or for UITER_LENGTH, because an iterator implementation may have to count
+ * UChars if the underlying storage is not UTF-16.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param origin get the 0, start, limit, length, or current index
+ * @return the requested index, or U_SENTINEL in an error condition
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.move().
+ *
+ * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
+ *
+ * Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * Out of bounds movement will be pinned to the start or limit.
+ *
+ * This function may perform slowly for moving relative to UITER_LENGTH
+ * because an iterator implementation may have to count the rest of the
+ * UChars if the native storage is not UTF-16.
+ *
+ * When moving relative to the limit or length, or
+ * relative to the current position after setState() was called,
+ * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
+ * determination of the actual UTF-16 index.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ * See UITER_UNKNOWN_INDEX for details.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param delta can be positive, zero, or negative
+ * @param origin move relative to the 0, start, limit, length, or current index
+ * @return the new index, or U_SENTINEL on an error condition,
+ * or UITER_UNKNOWN_INDEX when the index is not known.
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @see UITER_UNKNOWN_INDEX
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.hasNext().
+ *
+ * Check if current() and next() can still
+ * return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether current() and next() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.hasPrevious().
+ *
+ * Check if previous() can still return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether previous() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.current().
+ *
+ * Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorCurrent(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.next().
+ *
+ * Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.previous().
+ *
+ * Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code unit (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.reservedFn().
+ * Reserved for future use.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param something some integer argument
+ * @return some integer
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorReserved(UCharIterator *iter, int32_t something);
+
+/**
+ * Function type declaration for UCharIterator.getState().
+ *
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * It is recommended that the state value be calculated to be as small as
+ * is feasible. For strings with limited lengths, fewer than 32 bits may
+ * be sufficient.
+ *
+ * This is used together with setState()/UCharIteratorSetState
+ * to save and restore the iterator position more efficiently than with
+ * getIndex()/move().
+ *
+ * The iterator state is defined as a uint32_t value because it is designed
+ * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
+ * of the character iterator.
+ *
+ * With some UCharIterator implementations (e.g., UTF-8),
+ * getting and setting the UTF-16 index with existing functions
+ * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
+ * relatively slow because the iterator has to "walk" from a known index
+ * to the requested one.
+ * This takes more time the farther it needs to go.
+ *
+ * An opaque state value allows an iterator implementation to provide
+ * an internal index (UTF-8: the source byte array index) for
+ * fast, constant-time restoration.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+typedef uint32_t U_CALLCONV
+UCharIteratorGetState(const UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.setState().
+ *
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * The iterator object need not be the same one as for which getState() was called,
+ * but it must be of the same type (set up using the same uiter_setXYZ function)
+ * and it must iterate over the same string
+ * (binary identical regardless of memory address).
+ * For more about the state word see UCharIteratorGetState.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+typedef void U_CALLCONV
+UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+
+/**
+ * C API for code unit iteration.
+ * This can be used as a C wrapper around
+ * CharacterIterator, Replaceable, or implemented using simple strings, etc.
+ *
+ * There are two roles for using UCharIterator:
+ *
+ * A "provider" sets the necessary function pointers and controls the "protected"
+ * fields of the UCharIterator structure. A "provider" passes a UCharIterator
+ * into C APIs that need a UCharIterator as an abstract, flexible string interface.
+ *
+ * Implementations of such C APIs are "callers" of UCharIterator functions;
+ * they only use the "public" function pointers and never access the "protected"
+ * fields directly.
+ *
+ * The current() and next() functions only check the current index against the
+ * limit, and previous() only checks the current index against the start,
+ * to see if the iterator already reached the end of the iteration range.
+ *
+ * The assumption - in all iterators - is that the index is moved via the API,
+ * which means it won't go out of bounds, or the index is modified by
+ * user code that knows enough about the iterator implementation to set valid
+ * index values.
+ *
+ * UCharIterator functions return code unit values 0..0xffff,
+ * or U_SENTINEL if the iteration bounds are reached.
+ *
+ * @stable ICU 2.1
+ */
+struct UCharIterator {
+ /**
+ * (protected) Pointer to string or wrapped object or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ const void *context;
+
+ /**
+ * (protected) Length of string or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t length;
+
+ /**
+ * (protected) Start index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t start;
+
+ /**
+ * (protected) Current index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t index;
+
+ /**
+ * (protected) Limit index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t limit;
+
+ /**
+ * (protected) Used by UTF-8 iterators and possibly others.
+ * @stable ICU 2.1
+ */
+ int32_t reservedField;
+
+ /**
+ * (public) Returns the current position or the
+ * start or limit index of the iteration range.
+ *
+ * @see UCharIteratorGetIndex
+ * @stable ICU 2.1
+ */
+ UCharIteratorGetIndex *getIndex;
+
+ /**
+ * (public) Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ *
+ * @see UCharIteratorMove
+ * @stable ICU 2.1
+ */
+ UCharIteratorMove *move;
+
+ /**
+ * (public) Check if current() and next() can still
+ * return another code unit.
+ *
+ * @see UCharIteratorHasNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasNext *hasNext;
+
+ /**
+ * (public) Check if previous() can still return another code unit.
+ *
+ * @see UCharIteratorHasPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasPrevious *hasPrevious;
+
+ /**
+ * (public) Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorCurrent
+ * @stable ICU 2.1
+ */
+ UCharIteratorCurrent *current;
+
+ /**
+ * (public) Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorNext *next;
+
+ /**
+ * (public) Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @see UCharIteratorPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorPrevious *previous;
+
+ /**
+ * (public) Reserved for future use. Currently NULL.
+ *
+ * @see UCharIteratorReserved
+ * @stable ICU 2.1
+ */
+ UCharIteratorReserved *reservedFn;
+
+ /**
+ * (public) Return the state of the iterator, to be restored later with setState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorGet
+ * @stable ICU 2.6
+ */
+ UCharIteratorGetState *getState;
+
+ /**
+ * (public) Restore the iterator state from the state word from a call
+ * to getState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorSet
+ * @stable ICU 2.6
+ */
+ UCharIteratorSetState *setState;
+};
+
+/**
+ * Helper function for UCharIterator to get the code point
+ * at the current index.
+ *
+ * Return the code point that includes the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ * If the current code unit is a lead or trail surrogate,
+ * then the following or preceding surrogate is used to form
+ * the code point value.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point
+ *
+ * @see UCharIterator
+ * @see U16_GET
+ * @see UnicodeString::char32At()
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_current32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the next code point.
+ *
+ * Return the code point at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @see U16_NEXT
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_next32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the previous code point.
+ *
+ * Decrement the index and return the code point from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code point (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @see U16_PREV
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_previous32(UCharIterator *iter);
+
+/**
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * This is a convenience function that calls iter->getState(iter)
+ * if iter->getState is not NULL;
+ * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+U_STABLE uint32_t U_EXPORT2
+uiter_getState(const UCharIterator *iter);
+
+/**
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
+ * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+/**
+ * Set up a UCharIterator to iterate over a string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the string s
+ * with iteration boundaries start=index=0 and length=limit=string length.
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length.
+ * The length field will be ignored.
+ *
+ * The string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s String to iterate over
+ * @param length Length of s, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-16BE string
+ * (byte vector with a big-endian pair of bytes per UChar).
+ *
+ * Everything works just like with a normal UChar iterator (uiter_setString),
+ * except that UChars are assembled from byte pairs,
+ * and that the length argument here indicates an even number of bytes.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-16BE string to iterate over
+ * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
+ * (NUL means pair of 0 bytes at even index from s)
+ *
+ * @see UCharIterator
+ * @see uiter_setString
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-8 string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
+ * with UTF-8 iteration boundaries 0 and length.
+ * The implementation counts the UTF-16 index on the fly and
+ * lazily evaluates the UTF-16 length of the text.
+ *
+ * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
+ * When the reservedField is not 0, then it contains a supplementary code point
+ * and the UTF-16 index is between the two corresponding surrogates.
+ * At that point, the UTF-8 index is behind that code point.
+ *
+ * The UTF-8 string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() returns a state value consisting of
+ * - the current UTF-8 source byte index (bits 31..1)
+ * - a flag (bit 0) that indicates whether the UChar position is in the middle
+ * of a surrogate pair
+ * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
+ *
+ * getState() cannot also encode the UTF-16 index in the state value.
+ * move(relative to limit or length), or
+ * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-8 string to iterate over
+ * @param length Length of s in bytes, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * Set up a UCharIterator to wrap around a C++ CharacterIterator.
+ *
+ * Sets the UCharIterator function pointers for iteration using the
+ * CharacterIterator charIter.
+ *
+ * The CharacterIterator pointer charIter is set into UCharIterator.context
+ * without copying or cloning the CharacterIterator object.
+ * The other "protected" UCharIterator fields are set to 0 and will be ignored.
+ * The iteration index and boundaries are controlled by the CharacterIterator.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param charIter CharacterIterator to wrap
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
+
+/**
+ * Set up a UCharIterator to iterate over a C++ Replaceable.
+ *
+ * Sets the UCharIterator function pointers for iteration over the
+ * Replaceable rep with iteration boundaries start=index=0 and
+ * length=limit=rep->length().
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length=rep->length().
+ * The length field will be ignored.
+ *
+ * The Replaceable pointer rep is set into UCharIterator.context without copying
+ * or cloning/reallocating the Replaceable object.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param rep Replaceable to iterate over
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
+
+#endif
+
+U_CDECL_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/umachine.h b/src/core/basetypes/icu-ucsdet/include/unicode/umachine.h
new file mode 100644
index 00000000..53215921
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/umachine.h
@@ -0,0 +1,356 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: umachine.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*
+* This file defines basic types and constants for ICU to be
+* platform-independent. umachine.h and utf.h are included into
+* utypes.h to provide all the general definitions for ICU.
+* All of these definitions used to be in utypes.h before
+* the UTF-handling macros made this unmaintainable.
+*/
+
+#ifndef __UMACHINE_H__
+#define __UMACHINE_H__
+
+
+/**
+ * \file
+ * \brief Basic types and constants for UTF
+ *
+ * <h2> Basic types and constants for UTF </h2>
+ * This file defines basic types and constants for utf.h to be
+ * platform-independent. umachine.h and utf.h are included into
+ * utypes.h to provide all the general definitions for ICU.
+ * All of these definitions used to be in utypes.h before
+ * the UTF-handling macros made this unmaintainable.
+ *
+ */
+/*==========================================================================*/
+/* Include platform-dependent definitions */
+/* which are contained in the platform-specific file platform.h */
+/*==========================================================================*/
+
+#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
+
+/*
+ * ANSI C headers:
+ * stddef.h defines wchar_t
+ */
+#include <stddef.h>
+
+/*==========================================================================*/
+/* For C wrappers, we use the symbol U_STABLE. */
+/* This works properly if the includer is C or C++. */
+/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */
+/*==========================================================================*/
+
+/**
+ * \def U_CFUNC
+ * This is used in a declaration of a library private ICU C function.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_BEGIN
+ * This is used to begin a declaration of a library private ICU C API.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_END
+ * This is used to end a declaration of a library private ICU C API
+ * @stable ICU 2.4
+ */
+
+#ifdef __cplusplus
+# define U_CFUNC extern "C"
+# define U_CDECL_BEGIN extern "C" {
+# define U_CDECL_END }
+#else
+# define U_CFUNC extern
+# define U_CDECL_BEGIN
+# define U_CDECL_END
+#endif
+
+#ifndef U_ATTRIBUTE_DEPRECATED
+/**
+ * \def U_ATTRIBUTE_DEPRECATED
+ * This is used for GCC specific attributes
+ * @internal
+ */
+#if U_GCC_MAJOR_MINOR >= 302
+# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
+/**
+ * \def U_ATTRIBUTE_DEPRECATED
+ * This is used for Visual C++ specific attributes
+ * @internal
+ */
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
+# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
+#else
+# define U_ATTRIBUTE_DEPRECATED
+#endif
+#endif
+
+/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
+#define U_CAPI U_CFUNC U_EXPORT
+/** This is used to declare a function as a stable public ICU C API*/
+#define U_STABLE U_CAPI
+/** This is used to declare a function as a draft public ICU C API */
+#define U_DRAFT U_CAPI
+/** This is used to declare a function as a deprecated public ICU C API */
+#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
+/** This is used to declare a function as an obsolete public ICU C API */
+#define U_OBSOLETE U_CAPI
+/** This is used to declare a function as an internal ICU C API */
+#define U_INTERNAL U_CAPI
+
+/**
+ * \def U_OVERRIDE
+ * Defined to the C++11 "override" keyword if available.
+ * Denotes a class or member which is an override of the base class.
+ * May result in an error if it applied to something not an override.
+ * @internal
+ */
+
+/**
+ * \def U_FINAL
+ * Defined to the C++11 "final" keyword if available.
+ * Denotes a class or member which may not be overridden in subclasses.
+ * May result in an error if subclasses attempt to override.
+ * @internal
+ */
+
+#if defined(__cplusplus) && __cplusplus>=201103L
+/* C++11 */
+#ifndef U_OVERRIDE
+#define U_OVERRIDE override
+#endif
+#ifndef U_FINAL
+#define U_FINAL final
+#endif
+#else
+/* not C++11 - define to nothing */
+#ifndef U_OVERRIDE
+#define U_OVERRIDE
+#endif
+#ifndef U_FINAL
+#define U_FINAL
+#endif
+#endif
+
+/*==========================================================================*/
+/* limits for int32_t etc., like in POSIX inttypes.h */
+/*==========================================================================*/
+
+#ifndef INT8_MIN
+/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
+# define INT8_MIN ((int8_t)(-128))
+#endif
+#ifndef INT16_MIN
+/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
+# define INT16_MIN ((int16_t)(-32767-1))
+#endif
+#ifndef INT32_MIN
+/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
+# define INT32_MIN ((int32_t)(-2147483647-1))
+#endif
+
+#ifndef INT8_MAX
+/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
+# define INT8_MAX ((int8_t)(127))
+#endif
+#ifndef INT16_MAX
+/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
+# define INT16_MAX ((int16_t)(32767))
+#endif
+#ifndef INT32_MAX
+/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
+# define INT32_MAX ((int32_t)(2147483647))
+#endif
+
+#ifndef UINT8_MAX
+/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT8_MAX ((uint8_t)(255U))
+#endif
+#ifndef UINT16_MAX
+/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT16_MAX ((uint16_t)(65535U))
+#endif
+#ifndef UINT32_MAX
+/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT32_MAX ((uint32_t)(4294967295U))
+#endif
+
+#if defined(U_INT64_T_UNAVAILABLE)
+# error int64_t is required for decimal format and rule-based number format.
+#else
+# ifndef INT64_C
+/**
+ * Provides a platform independent way to specify a signed 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
+ * @stable ICU 2.8
+ */
+# define INT64_C(c) c ## LL
+# endif
+# ifndef UINT64_C
+/**
+ * Provides a platform independent way to specify an unsigned 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
+ * @stable ICU 2.8
+ */
+# define UINT64_C(c) c ## ULL
+# endif
+# ifndef U_INT64_MIN
+/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
+# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
+# endif
+# ifndef U_INT64_MAX
+/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
+# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
+# endif
+# ifndef U_UINT64_MAX
+/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
+# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
+# endif
+#endif
+
+/*==========================================================================*/
+/* Boolean data type */
+/*==========================================================================*/
+
+/** The ICU boolean type @stable ICU 2.0 */
+typedef int8_t UBool;
+
+#ifndef TRUE
+/** The TRUE value of a UBool @stable ICU 2.0 */
+# define TRUE 1
+#endif
+#ifndef FALSE
+/** The FALSE value of a UBool @stable ICU 2.0 */
+# define FALSE 0
+#endif
+
+
+/*==========================================================================*/
+/* Unicode data types */
+/*==========================================================================*/
+
+/* wchar_t-related definitions -------------------------------------------- */
+
+/*
+ * \def U_WCHAR_IS_UTF16
+ * Defined if wchar_t uses UTF-16.
+ *
+ * @stable ICU 2.0
+ */
+/*
+ * \def U_WCHAR_IS_UTF32
+ * Defined if wchar_t uses UTF-32.
+ *
+ * @stable ICU 2.0
+ */
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+# ifdef __STDC_ISO_10646__
+# if (U_SIZEOF_WCHAR_T==2)
+# define U_WCHAR_IS_UTF16
+# elif (U_SIZEOF_WCHAR_T==4)
+# define U_WCHAR_IS_UTF32
+# endif
+# elif defined __UCS2__
+# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
+# define U_WCHAR_IS_UTF16
+# endif
+# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
+# if (U_SIZEOF_WCHAR_T==4)
+# define U_WCHAR_IS_UTF32
+# endif
+# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
+# define U_WCHAR_IS_UTF32
+# elif U_PLATFORM_HAS_WIN32_API
+# define U_WCHAR_IS_UTF16
+# endif
+#endif
+
+/* UChar and UChar32 definitions -------------------------------------------- */
+
+/** Number of bytes in a UChar. @stable ICU 2.0 */
+#define U_SIZEOF_UCHAR 2
+
+/**
+ * \var UChar
+ * Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t),
+ * or wchar_t if that is 16 bits wide; always assumed to be unsigned.
+ * If neither is available, then define UChar to be uint16_t.
+ *
+ * This makes the definition of UChar platform-dependent
+ * but allows direct string type compatibility with platforms with
+ * 16-bit wchar_t types.
+ *
+ * @stable ICU 4.4
+ */
+#if defined(UCHAR_TYPE)
+ typedef UCHAR_TYPE UChar;
+/* Not #elif U_HAVE_CHAR16_T -- because that is type-incompatible with pre-C++11 callers
+ typedef char16_t UChar; */
+#elif U_SIZEOF_WCHAR_T==2
+ typedef wchar_t UChar;
+#elif defined(__CHAR16_TYPE__)
+ typedef __CHAR16_TYPE__ UChar;
+#else
+ typedef uint16_t UChar;
+#endif
+
+/**
+ * Define UChar32 as a type for single Unicode code points.
+ * UChar32 is a signed 32-bit integer (same as int32_t).
+ *
+ * The Unicode code point range is 0..0x10ffff.
+ * All other values (negative or >=0x110000) are illegal as Unicode code points.
+ * They may be used as sentinel values to indicate "done", "error"
+ * or similar non-code point conditions.
+ *
+ * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
+ * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
+ * or else to be uint32_t.
+ * That is, the definition of UChar32 was platform-dependent.
+ *
+ * @see U_SENTINEL
+ * @stable ICU 2.4
+ */
+typedef int32_t UChar32;
+
+/**
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ *
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with U_SENTINEL.
+ *
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * @return -1
+ * @see UChar32
+ * @stable ICU 2.4
+ */
+#define U_SENTINEL (-1)
+
+#include "unicode/urename.h"
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/unistr.h b/src/core/basetypes/icu-ucsdet/include/unicode/unistr.h
new file mode 100644
index 00000000..c6e8b446
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/unistr.h
@@ -0,0 +1,4470 @@
+/*
+**********************************************************************
+* Copyright (C) 1998-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File unistr.h
+*
+* Modification History:
+*
+* Date Name Description
+* 09/25/98 stephen Creation.
+* 11/11/98 stephen Changed per 11/9 code review.
+* 04/20/99 stephen Overhauled per 4/16 code review.
+* 11/18/99 aliu Made to inherit from Replaceable. Added method
+* handleReplaceBetween(); other methods unchanged.
+* 06/25/01 grhoten Remove dependency on iostream.
+******************************************************************************
+*/
+
+#ifndef UNISTR_H
+#define UNISTR_H
+
+/**
+ * \file
+ * \brief C++ API: Unicode String
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/rep.h"
+#include "unicode/std_string.h"
+#include "unicode/stringpiece.h"
+#include "unicode/bytestream.h"
+#include "unicode/ucasemap.h"
+
+struct UConverter; // unicode/ucnv.h
+class StringThreadTest;
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also ustring.h and unorm.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+#endif
+
+#ifndef USTRING_H
+/**
+ * \ingroup ustring_ustrlen
+ */
+U_STABLE int32_t U_EXPORT2
+u_strlen(const UChar *s);
+#endif
+
+/**
+ * \def U_STRING_CASE_MAPPER_DEFINED
+ * @internal
+ */
+#ifndef U_STRING_CASE_MAPPER_DEFINED
+#define U_STRING_CASE_MAPPER_DEFINED
+
+/**
+ * Internal string case mapping function type.
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+U_NAMESPACE_BEGIN
+
+class BreakIterator; // unicode/brkiter.h
+class Locale; // unicode/locid.h
+class StringCharacterIterator;
+class UnicodeStringAppendable; // unicode/appendable.h
+
+/* The <iostream> include has been moved to unicode/ustream.h */
+
+/**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * @stable ICU 3.2
+ */
+#define US_INV icu::UnicodeString::kInvariant
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * <code>NUL</code>, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16)
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
+#else
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
+#endif
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for improved performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * @stable ICU 2.0
+ */
+#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
+
+/**
+ * \def UNISTR_FROM_CHAR_EXPLICIT
+ * This can be defined to be empty or "explicit".
+ * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
+ * constructors are marked as explicit, preventing their inadvertent use.
+ * @stable ICU 49
+ */
+#ifndef UNISTR_FROM_CHAR_EXPLICIT
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+ // Auto-"explicit" in ICU library code.
+# define UNISTR_FROM_CHAR_EXPLICIT explicit
+# else
+ // Empty by default for source code compatibility.
+# define UNISTR_FROM_CHAR_EXPLICIT
+# endif
+#endif
+
+/**
+ * \def UNISTR_FROM_STRING_EXPLICIT
+ * This can be defined to be empty or "explicit".
+ * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
+ * constructors are marked as explicit, preventing their inadvertent use.
+ *
+ * In particular, this helps prevent accidentally depending on ICU conversion code
+ * by passing a string literal into an API with a const UnicodeString & parameter.
+ * @stable ICU 49
+ */
+#ifndef UNISTR_FROM_STRING_EXPLICIT
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+ // Auto-"explicit" in ICU library code.
+# define UNISTR_FROM_STRING_EXPLICIT explicit
+# else
+ // Empty by default for source code compatibility.
+# define UNISTR_FROM_STRING_EXPLICIT
+# endif
+#endif
+
+/**
+ * UnicodeString is a string class that stores Unicode characters directly and provides
+ * similar functionality as the Java String and StringBuffer classes.
+ * It is a concrete implementation of the abstract class Replaceable (for transliteration).
+ *
+ * The UnicodeString class is not suitable for subclassing.
+ *
+ * <p>For an overview of Unicode strings in C and C++ see the
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ *
+ * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
+ * A Unicode character may be stored with either one code unit
+ * (the most common case) or with a matched pair of special code units
+ * ("surrogates"). The data type for code units is UChar.
+ * For single-character handling, a Unicode character code <em>point</em> is a value
+ * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
+ *
+ * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
+ * This is the same as with multi-byte char* strings in traditional string handling.
+ * Operations on partial strings typically do not test for code point boundaries.
+ * If necessary, the user needs to take care of such boundaries by testing for the code unit
+ * values or by using functions like
+ * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
+ * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
+ *
+ * UnicodeString methods are more lenient with regard to input parameter values
+ * than other ICU APIs. In particular:
+ * - If indexes are out of bounds for a UnicodeString object
+ * (<0 or >length()) then they are "pinned" to the nearest boundary.
+ * - If primitive string pointer values (e.g., const UChar * or char *)
+ * for input strings are NULL, then those input string parameters are treated
+ * as if they pointed to an empty string.
+ * However, this is <em>not</em> the case for char * parameters for charset names
+ * or other IDs.
+ * - Most UnicodeString methods do not take a UErrorCode parameter because
+ * there are usually very few opportunities for failure other than a shortage
+ * of memory, error codes in low-level C++ string methods would be inconvenient,
+ * and the error code as the last parameter (ICU convention) would prevent
+ * the use of default parameter values.
+ * Instead, such methods set the UnicodeString into a "bogus" state
+ * (see isBogus()) if an error occurs.
+ *
+ * In string comparisons, two UnicodeString objects that are both "bogus"
+ * compare equal (to be transitive and prevent endless loops in sorting),
+ * and a "bogus" string compares less than any non-"bogus" one.
+ *
+ * Const UnicodeString methods are thread-safe. Multiple threads can use
+ * const methods on the same UnicodeString object simultaneously,
+ * but non-const methods must not be called concurrently (in multiple threads)
+ * with any other (const or non-const) methods.
+ *
+ * Similarly, const UnicodeString & parameters are thread-safe.
+ * One object may be passed in as such a parameter concurrently in multiple threads.
+ * This includes the const UnicodeString & parameters for
+ * copy construction, assignment, and cloning.
+ *
+ * <p>UnicodeString uses several storage methods.
+ * String contents can be stored inside the UnicodeString object itself,
+ * in an allocated and shared buffer, or in an outside buffer that is "aliased".
+ * Most of this is done transparently, but careful aliasing in particular provides
+ * significant performance improvements.
+ * Also, the internal buffer is accessible via special functions.
+ * For details see the
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ *
+ * @see utf.h
+ * @see CharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeString : public Replaceable
+{
+public:
+
+ /**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * Use the macro US_INV instead of the full qualification for this value.
+ *
+ * @see US_INV
+ * @stable ICU 3.2
+ */
+ enum EInvariant {
+ /**
+ * @see EInvariant
+ * @stable ICU 3.2
+ */
+ kInvariant
+ };
+
+ //========================================
+ // Read-only operations
+ //========================================
+
+ /* Comparison - bitwise only - for international comparison use collation */
+
+ /**
+ * Equality operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if <TT>text</TT> contains the same characters as this one,
+ * FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool operator== (const UnicodeString& text) const;
+
+ /**
+ * Inequality operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return FALSE if <TT>text</TT> contains the same characters as this one,
+ * TRUE otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool operator!= (const UnicodeString& text) const;
+
+ /**
+ * Greater than operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * greater than the characters in <code>text</code>, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator> (const UnicodeString& text) const;
+
+ /**
+ * Less than operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * less than the characters in <code>text</code>, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator< (const UnicodeString& text) const;
+
+ /**
+ * Greater than or equal operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * greater than or equal to the characters in <code>text</code>, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator>= (const UnicodeString& text) const;
+
+ /**
+ * Less than or equal operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * less than or equal to the characters in <code>text</code>, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator<= (const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeString to
+ * the characters in <code>text</code>.
+ * @param text The UnicodeString to compare to this one.
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>text</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>text</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>text</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+ * in the <b>entire string</b> <TT>text</TT>.
+ * (The parameters "start" and "length" are not applied to the other text "text".)
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters of text to compare.
+ * @param text the other text to be compared against this string.
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>text</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>text</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>text</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare.
+ * @param srcText the text to be compared
+ * @param srcStart the offset into <TT>srcText</TT> to start comparison
+ * @param srcLength the number of characters in <TT>src</TT> to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>srcText</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>srcText</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>srcText</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeString with the first
+ * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
+ * @param srcChars The characters to compare to this UnicodeString.
+ * @param srcLength the number of characters in <TT>srcChars</TT> to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>srcChars</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>srcChars</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the first
+ * <TT>length</TT> characters in <TT>srcChars</TT>
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters to compare.
+ * @param srcChars the characters to be compared
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>srcChars</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>srcChars</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UChar *srcChars) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+ * in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare
+ * @param srcChars the characters to be compared
+ * @param srcStart the offset into <TT>srcChars</TT> to start comparison
+ * @param srcLength the number of characters in <TT>srcChars</TT> to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>srcChars</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>srcChars</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>limit</TT>) with the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
+ * @param start the offset at which the compare operation begins
+ * @param limit the offset immediately following the compare operation
+ * @param srcText the text to be compared
+ * @param srcStart the offset into <TT>srcText</TT> to start comparison
+ * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>srcText</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>srcText</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>srcText</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param text Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(const UnicodeString& text) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UChar *srcChars) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrderBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
+ *
+ * @param text Another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(const UChar *srcChars,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit,
+ uint32_t options) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>text</TT>
+ * @param text The text to match.
+ * @return TRUE if this starts with the characters in <TT>text</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UnicodeString& text) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>srcText</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcText The text to match.
+ * @param srcStart the offset into <TT>srcText</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @return TRUE if this starts with the characters in <TT>text</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>srcChars</TT>
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>srcChars</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into <TT>srcText</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @return TRUE if this ends with the characters in <TT>srcChars</TT>, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>text</TT>
+ * @param text The text to match.
+ * @return TRUE if this ends with the characters in <TT>text</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UnicodeString& text) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>srcText</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcText The text to match.
+ * @param srcStart the offset into <TT>srcText</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @return TRUE if this ends with the characters in <TT>text</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>srcChars</TT>
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>srcChars</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into <TT>srcText</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+
+ /* Searching - bitwise only */
+
+ /**
+ * Locate in this the first occurrence of the characters in <TT>text</TT>,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in <TT>text</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>text</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into <TT>srcText</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in
+ * <TT>srcChars</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into <TT>srcChars</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ int32_t indexOf(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point <code>c</code>,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar c) const;
+
+ /**
+ * Locate in this the first occurrence of the code point <TT>c</TT>,
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point <code>c</code>,
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the code point <TT>c</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point <code>c</code>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the code point <TT>c</TT>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in <TT>text</TT>,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in <TT>text</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>text</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into <TT>srcText</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into <TT>srcChars</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ int32_t lastIndexOf(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point <code>c</code>,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar c) const;
+
+ /**
+ * Locate in this the last occurrence of the code point <TT>c</TT>,
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point <code>c</code>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the code point <TT>c</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point <code>c</code>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the code point <TT>c</TT>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+
+ /* Character access */
+
+ /**
+ * Return the code unit at offset <tt>offset</tt>.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * @return the code unit at offset <tt>offset</tt>
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ inline UChar charAt(int32_t offset) const;
+
+ /**
+ * Return the code unit at offset <tt>offset</tt>.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * @return the code unit at offset <tt>offset</tt>
+ * @stable ICU 2.0
+ */
+ inline UChar operator[] (int32_t offset) const;
+
+ /**
+ * Return the code point that contains the code unit
+ * at offset <tt>offset</tt>.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * that indicates the text offset of any of the code units
+ * that will be assembled into a code point (21-bit value) and returned
+ * @return the code point of text at <tt>offset</tt>
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points to the beginning of a Unicode character.
+ * The offset that is passed in points to
+ * any code unit of a code point,
+ * while the returned offset will point to the first code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points to a second surrogate
+ * of a surrogate pair, then the returned offset will point
+ * to the first surrogate.
+ * @param offset a valid offset into one code point of the text
+ * @return offset of the first code unit of the same code point
+ * @see U16_SET_CP_START
+ * @stable ICU 2.0
+ */
+ int32_t getChar32Start(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points behind a Unicode character.
+ * The offset that is passed in points behind
+ * any code unit of a code point,
+ * while the returned offset will point behind the last code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points behind the first surrogate
+ * (i.e., to the second surrogate)
+ * of a surrogate pair, then the returned offset will point
+ * behind the second surrogate (i.e., to the first surrogate).
+ * @param offset a valid offset after any code unit of a code point of the text
+ * @return offset of the first code unit after the same code point
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.0
+ */
+ int32_t getChar32Limit(int32_t offset) const;
+
+ /**
+ * Move the code unit index along the string by delta code points.
+ * Interpret the input index as a code unit-based offset into the string,
+ * move the index forward or backward by delta code points, and
+ * return the resulting index.
+ * The input index should point to the first code unit of a code point,
+ * if there is more than one.
+ *
+ * Both input and output indexes are code unit-based as for all
+ * string indexes/offsets in ICU (and other libraries, like MBCS char*).
+ * If delta<0 then the index is moved backward (toward the start of the string).
+ * If delta>0 then the index is moved forward (toward the end of the string).
+ *
+ * This behaves like CharacterIterator::move32(delta, kCurrent).
+ *
+ * Behavior for out-of-bounds indexes:
+ * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
+ * if the input index<0 then it is pinned to 0;
+ * if it is index>length() then it is pinned to length().
+ * Afterwards, the index is moved by <code>delta</code> code points
+ * forward or backward,
+ * but no further backward than to 0 and no further forward than to length().
+ * The resulting index return value will be in between 0 and length(), inclusively.
+ *
+ * Examples:
+ * <pre>
+ * // s has code points 'a' U+10000 'b' U+10ffff U+2029
+ * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
+ *
+ * // initial index: position of U+10000
+ * int32_t index=1;
+ *
+ * // the following examples will all result in index==4, position of U+10ffff
+ *
+ * // skip 2 code points from some position in the string
+ * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
+ *
+ * // go to the 3rd code point from the start of s (0-based)
+ * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
+ *
+ * // go to the next-to-last code point of s
+ * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
+ * </pre>
+ *
+ * @param index input code unit index
+ * @param delta (signed) code point count to move the index forward or backward
+ * in the string
+ * @return the resulting code unit index
+ * @stable ICU 2.0
+ */
+ int32_t moveIndex32(int32_t index, int32_t delta) const;
+
+ /* Substring extraction */
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
+ * beginning at <tt>dstStart</tt>.
+ * If the string aliases to <code>dst</code> itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param start offset of first character which will be copied into the array
+ * @param length the number of characters to extract
+ * @param dst array in which to copy characters. The length of <tt>dst</tt>
+ * must be at least (<tt>dstStart + length</tt>).
+ * @param dstStart the offset in <TT>dst</TT> where the first character
+ * will be extracted
+ * @stable ICU 2.0
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ UChar *dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the contents of the string into dest.
+ * This is a convenience function that
+ * checks if there is enough space in dest,
+ * extracts the entire string if possible,
+ * and NUL-terminates dest if possible.
+ *
+ * If the string fits into dest but cannot be NUL-terminated
+ * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+ * If the string itself does not fit into dest
+ * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ *
+ * If the string aliases to <code>dest</code> itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param dest Destination string buffer.
+ * @param destCapacity Number of UChars available at dest.
+ * @param errorCode ICU error code.
+ * @return length()
+ * @stable ICU 2.0
+ */
+ int32_t
+ extract(UChar *dest, int32_t destCapacity,
+ UErrorCode &errorCode) const;
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
+ * <tt>target</tt>.
+ * @param start offset of first character which will be copied
+ * @param length the number of characters to extract
+ * @param target UnicodeString into which to copy characters.
+ * @return A reference to <TT>target</TT>
+ * @stable ICU 2.0
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ UnicodeString& target) const;
+
+ /**
+ * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
+ * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
+ * @param start offset of first character which will be copied into the array
+ * @param limit offset immediately following the last character to be copied
+ * @param dst array in which to copy characters. The length of <tt>dst</tt>
+ * must be at least (<tt>dstStart + (limit - start)</tt>).
+ * @param dstStart the offset in <TT>dst</TT> where the first character
+ * will be extracted
+ * @stable ICU 2.0
+ */
+ inline void extractBetween(int32_t start,
+ int32_t limit,
+ UChar *dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
+ * into the UnicodeString <tt>target</tt>. Replaceable API.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to be copied
+ * @param target UnicodeString into which to copy characters.
+ * @return A reference to <TT>target</TT>
+ * @stable ICU 2.0
+ */
+ virtual void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& target) const;
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.
+ * All characters must be invariant (see utypes.h).
+ * Use US_INV as the last, signature-distinguishing parameter.
+ *
+ * This function does not write any more than <code>targetLength</code>
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction, can be NULL
+ * if targetLength is 0
+ * @param targetCapacity the length of the target buffer
+ * @param inv Signature-distinguishing paramater, use US_INV.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 3.2
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ int32_t targetCapacity,
+ enum EInvariant inv) const;
+
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
+ * in the platform's default codepage.
+ * This function does not write any more than <code>targetLength</code>
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param targetLength the length of the target buffer
+ * If <TT>target</TT> is NULL, then the number of bytes required for
+ * <TT>target</TT> is returned.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ uint32_t targetLength) const;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
+ * in a specified codepage.
+ * The output string is NUL-terminated.
+ *
+ * Recommendation: For invariant-character strings use
+ * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If <TT>target</TT> is NULL, then the number of bytes required for
+ * <TT>target</TT> is returned. It is assumed that the target is big enough
+ * to fit all of the characters.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ inline int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ const char *codepage = 0) const;
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
+ * in a specified codepage.
+ * This function does not write any more than <code>targetLength</code>
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * Recommendation: For invariant-character strings use
+ * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param targetLength the length of the target buffer
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If <TT>target</TT> is NULL, then the number of bytes required for
+ * <TT>target</TT> is returned.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ uint32_t targetLength,
+ const char *codepage) const;
+
+ /**
+ * Convert the UnicodeString into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are extracted.
+ *
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
+ * or NULL for the default converter
+ * @param errorCode normal ICU error code
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @stable ICU 2.0
+ */
+ int32_t extract(char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+#endif
+
+ /**
+ * Create a temporary substring for the specified range.
+ * Unlike the substring constructor and setTo() functions,
+ * the object returned here will be a read-only alias (using getBuffer())
+ * rather than copying the text.
+ * As a result, this substring operation is much faster but requires
+ * that the original string not be modified or deleted during the lifetime
+ * of the returned substring object.
+ * @param start offset of the first character visible in the substring
+ * @param length length of the substring
+ * @return a read-only alias UnicodeString object for the substring
+ * @stable ICU 4.4
+ */
+ UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
+
+ /**
+ * Create a temporary substring for the specified range.
+ * Same as tempSubString(start, length) except that the substring range
+ * is specified as a (start, limit) pair (with an exclusive limit index)
+ * rather than a (start, length) pair.
+ * @param start offset of the first character visible in the substring
+ * @param limit offset immediately following the last character visible in the substring
+ * @return a read-only alias UnicodeString object for the substring
+ * @stable ICU 4.4
+ */
+ inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
+
+ /**
+ * Convert the UnicodeString to UTF-8 and write the result
+ * to a ByteSink. This is called by toUTF8String().
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls u_strToUTF8WithSub().
+ *
+ * @param sink A ByteSink to which the UTF-8 version of the string is written.
+ * sink.Flush() is called at the end.
+ * @stable ICU 4.2
+ * @see toUTF8String
+ */
+ void toUTF8(ByteSink &sink) const;
+
+#if U_HAVE_STD_STRING
+
+ /**
+ * Convert the UnicodeString to UTF-8 and append the result
+ * to a standard string.
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls toUTF8().
+ *
+ * @param result A standard string (or a compatible object)
+ * to which the UTF-8 version of the string is appended.
+ * @return The string object.
+ * @stable ICU 4.2
+ * @see toUTF8
+ */
+ template<typename StringClass>
+ StringClass &toUTF8String(StringClass &result) const {
+ StringByteSink<StringClass> sbs(&result);
+ toUTF8(sbs);
+ return result;
+ }
+
+#endif
+
+ /**
+ * Convert the UnicodeString to UTF-32.
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls u_strToUTF32WithSub().
+ *
+ * @param utf32 destination string buffer, can be NULL if capacity==0
+ * @param capacity the number of UChar32s available at utf32
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The length of the UTF-32 string.
+ * @see fromUTF32
+ * @stable ICU 4.2
+ */
+ int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
+
+ /* Length operations */
+
+ /**
+ * Return the length of the UnicodeString object.
+ * The length is the number of UChar code units are in the UnicodeString.
+ * If you want the number of code points, please use countChar32().
+ * @return the length of the UnicodeString object
+ * @see countChar32
+ * @stable ICU 2.0
+ */
+ inline int32_t length(void) const;
+
+ /**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This functions is basically the inverse of moveIndex32().
+ *
+ * @param start the index of the first code unit to check
+ * @param length the number of UChar code units to check
+ * @return the number of code points in the specified code units
+ * @see length
+ * @stable ICU 2.0
+ */
+ int32_t
+ countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
+
+ /**
+ * Check if the length UChar code units of the string
+ * contain more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in this part of the string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length
+ * falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (countChar32(start, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param start the index of the first code unit to check (0 for the entire string)
+ * @param length the number of UChar code units to check
+ * (use INT32_MAX for the entire string; remember that start/length
+ * values are pinned)
+ * @param number The number of code points in the (sub)string is compared against
+ * the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ * than 'number'. Same as (u_countChar32(s, length)>number).
+ * @see countChar32
+ * @see u_strHasMoreChar32Than
+ * @stable ICU 2.4
+ */
+ UBool
+ hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
+
+ /**
+ * Determine if this string is empty.
+ * @return TRUE if this string contains 0 characters, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool isEmpty(void) const;
+
+ /**
+ * Return the capacity of the internal buffer of the UnicodeString object.
+ * This is useful together with the getBuffer functions.
+ * See there for details.
+ *
+ * @return the number of UChars available in the internal buffer
+ * @see getBuffer
+ * @stable ICU 2.0
+ */
+ inline int32_t getCapacity(void) const;
+
+ /* Other operations */
+
+ /**
+ * Generate a hash code for this object.
+ * @return The hash code of this UnicodeString.
+ * @stable ICU 2.0
+ */
+ inline int32_t hashCode(void) const;
+
+ /**
+ * Determine if this object contains a valid string.
+ * A bogus string has no value. It is different from an empty string,
+ * although in both cases isEmpty() returns TRUE and length() returns 0.
+ * setToBogus() and isBogus() can be used to indicate that no string value is available.
+ * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
+ * length() returns 0.
+ *
+ * @return TRUE if the string is bogus/invalid, FALSE otherwise
+ * @see setToBogus()
+ * @stable ICU 2.0
+ */
+ inline UBool isBogus(void) const;
+
+
+ //========================================
+ // Write operations
+ //========================================
+
+ /* Assignment operations */
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the characters from <TT>srcText</TT>.
+ * @param srcText The text containing the characters to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &operator=(const UnicodeString &srcText);
+
+ /**
+ * Almost the same as the assignment operator.
+ * Replace the characters in this UnicodeString
+ * with the characters from <code>srcText</code>.
+ *
+ * This function works the same as the assignment operator
+ * for all strings except for ones that are readonly aliases.
+ *
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * This function implements the old, more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ *
+ * The fastCopyFrom function must be used only if it is known that the lifetime of
+ * this UnicodeString does not exceed the lifetime of the aliased buffer
+ * including its contents, for example for strings from resource bundles
+ * or aliases to string constants.
+ *
+ * @param src The text containing the characters to replace.
+ * @return a reference to this
+ * @stable ICU 2.4
+ */
+ UnicodeString &fastCopyFrom(const UnicodeString &src);
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the code unit <TT>ch</TT>.
+ * @param ch the code unit to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator= (UChar ch);
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the code point <TT>ch</TT>.
+ * @param ch the code point to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator= (UChar32 ch);
+
+ /**
+ * Set the text in the UnicodeString object to the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
+ * <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @return a reference to this
+ * @stable ICU 2.2
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText,
+ int32_t srcStart);
+
+ /**
+ * Set the text in the UnicodeString object to the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcText</TT> in the
+ * replace string.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Set the text in the UnicodeString object to the characters in
+ * <TT>srcText</TT>.
+ * <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText);
+
+ /**
+ * Set the characters in the UnicodeString object to the characters
+ * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Set the characters in the UnicodeString object to the code unit
+ * <TT>srcChar</TT>.
+ * @param srcChar the code unit which becomes the UnicodeString's character
+ * content
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setTo(UChar srcChar);
+
+ /**
+ * Set the characters in the UnicodeString object to the code point
+ * <TT>srcChar</TT>.
+ * @param srcChar the code point which becomes the UnicodeString's character
+ * content
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setTo(UChar32 srcChar);
+
+ /**
+ * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ *
+ * In an assignment to another UnicodeString, when using the copy constructor
+ * or the assignment operator, the text will be copied.
+ * When using fastCopyFrom(), the text will be aliased again,
+ * so that both strings then alias the same readonly-text.
+ *
+ * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
+ * This must be true if <code>textLength==-1</code>.
+ * @param text The characters to alias for the UnicodeString.
+ * @param textLength The number of Unicode characters in <code>text</code> to alias.
+ * If -1, then this constructor will determine the length
+ * by calling <code>u_strlen()</code>.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &setTo(UBool isTerminated,
+ const UChar *text,
+ int32_t textLength);
+
+ /**
+ * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has write-through semantics:
+ * For as long as the capacity of the buffer is sufficient, write operations
+ * will directly affect the buffer. When more capacity is necessary, then
+ * a new buffer will be allocated and the contents copied as with regularly
+ * constructed strings.
+ * In an assignment to another UnicodeString, the buffer will be copied.
+ * The extract(UChar *dst) function detects whether the dst pointer is the same
+ * as the string buffer itself and will in this case not copy the contents.
+ *
+ * @param buffer The characters to alias for the UnicodeString.
+ * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
+ * @param buffCapacity The size of <code>buffer</code> in UChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &setTo(UChar *buffer,
+ int32_t buffLength,
+ int32_t buffCapacity);
+
+ /**
+ * Make this UnicodeString object invalid.
+ * The string will test TRUE with isBogus().
+ *
+ * A bogus string has no value. It is different from an empty string.
+ * It can be used to indicate that no string value is available.
+ * getBuffer() and getTerminatedBuffer() return NULL, and
+ * length() returns 0.
+ *
+ * This utility function is used throughout the UnicodeString
+ * implementation to indicate that a UnicodeString operation failed,
+ * and may be used in other functions,
+ * especially but not exclusively when such functions do not
+ * take a UErrorCode for simplicity.
+ *
+ * The following methods, and no others, will clear a string object's bogus flag:
+ * - remove()
+ * - remove(0, INT32_MAX)
+ * - truncate(0)
+ * - operator=() (assignment operator)
+ * - setTo(...)
+ *
+ * The simplest ways to turn a bogus string into an empty one
+ * is to use the remove() function.
+ * Examples for other functions that are equivalent to "set to empty string":
+ * \code
+ * if(s.isBogus()) {
+ * s.remove(); // set to an empty string (remove all), or
+ * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
+ * s.truncate(0); // set to an empty string (complete truncation), or
+ * s=UnicodeString(); // assign an empty string, or
+ * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
+ * static const UChar nul=0;
+ * s.setTo(&nul, 0); // set to an empty C Unicode string
+ * }
+ * \endcode
+ *
+ * @see isBogus()
+ * @stable ICU 2.0
+ */
+ void setToBogus();
+
+ /**
+ * Set the character at the specified offset to the specified character.
+ * @param offset A valid offset into the text of the character to set
+ * @param ch The new character
+ * @return A reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setCharAt(int32_t offset,
+ UChar ch);
+
+
+ /* Append operations */
+
+ /**
+ * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
+ * object.
+ * @param ch the code unit to be appended
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (UChar ch);
+
+ /**
+ * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
+ * object.
+ * @param ch the code point to be appended
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (UChar32 ch);
+
+ /**
+ * Append operator. Append the characters in <TT>srcText</TT> to the
+ * UnicodeString object. <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (const UnicodeString& srcText);
+
+ /**
+ * Append the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
+ * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT>
+ * is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcText</TT> in
+ * the append string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Append the characters in <TT>srcText</TT> to the UnicodeString object.
+ * <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UnicodeString& srcText);
+
+ /**
+ * Append the characters in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
+ * object at offset
+ * <TT>start</TT>. <TT>srcChars</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into <TT>srcChars</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcChars</TT> in
+ * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Append the characters in <TT>srcChars</TT> to the UnicodeString object
+ * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
+ * can be -1 if <TT>srcChars</TT> is NUL-terminated
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
+ * @param srcChar the code unit to append
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(UChar srcChar);
+
+ /**
+ * Append the code point <TT>srcChar</TT> to the UnicodeString object.
+ * @param srcChar the code point to append
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& append(UChar32 srcChar);
+
+
+ /* Insert operations */
+
+ /**
+ * Insert the characters in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
+ * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcText</TT> in
+ * the insert string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Insert the characters in <TT>srcText</TT> into the UnicodeString object
+ * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UnicodeString& srcText);
+
+ /**
+ * Insert the characters in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
+ * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+ * @param start the offset at which the insertion begins
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into <TT>srcChars</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * in the insert string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
+ * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
+ * offset <TT>start</TT>.
+ * @param start the offset at which the insertion occurs
+ * @param srcChar the code unit to insert
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ UChar srcChar);
+
+ /**
+ * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
+ * offset <TT>start</TT>.
+ * @param start the offset at which the insertion occurs
+ * @param srcChar the code point to insert
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ UChar32 srcChar);
+
+
+ /* Replace operations */
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+ * <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * <TT>srcText</TT> is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcText</TT> in
+ * the replace string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>)
+ * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
+ * not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText);
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+ * <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
+ * is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into <TT>srcChars</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * in the replace string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+ * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
+ * <TT>srcChar</TT>.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcChar the new code unit
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ UChar srcChar);
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the code point
+ * <TT>srcChar</TT>.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcChar the new code point
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
+
+ /**
+ * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
+ * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param limit the offset immediately following the replace range
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText);
+
+ /**
+ * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
+ * with the characters in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param limit the offset immediately following the replace range
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcChars</TT> where new characters
+ * will be obtained
+ * @param srcLimit the offset immediately following the range to copy
+ * in <TT>srcText</TT>
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit);
+
+ /**
+ * Replace a substring of this object with the given text.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= length()</code>.
+ * @param text the text to replace characters <code>start</code>
+ * to <code>limit - 1</code>
+ * @stable ICU 2.0
+ */
+ virtual void handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text);
+
+ /**
+ * Replaceable API
+ * @return TRUE if it has MetaData
+ * @stable ICU 2.4
+ */
+ virtual UBool hasMetaData() const;
+
+ /**
+ * Copy a substring of this object, retaining attribute (out-of-band)
+ * information. This method is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * @param start the beginning index, inclusive; <code>0 <= start <=
+ * limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit <=
+ * length()</code>.
+ * @param dest the destination index. The characters from
+ * <code>start..limit-1</code> will be copied to <code>dest</code>.
+ * Implementations of this method may assume that <code>dest <= start ||
+ * dest >= limit</code>.
+ * @stable ICU 2.0
+ */
+ virtual void copy(int32_t start, int32_t limit, int32_t dest);
+
+ /* Search and replace operations */
+
+ /**
+ * Replace all occurrences of characters in oldText with the characters
+ * in newText
+ * @param oldText the text containing the search text
+ * @param newText the text containing the replacement text
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& findAndReplace(const UnicodeString& oldText,
+ const UnicodeString& newText);
+
+ /**
+ * Replace all occurrences of characters in oldText with characters
+ * in newText
+ * in the range [<TT>start</TT>, <TT>start + length</TT>).
+ * @param start the start of the range in which replace will performed
+ * @param length the length of the range in which replace will be performed
+ * @param oldText the text containing the search text
+ * @param newText the text containing the replacement text
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& findAndReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& oldText,
+ const UnicodeString& newText);
+
+ /**
+ * Replace all occurrences of characters in oldText in the range
+ * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
+ * in newText in the range
+ * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
+ * in the range [<TT>start</TT>, <TT>start + length</TT>).
+ * @param start the start of the range in which replace will performed
+ * @param length the length of the range in which replace will be performed
+ * @param oldText the text containing the search text
+ * @param oldStart the start of the search range in <TT>oldText</TT>
+ * @param oldLength the length of the search range in <TT>oldText</TT>
+ * @param newText the text containing the replacement text
+ * @param newStart the start of the replacement range in <TT>newText</TT>
+ * @param newLength the length of the replacement range in <TT>newText</TT>
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& findAndReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& oldText,
+ int32_t oldStart,
+ int32_t oldLength,
+ const UnicodeString& newText,
+ int32_t newStart,
+ int32_t newLength);
+
+
+ /* Remove operations */
+
+ /**
+ * Remove all characters from the UnicodeString object.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& remove(void);
+
+ /**
+ * Remove the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
+ * @param start the offset of the first character to remove
+ * @param length the number of characters to remove
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& remove(int32_t start,
+ int32_t length = (int32_t)INT32_MAX);
+
+ /**
+ * Remove the characters in the range
+ * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
+ * @param start the offset of the first character to remove
+ * @param limit the offset immediately following the range to remove
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& removeBetween(int32_t start,
+ int32_t limit = (int32_t)INT32_MAX);
+
+ /**
+ * Retain only the characters in the range
+ * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
+ * Removes characters before <code>start</code> and at and after <code>limit</code>.
+ * @param start the offset of the first character to retain
+ * @param limit the offset immediately following the range to retain
+ * @return a reference to this
+ * @stable ICU 4.4
+ */
+ inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
+
+ /* Length operations */
+
+ /**
+ * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
+ * If the length of this UnicodeString is less than targetLength,
+ * length() - targetLength copies of padChar will be added to the
+ * beginning of this UnicodeString.
+ * @param targetLength the desired length of the string
+ * @param padChar the character to use for padding. Defaults to
+ * space (U+0020)
+ * @return TRUE if the text was padded, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ UBool padLeading(int32_t targetLength,
+ UChar padChar = 0x0020);
+
+ /**
+ * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
+ * If the length of this UnicodeString is less than targetLength,
+ * length() - targetLength copies of padChar will be added to the
+ * end of this UnicodeString.
+ * @param targetLength the desired length of the string
+ * @param padChar the character to use for padding. Defaults to
+ * space (U+0020)
+ * @return TRUE if the text was padded, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ UBool padTrailing(int32_t targetLength,
+ UChar padChar = 0x0020);
+
+ /**
+ * Truncate this UnicodeString to the <TT>targetLength</TT>.
+ * @param targetLength the desired length of this UnicodeString.
+ * @return TRUE if the text was truncated, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool truncate(int32_t targetLength);
+
+ /**
+ * Trims leading and trailing whitespace from this UnicodeString.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& trim(void);
+
+
+ /* Miscellaneous operations */
+
+ /**
+ * Reverse this UnicodeString in place.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& reverse(void);
+
+ /**
+ * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
+ * this UnicodeString.
+ * @param start the start of the range to reverse
+ * @param length the number of characters to to reverse
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& reverse(int32_t start,
+ int32_t length);
+
+ /**
+ * Convert the characters in this to UPPER CASE following the conventions of
+ * the default locale.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toUpper(void);
+
+ /**
+ * Convert the characters in this to UPPER CASE following the conventions of
+ * a specific locale.
+ * @param locale The locale containing the conventions to use.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toUpper(const Locale& locale);
+
+ /**
+ * Convert the characters in this to lower case following the conventions of
+ * the default locale.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toLower(void);
+
+ /**
+ * Convert the characters in this to lower case following the conventions of
+ * a specific locale.
+ * @param locale The locale containing the conventions to use.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toLower(const Locale& locale);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecase this string, convenience function using the default locale.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @return A reference to this.
+ * @stable ICU 2.1
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter);
+
+ /**
+ * Titlecase this string.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @return A reference to this.
+ * @stable ICU 2.1
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
+
+ /**
+ * Titlecase this string, with options.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options.)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @param options Options bit set, see ucasemap_open().
+ * @return A reference to this.
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @see ucasemap_open
+ * @stable ICU 3.8
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
+
+#endif
+
+ /**
+ * Case-folds the characters in this string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ *
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
+
+ //========================================
+ // Access to the internal buffer
+ //========================================
+
+ /**
+ * Get a read/write pointer to the internal buffer.
+ * The buffer is guaranteed to be large enough for at least minCapacity UChars,
+ * writable, and is still owned by the UnicodeString object.
+ * Calls to getBuffer(minCapacity) must not be nested, and
+ * must be matched with calls to releaseBuffer(newLength).
+ * If the string buffer was read-only or shared,
+ * then it will be reallocated and copied.
+ *
+ * An attempted nested call will return 0, and will not further modify the
+ * state of the UnicodeString object.
+ * It also returns 0 if the string is bogus.
+ *
+ * The actual capacity of the string buffer may be larger than minCapacity.
+ * getCapacity() returns the actual capacity.
+ * For many operations, the full capacity should be used to avoid reallocations.
+ *
+ * While the buffer is "open" between getBuffer(minCapacity)
+ * and releaseBuffer(newLength), the following applies:
+ * - The string length is set to 0.
+ * - Any read API call on the UnicodeString object will behave like on a 0-length string.
+ * - Any write API call on the UnicodeString object is disallowed and will have no effect.
+ * - You can read from and write to the returned buffer.
+ * - The previous string contents will still be in the buffer;
+ * if you want to use it, then you need to call length() before getBuffer(minCapacity).
+ * If the length() was greater than minCapacity, then any contents after minCapacity
+ * may be lost.
+ * The buffer contents is not NUL-terminated by getBuffer().
+ * If length()<getCapacity() then you can terminate it by writing a NUL
+ * at index length().
+ * - You must call releaseBuffer(newLength) before and in order to
+ * return to normal UnicodeString operation.
+ *
+ * @param minCapacity the minimum number of UChars that are to be available
+ * in the buffer, starting at the returned pointer;
+ * default to the current string capacity if minCapacity==-1
+ * @return a writable pointer to the internal string buffer,
+ * or 0 if an error occurs (nested calls, out of memory)
+ *
+ * @see releaseBuffer
+ * @see getTerminatedBuffer()
+ * @stable ICU 2.0
+ */
+ UChar *getBuffer(int32_t minCapacity);
+
+ /**
+ * Release a read/write buffer on a UnicodeString object with an
+ * "open" getBuffer(minCapacity).
+ * This function must be called in a matched pair with getBuffer(minCapacity).
+ * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
+ *
+ * It will set the string length to newLength, at most to the current capacity.
+ * If newLength==-1 then it will set the length according to the
+ * first NUL in the buffer, or to the capacity if there is no NUL.
+ *
+ * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
+ *
+ * @param newLength the new length of the UnicodeString object;
+ * defaults to the current capacity if newLength is greater than that;
+ * if newLength==-1, it defaults to u_strlen(buffer) but not more than
+ * the current capacity of the string
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @stable ICU 2.0
+ */
+ void releaseBuffer(int32_t newLength=-1);
+
+ /**
+ * Get a read-only pointer to the internal buffer.
+ * This can be called at any time on a valid UnicodeString.
+ *
+ * It returns 0 if the string is bogus, or
+ * during an "open" getBuffer(minCapacity).
+ *
+ * It can be called as many times as desired.
+ * The pointer that it returns will remain valid until the UnicodeString object is modified,
+ * at which time the pointer is semantically invalidated and must not be used any more.
+ *
+ * The capacity of the buffer can be determined with getCapacity().
+ * The part after length() may or may not be initialized and valid,
+ * depending on the history of the UnicodeString object.
+ *
+ * The buffer contents is (probably) not NUL-terminated.
+ * You can check if it is with
+ * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
+ * (See getTerminatedBuffer().)
+ *
+ * The buffer may reside in read-only memory. Its contents must not
+ * be modified.
+ *
+ * @return a read-only pointer to the internal string buffer,
+ * or 0 if the string is empty or bogus
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @see getTerminatedBuffer()
+ * @stable ICU 2.0
+ */
+ inline const UChar *getBuffer() const;
+
+ /**
+ * Get a read-only pointer to the internal buffer,
+ * making sure that it is NUL-terminated.
+ * This can be called at any time on a valid UnicodeString.
+ *
+ * It returns 0 if the string is bogus, or
+ * during an "open" getBuffer(minCapacity), or if the buffer cannot
+ * be NUL-terminated (because memory allocation failed).
+ *
+ * It can be called as many times as desired.
+ * The pointer that it returns will remain valid until the UnicodeString object is modified,
+ * at which time the pointer is semantically invalidated and must not be used any more.
+ *
+ * The capacity of the buffer can be determined with getCapacity().
+ * The part after length()+1 may or may not be initialized and valid,
+ * depending on the history of the UnicodeString object.
+ *
+ * The buffer contents is guaranteed to be NUL-terminated.
+ * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
+ * is written.
+ * For this reason, this function is not const, unlike getBuffer().
+ * Note that a UnicodeString may also contain NUL characters as part of its contents.
+ *
+ * The buffer may reside in read-only memory. Its contents must not
+ * be modified.
+ *
+ * @return a read-only pointer to the internal string buffer,
+ * or 0 if the string is empty or bogus
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @see getBuffer()
+ * @stable ICU 2.2
+ */
+ const UChar *getTerminatedBuffer();
+
+ //========================================
+ // Constructors
+ //========================================
+
+ /** Construct an empty UnicodeString.
+ * @stable ICU 2.0
+ */
+ inline UnicodeString();
+
+ /**
+ * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
+ * @param capacity the number of UChars this UnicodeString should hold
+ * before a resize is necessary; if count is greater than 0 and count
+ * code points c take up more space than capacity, then capacity is adjusted
+ * accordingly.
+ * @param c is used to initially fill the string
+ * @param count specifies how many code points c are to be written in the
+ * string
+ * @stable ICU 2.0
+ */
+ UnicodeString(int32_t capacity, UChar32 c, int32_t count);
+
+ /**
+ * Single UChar (code unit) constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
+ * on the compiler command line or similar.
+ * @param ch the character to place in the UnicodeString
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
+
+ /**
+ * Single UChar32 (code point) constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
+ * on the compiler command line or similar.
+ * @param ch the character to place in the UnicodeString
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
+
+ /**
+ * UChar* constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
+ * on the compiler command line or similar.
+ * @param text The characters to place in the UnicodeString. <TT>text</TT>
+ * must be NULL (U+0000) terminated.
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
+
+ /**
+ * UChar* constructor.
+ * @param text The characters to place in the UnicodeString.
+ * @param textLength The number of Unicode characters in <TT>text</TT>
+ * to copy.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const UChar *text,
+ int32_t textLength);
+
+ /**
+ * Readonly-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ *
+ * In an assignment to another UnicodeString, when using the copy constructor
+ * or the assignment operator, the text will be copied.
+ * When using fastCopyFrom(), the text will be aliased again,
+ * so that both strings then alias the same readonly-text.
+ *
+ * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
+ * This must be true if <code>textLength==-1</code>.
+ * @param text The characters to alias for the UnicodeString.
+ * @param textLength The number of Unicode characters in <code>text</code> to alias.
+ * If -1, then this constructor will determine the length
+ * by calling <code>u_strlen()</code>.
+ * @stable ICU 2.0
+ */
+ UnicodeString(UBool isTerminated,
+ const UChar *text,
+ int32_t textLength);
+
+ /**
+ * Writable-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has write-through semantics:
+ * For as long as the capacity of the buffer is sufficient, write operations
+ * will directly affect the buffer. When more capacity is necessary, then
+ * a new buffer will be allocated and the contents copied as with regularly
+ * constructed strings.
+ * In an assignment to another UnicodeString, the buffer will be copied.
+ * The extract(UChar *dst) function detects whether the dst pointer is the same
+ * as the string buffer itself and will in this case not copy the contents.
+ *
+ * @param buffer The characters to alias for the UnicodeString.
+ * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
+ * @param buffCapacity The size of <code>buffer</code> in UChars.
+ * @stable ICU 2.0
+ */
+ UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
+
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+ /**
+ * char* constructor.
+ * Uses the default converter (and thus depends on the ICU conversion code)
+ * unless U_CHARSET_IS_UTF8 is set to 1.
+ *
+ * For ASCII (really "invariant character") strings it is more efficient to use
+ * the constructor that takes a US_INV (for its enum EInvariant).
+ * For ASCII (invariant-character) string literals, see UNICODE_STRING and
+ * UNICODE_STRING_SIMPLE.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
+ * on the compiler command line or similar.
+ * @param codepageData an array of bytes, null-terminated,
+ * in the platform's default codepage.
+ * @stable ICU 2.0
+ * @see UNICODE_STRING
+ * @see UNICODE_STRING_SIMPLE
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
+
+ /**
+ * char* constructor.
+ * Uses the default converter (and thus depends on the ICU conversion code)
+ * unless U_CHARSET_IS_UTF8 is set to 1.
+ * @param codepageData an array of bytes in the platform's default codepage.
+ * @param dataLength The number of bytes in <TT>codepageData</TT>.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, int32_t dataLength);
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+ /**
+ * char* constructor.
+ * @param codepageData an array of bytes, null-terminated
+ * @param codepage the encoding of <TT>codepageData</TT>. The special
+ * value 0 for <TT>codepage</TT> indicates that the text is in the
+ * platform's default codepage.
+ *
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * Recommendation: For invariant-character strings use the constructor
+ * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, const char *codepage);
+
+ /**
+ * char* constructor.
+ * @param codepageData an array of bytes.
+ * @param dataLength The number of bytes in <TT>codepageData</TT>.
+ * @param codepage the encoding of <TT>codepageData</TT>. The special
+ * value 0 for <TT>codepage</TT> indicates that the text is in the
+ * platform's default codepage.
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * Recommendation: For invariant-character strings use the constructor
+ * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
+
+ /**
+ * char * / UConverter constructor.
+ * This constructor uses an existing UConverter object to
+ * convert the codepage string to Unicode and construct a UnicodeString
+ * from that.
+ *
+ * The converter is reset at first.
+ * If the error code indicates a failure before this constructor is called,
+ * or if an error occurs during conversion or construction,
+ * then the string will be bogus.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are constructed.
+ *
+ * @param src input codepage string
+ * @param srcLength length of the input string, can be -1 for NUL-terminated strings
+ * @param cnv converter object (ucnv_resetToUnicode() will be called),
+ * can be NULL for the default converter
+ * @param errorCode normal ICU error code
+ * @stable ICU 2.0
+ */
+ UnicodeString(
+ const char *src, int32_t srcLength,
+ UConverter *cnv,
+ UErrorCode &errorCode);
+
+#endif
+
+ /**
+ * Constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * Use the macro US_INV as the third, signature-distinguishing parameter.
+ *
+ * For example:
+ * \code
+ * void fn(const char *s) {
+ * UnicodeString ustr(s, -1, US_INV);
+ * // use ustr ...
+ * }
+ * \endcode
+ *
+ * @param src String using only invariant characters.
+ * @param length Length of src, or -1 if NUL-terminated.
+ * @param inv Signature-distinguishing paramater, use US_INV.
+ *
+ * @see US_INV
+ * @stable ICU 3.2
+ */
+ UnicodeString(const char *src, int32_t length, enum EInvariant inv);
+
+
+ /**
+ * Copy constructor.
+ * @param that The UnicodeString object to copy.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const UnicodeString& that);
+
+ /**
+ * 'Substring' constructor from tail of source string.
+ * @param src The UnicodeString object to copy.
+ * @param srcStart The offset into <tt>src</tt> at which to start copying.
+ * @stable ICU 2.2
+ */
+ UnicodeString(const UnicodeString& src, int32_t srcStart);
+
+ /**
+ * 'Substring' constructor from subrange of source string.
+ * @param src The UnicodeString object to copy.
+ * @param srcStart The offset into <tt>src</tt> at which to start copying.
+ * @param srcLength The number of characters from <tt>src</tt> to copy.
+ * @stable ICU 2.2
+ */
+ UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
+
+ /**
+ * Clone this object, an instance of a subclass of Replaceable.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The clone functions in all subclasses return a pointer to a Replaceable
+ * because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see Replaceable::clone
+ * @see getDynamicClassID
+ * @stable ICU 2.6
+ */
+ virtual Replaceable *clone() const;
+
+ /** Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeString();
+
+ /**
+ * Create a UnicodeString from a UTF-8 string.
+ * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
+ * Calls u_strFromUTF8WithSub().
+ *
+ * @param utf8 UTF-8 input string.
+ * Note that a StringPiece can be implicitly constructed
+ * from a std::string or a NUL-terminated const char * string.
+ * @return A UnicodeString with equivalent UTF-16 contents.
+ * @see toUTF8
+ * @see toUTF8String
+ * @stable ICU 4.2
+ */
+ static UnicodeString fromUTF8(const StringPiece &utf8);
+
+ /**
+ * Create a UnicodeString from a UTF-32 string.
+ * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
+ * Calls u_strFromUTF32WithSub().
+ *
+ * @param utf32 UTF-32 input string. Must not be NULL.
+ * @param length Length of the input string, or -1 if NUL-terminated.
+ * @return A UnicodeString with equivalent UTF-16 contents.
+ * @see toUTF32
+ * @stable ICU 4.2
+ */
+ static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
+
+ /* Miscellaneous operations */
+
+ /**
+ * Unescape a string of characters and return a string containing
+ * the result. The following escape sequences are recognized:
+ *
+ * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh 8 hex digits
+ * \\xhh 1-2 hex digits
+ * \\ooo 1-3 octal digits; o in [0-7]
+ * \\cX control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped. For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string. An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * This function is similar to u_unescape() but not identical to it.
+ * The latter takes a source char*, so it does escape recognition
+ * and also invariant conversion.
+ *
+ * @return a string with backslash escapes interpreted, or an
+ * empty string on error.
+ * @see UnicodeString#unescapeAt()
+ * @see u_unescape()
+ * @see u_unescapeAt()
+ * @stable ICU 2.0
+ */
+ UnicodeString unescape() const;
+
+ /**
+ * Unescape a single escape sequence and return the represented
+ * character. See unescape() for a listing of the recognized escape
+ * sequences. The character at offset-1 is assumed (without
+ * checking) to be a backslash. If the escape sequence is
+ * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
+ * returned.
+ *
+ * @param offset an input output parameter. On input, it is the
+ * offset into this string where the escape sequence is located,
+ * after the initial backslash. On output, it is advanced after the
+ * last character parsed. On error, it is not advanced at all.
+ * @return the character represented by the escape sequence at
+ * offset, or U_SENTINEL=-1 on error.
+ * @see UnicodeString#unescape()
+ * @see u_unescape()
+ * @see u_unescapeAt()
+ * @stable ICU 2.0
+ */
+ UChar32 unescapeAt(int32_t &offset) const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ //========================================
+ // Implementation methods
+ //========================================
+
+protected:
+ /**
+ * Implement Replaceable::getLength() (see jitterbug 1027).
+ * @stable ICU 2.4
+ */
+ virtual int32_t getLength() const;
+
+ /**
+ * The change in Replaceable to use virtual getCharAt() allows
+ * UnicodeString::charAt() to be inline again (see jitterbug 709).
+ * @stable ICU 2.4
+ */
+ virtual UChar getCharAt(int32_t offset) const;
+
+ /**
+ * The change in Replaceable to use virtual getChar32At() allows
+ * UnicodeString::char32At() to be inline again (see jitterbug 709).
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getChar32At(int32_t offset) const;
+
+private:
+ // For char* constructors. Could be made public.
+ UnicodeString &setToUTF8(const StringPiece &utf8);
+ // For extract(char*).
+ // We could make a toUTF8(target, capacity, errorCode) public but not
+ // this version: New API will be cleaner if we make callers create substrings
+ // rather than having start+length on every method,
+ // and it should take a UErrorCode&.
+ int32_t
+ toUTF8(int32_t start, int32_t len,
+ char *target, int32_t capacity) const;
+
+ /**
+ * Internal string contents comparison, called by operator==.
+ * Requires: this & text not bogus and have same lengths.
+ */
+ UBool doEquals(const UnicodeString &text, int32_t len) const;
+
+ inline int8_t
+ doCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ int8_t doCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ int8_t doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ int32_t doIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doLastIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doLastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ void doExtract(int32_t start,
+ int32_t length,
+ UChar *dst,
+ int32_t dstStart) const;
+
+ inline void doExtract(int32_t start,
+ int32_t length,
+ UnicodeString& target) const;
+
+ inline UChar doCharAt(int32_t offset) const;
+
+ UnicodeString& doReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ UnicodeString& doReplace(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ UnicodeString& doReverse(int32_t start,
+ int32_t length);
+
+ // calculate hash code
+ int32_t doHashCode(void) const;
+
+ // get pointer to start of array
+ // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
+ inline UChar* getArrayStart(void);
+ inline const UChar* getArrayStart(void) const;
+
+ // A UnicodeString object (not necessarily its current buffer)
+ // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
+ inline UBool isWritable() const;
+
+ // Is the current buffer writable?
+ inline UBool isBufferWritable() const;
+
+ // None of the following does releaseArray().
+ inline void setLength(int32_t len); // sets only fShortLength and fLength
+ inline void setToEmpty(); // sets fFlags=kShortString
+ inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
+
+ // allocate the array; result may be fStackBuffer
+ // sets refCount to 1 if appropriate
+ // sets fArray, fCapacity, and fFlags
+ // returns boolean for success or failure
+ UBool allocate(int32_t capacity);
+
+ // release the array if owned
+ void releaseArray(void);
+
+ // turn a bogus string into an empty one
+ void unBogus();
+
+ // implements assigment operator, copy constructor, and fastCopyFrom()
+ UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
+
+ // Pin start and limit to acceptable values.
+ inline void pinIndex(int32_t& start) const;
+ inline void pinIndices(int32_t& start,
+ int32_t& length) const;
+
+#if !UCONFIG_NO_CONVERSION
+
+ /* Internal extract() using UConverter. */
+ int32_t doExtract(int32_t start, int32_t length,
+ char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+ /*
+ * Real constructor for converting from codepage data.
+ * It assumes that it is called with !fRefCounted.
+ *
+ * If <code>codepage==0</code>, then the default converter
+ * is used for the platform encoding.
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ */
+ void doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ const char *codepage);
+
+ /*
+ * Worker function for creating a UnicodeString from
+ * a codepage string using a UConverter.
+ */
+ void
+ doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ UConverter *converter,
+ UErrorCode &status);
+
+#endif
+
+ /*
+ * This function is called when write access to the array
+ * is necessary.
+ *
+ * We need to make a copy of the array if
+ * the buffer is read-only, or
+ * the buffer is refCounted (shared), and refCount>1, or
+ * the buffer is too small.
+ *
+ * Return FALSE if memory could not be allocated.
+ */
+ UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
+ int32_t growCapacity = -1,
+ UBool doCopyArray = TRUE,
+ int32_t **pBufferToDelete = 0,
+ UBool forceClone = FALSE);
+
+ /**
+ * Common function for UnicodeString case mappings.
+ * The stringCaseMapper has the same type UStringCaseMapper
+ * as in ustr_imp.h for ustrcase_map().
+ */
+ UnicodeString &
+ caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
+
+ // ref counting
+ void addRef(void);
+ int32_t removeRef(void);
+ int32_t refCount(void) const;
+
+ // constants
+ enum {
+ // Set the stack buffer size so that sizeof(UnicodeString) is,
+ // naturally (without padding), a multiple of sizeof(pointer).
+ US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
+ kInvalidUChar=0xffff, // invalid UChar index
+ kGrowSize=128, // grow size for this buffer
+ kInvalidHashCode=0, // invalid hash code
+ kEmptyHashCode=1, // hash code for empty string
+
+ // bit flag values for fFlags
+ kIsBogus=1, // this string is bogus, i.e., not valid or NULL
+ kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
+ kRefCounted=4, // there is a refCount field before the characters in fArray
+ kBufferIsReadonly=8,// do not write to this buffer
+ kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
+ // and releaseBuffer(newLength) must be called
+
+ // combined values for convenience
+ kShortString=kUsingStackBuffer,
+ kLongString=kRefCounted,
+ kReadonlyAlias=kBufferIsReadonly,
+ kWritableAlias=0
+ };
+
+ friend class StringThreadTest;
+ friend class UnicodeStringAppendable;
+
+ union StackBufferOrFields; // forward declaration necessary before friend declaration
+ friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
+
+ /*
+ * The following are all the class fields that are stored
+ * in each UnicodeString object.
+ * Note that UnicodeString has virtual functions,
+ * therefore there is an implicit vtable pointer
+ * as the first real field.
+ * The fields should be aligned such that no padding is necessary.
+ * On 32-bit machines, the size should be 32 bytes,
+ * on 64-bit machines (8-byte pointers), it should be 40 bytes.
+ *
+ * We use a hack to achieve this.
+ *
+ * With at least some compilers, each of the following is forced to
+ * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
+ * rounded up with additional padding if the fields do not already fit that requirement:
+ * - sizeof(class UnicodeString)
+ * - offsetof(UnicodeString, fUnion)
+ * - sizeof(fUnion)
+ * - sizeof(fFields)
+ *
+ * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
+ * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
+ * (Padding at the end of fFields is ok:
+ * As long as there is no padding after fStackBuffer, it is not wasted space.)
+ *
+ * We further assume that the compiler does not reorder the fields,
+ * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
+ * with at most some padding (but no other field) in between.
+ * (Padding there would be wasted space, but functionally harmless.)
+ *
+ * We use a few more sizeof(pointer)'s chunks of space with
+ * fRestOfStackBuffer, fShortLength and fFlags,
+ * to get up exactly to the intended sizeof(UnicodeString).
+ */
+ // (implicit) *vtable;
+ union StackBufferOrFields {
+ // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
+ // else fFields is used
+ UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
+ struct {
+ UChar *fArray; // the Unicode data
+ int32_t fCapacity; // capacity of fArray (in UChars)
+ int32_t fLength; // number of characters in fArray if >127; else undefined
+ } fFields;
+ } fUnion;
+ UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
+ int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
+ uint8_t fFlags; // bit flags: see constants above
+};
+
+/**
+ * Create a new UnicodeString with the concatenation of two others.
+ *
+ * @param s1 The first string to be copied to the new one.
+ * @param s2 The second string to be copied to the new one, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @stable ICU 2.8
+ */
+U_COMMON_API UnicodeString U_EXPORT2
+operator+ (const UnicodeString &s1, const UnicodeString &s2);
+
+//========================================
+// Inline members
+//========================================
+
+//========================================
+// Privates
+//========================================
+
+inline void
+UnicodeString::pinIndex(int32_t& start) const
+{
+ // pin index
+ if(start < 0) {
+ start = 0;
+ } else if(start > length()) {
+ start = length();
+ }
+}
+
+inline void
+UnicodeString::pinIndices(int32_t& start,
+ int32_t& _length) const
+{
+ // pin indices
+ int32_t len = length();
+ if(start < 0) {
+ start = 0;
+ } else if(start > len) {
+ start = len;
+ }
+ if(_length < 0) {
+ _length = 0;
+ } else if(_length > (len - start)) {
+ _length = (len - start);
+ }
+}
+
+inline UChar*
+UnicodeString::getArrayStart()
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
+
+inline const UChar*
+UnicodeString::getArrayStart() const
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
+
+//========================================
+// Default constructor
+//========================================
+
+inline
+UnicodeString::UnicodeString()
+ : fShortLength(0),
+ fFlags(kShortString)
+{}
+
+//========================================
+// Read-only implementation methods
+//========================================
+inline int32_t
+UnicodeString::length() const
+{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
+
+inline int32_t
+UnicodeString::getCapacity() const
+{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
+
+inline int32_t
+UnicodeString::hashCode() const
+{ return doHashCode(); }
+
+inline UBool
+UnicodeString::isBogus() const
+{ return (UBool)(fFlags & kIsBogus); }
+
+inline UBool
+UnicodeString::isWritable() const
+{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
+
+inline UBool
+UnicodeString::isBufferWritable() const
+{
+ return (UBool)(
+ !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
+ (!(fFlags&kRefCounted) || refCount()==1));
+}
+
+inline const UChar *
+UnicodeString::getBuffer() const {
+ if(fFlags&(kIsBogus|kOpenGetBuffer)) {
+ return 0;
+ } else if(fFlags&kUsingStackBuffer) {
+ return fUnion.fStackBuffer;
+ } else {
+ return fUnion.fFields.fArray;
+ }
+}
+
+//========================================
+// Read-only alias methods
+//========================================
+inline int8_t
+UnicodeString::doCompare(int32_t start,
+ int32_t thisLength,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{
+ if(srcText.isBogus()) {
+ return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+ } else {
+ srcText.pinIndices(srcStart, srcLength);
+ return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+ }
+}
+
+inline UBool
+UnicodeString::operator== (const UnicodeString& text) const
+{
+ if(isBogus()) {
+ return text.isBogus();
+ } else {
+ int32_t len = length(), textLength = text.length();
+ return !text.isBogus() && len == textLength && doEquals(text, len);
+ }
+}
+
+inline UBool
+UnicodeString::operator!= (const UnicodeString& text) const
+{ return (! operator==(text)); }
+
+inline UBool
+UnicodeString::operator> (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) == 1; }
+
+inline UBool
+UnicodeString::operator< (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) == -1; }
+
+inline UBool
+UnicodeString::operator>= (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) != -1; }
+
+inline UBool
+UnicodeString::operator<= (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) != 1; }
+
+inline int8_t
+UnicodeString::compare(const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText) const
+{ return doCompare(start, _length, srcText, 0, srcText.length()); }
+
+inline int8_t
+UnicodeString::compare(const UChar *srcChars,
+ int32_t srcLength) const
+{ return doCompare(0, length(), srcChars, 0, srcLength); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{ return doCompare(start, _length, srcText, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+ int32_t _length,
+ const UChar *srcChars) const
+{ return doCompare(start, _length, srcChars, 0, _length); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+ int32_t _length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const
+{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const
+{ return doCompare(start, limit - start,
+ srcText, srcStart, srcLimit - srcStart); }
+
+inline int8_t
+UnicodeString::doCompareCodePointOrder(int32_t start,
+ int32_t thisLength,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{
+ if(srcText.isBogus()) {
+ return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+ } else {
+ srcText.pinIndices(srcStart, srcLength);
+ return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+ }
+}
+
+inline int8_t
+UnicodeString::compareCodePointOrder(const UnicodeString& text) const
+{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText) const
+{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(const UChar *srcChars,
+ int32_t srcLength) const
+{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+ int32_t _length,
+ const UChar *srcChars) const
+{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+ int32_t _length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const
+{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrderBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const
+{ return doCompareCodePointOrder(start, limit - start,
+ srcText, srcStart, srcLimit - srcStart); }
+
+inline int8_t
+UnicodeString::doCaseCompare(int32_t start,
+ int32_t thisLength,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const
+{
+ if(srcText.isBogus()) {
+ return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+ } else {
+ srcText.pinIndices(srcStart, srcLength);
+ return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
+ }
+}
+
+inline int8_t
+UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
+ return doCaseCompare(0, length(), text, 0, text.length(), options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const UnicodeString &srcText,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(const UChar *srcChars,
+ int32_t srcLength,
+ uint32_t options) const {
+ return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const UChar *srcChars,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcChars, 0, _length, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLimit,
+ uint32_t options) const {
+ return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t _length) const
+{
+ if(!srcText.isBogus()) {
+ srcText.pinIndices(srcStart, srcLength);
+ if(srcLength > 0) {
+ return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+ }
+ }
+ return -1;
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text) const
+{ return indexOf(text, 0, text.length(), 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+ int32_t start) const {
+ pinIndex(start);
+ return indexOf(text, 0, text.length(), start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t _length) const
+{ return indexOf(text, 0, text.length(), start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start) const {
+ pinIndex(start);
+ return indexOf(srcChars, 0, srcLength, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t _length) const
+{ return indexOf(srcChars, 0, srcLength, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(UChar c,
+ int32_t start,
+ int32_t _length) const
+{ return doIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+ int32_t start,
+ int32_t _length) const
+{ return doIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(UChar c) const
+{ return doIndexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c) const
+{ return indexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(UChar c,
+ int32_t start) const {
+ pinIndex(start);
+ return doIndexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+ int32_t start) const {
+ pinIndex(start);
+ return indexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t _length) const
+{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start) const {
+ pinIndex(start);
+ return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t _length) const
+{
+ if(!srcText.isBogus()) {
+ srcText.pinIndices(srcStart, srcLength);
+ if(srcLength > 0) {
+ return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+ }
+ }
+ return -1;
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t _length) const
+{ return lastIndexOf(text, 0, text.length(), start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+ int32_t start) const {
+ pinIndex(start);
+ return lastIndexOf(text, 0, text.length(), start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text) const
+{ return lastIndexOf(text, 0, text.length(), 0, length()); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar c,
+ int32_t start,
+ int32_t _length) const
+{ return doLastIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t _length) const {
+ return doLastIndexOf(c, start, _length);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar c) const
+{ return doLastIndexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c) const {
+ return lastIndexOf(c, 0, length());
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar c,
+ int32_t start) const {
+ pinIndex(start);
+ return doLastIndexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+ int32_t start) const {
+ pinIndex(start);
+ return lastIndexOf(c, start, length() - start);
+}
+
+inline UBool
+UnicodeString::startsWith(const UnicodeString& text) const
+{ return compare(0, text.length(), text, 0, text.length()) == 0; }
+
+inline UBool
+UnicodeString::startsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
+
+inline UBool
+UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
+ if(srcLength < 0) {
+ srcLength = u_strlen(srcChars);
+ }
+ return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
+}
+
+inline UBool
+UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
+ if(srcLength < 0) {
+ srcLength = u_strlen(srcChars);
+ }
+ return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
+}
+
+inline UBool
+UnicodeString::endsWith(const UnicodeString& text) const
+{ return doCompare(length() - text.length(), text.length(),
+ text, 0, text.length()) == 0; }
+
+inline UBool
+UnicodeString::endsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const {
+ srcText.pinIndices(srcStart, srcLength);
+ return doCompare(length() - srcLength, srcLength,
+ srcText, srcStart, srcLength) == 0;
+}
+
+inline UBool
+UnicodeString::endsWith(const UChar *srcChars,
+ int32_t srcLength) const {
+ if(srcLength < 0) {
+ srcLength = u_strlen(srcChars);
+ }
+ return doCompare(length() - srcLength, srcLength,
+ srcChars, 0, srcLength) == 0;
+}
+
+inline UBool
+UnicodeString::endsWith(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const {
+ if(srcLength < 0) {
+ srcLength = u_strlen(srcChars + srcStart);
+ }
+ return doCompare(length() - srcLength, srcLength,
+ srcChars, srcStart, srcLength) == 0;
+}
+
+//========================================
+// replace
+//========================================
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText)
+{ return doReplace(start, _length, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doReplace(start, _length, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+ int32_t _length,
+ const UChar *srcChars,
+ int32_t srcLength)
+{ return doReplace(start, _length, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+ int32_t _length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+ int32_t _length,
+ UChar srcChar)
+{ return doReplace(start, _length, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText)
+{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit)
+{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
+
+inline UnicodeString&
+UnicodeString::findAndReplace(const UnicodeString& oldText,
+ const UnicodeString& newText)
+{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
+ newText, 0, newText.length()); }
+
+inline UnicodeString&
+UnicodeString::findAndReplace(int32_t start,
+ int32_t _length,
+ const UnicodeString& oldText,
+ const UnicodeString& newText)
+{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
+ newText, 0, newText.length()); }
+
+// ============================
+// extract
+// ============================
+inline void
+UnicodeString::doExtract(int32_t start,
+ int32_t _length,
+ UnicodeString& target) const
+{ target.replace(0, target.length(), *this, start, _length); }
+
+inline void
+UnicodeString::extract(int32_t start,
+ int32_t _length,
+ UChar *target,
+ int32_t targetStart) const
+{ doExtract(start, _length, target, targetStart); }
+
+inline void
+UnicodeString::extract(int32_t start,
+ int32_t _length,
+ UnicodeString& target) const
+{ doExtract(start, _length, target); }
+
+#if !UCONFIG_NO_CONVERSION
+
+inline int32_t
+UnicodeString::extract(int32_t start,
+ int32_t _length,
+ char *dst,
+ const char *codepage) const
+
+{
+ // This dstSize value will be checked explicitly
+ return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
+}
+
+#endif
+
+inline void
+UnicodeString::extractBetween(int32_t start,
+ int32_t limit,
+ UChar *dst,
+ int32_t dstStart) const {
+ pinIndex(start);
+ pinIndex(limit);
+ doExtract(start, limit - start, dst, dstStart);
+}
+
+inline UnicodeString
+UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
+ return tempSubString(start, limit - start);
+}
+
+inline UChar
+UnicodeString::doCharAt(int32_t offset) const
+{
+ if((uint32_t)offset < (uint32_t)length()) {
+ return getArrayStart()[offset];
+ } else {
+ return kInvalidUChar;
+ }
+}
+
+inline UChar
+UnicodeString::charAt(int32_t offset) const
+{ return doCharAt(offset); }
+
+inline UChar
+UnicodeString::operator[] (int32_t offset) const
+{ return doCharAt(offset); }
+
+inline UBool
+UnicodeString::isEmpty() const {
+ return fShortLength == 0;
+}
+
+//========================================
+// Write implementation methods
+//========================================
+inline void
+UnicodeString::setLength(int32_t len) {
+ if(len <= 127) {
+ fShortLength = (int8_t)len;
+ } else {
+ fShortLength = (int8_t)-1;
+ fUnion.fFields.fLength = len;
+ }
+}
+
+inline void
+UnicodeString::setToEmpty() {
+ fShortLength = 0;
+ fFlags = kShortString;
+}
+
+inline void
+UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
+ setLength(len);
+ fUnion.fFields.fArray = array;
+ fUnion.fFields.fCapacity = capacity;
+}
+
+inline UnicodeString&
+UnicodeString::operator= (UChar ch)
+{ return doReplace(0, length(), &ch, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator= (UChar32 ch)
+{ return replace(0, length(), ch); }
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength)
+{
+ unBogus();
+ return doReplace(0, length(), srcText, srcStart, srcLength);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+ int32_t srcStart)
+{
+ unBogus();
+ srcText.pinIndex(srcStart);
+ return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText)
+{
+ return copyFrom(srcText);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UChar *srcChars,
+ int32_t srcLength)
+{
+ unBogus();
+ return doReplace(0, length(), srcChars, 0, srcLength);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(UChar srcChar)
+{
+ unBogus();
+ return doReplace(0, length(), &srcChar, 0, 1);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(UChar32 srcChar)
+{
+ unBogus();
+ return replace(0, length(), srcChar);
+}
+
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText)
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::append(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(const UChar *srcChars,
+ int32_t srcLength)
+{ return doReplace(length(), 0, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(UChar srcChar)
+{ return doReplace(length(), 0, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar ch)
+{ return doReplace(length(), 0, &ch, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar32 ch) {
+ return append(ch);
+}
+
+inline UnicodeString&
+UnicodeString::operator+= (const UnicodeString& srcText)
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doReplace(start, 0, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ const UnicodeString& srcText)
+{ return doReplace(start, 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ const UChar *srcChars,
+ int32_t srcLength)
+{ return doReplace(start, 0, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ UChar srcChar)
+{ return doReplace(start, 0, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ UChar32 srcChar)
+{ return replace(start, 0, srcChar); }
+
+
+inline UnicodeString&
+UnicodeString::remove()
+{
+ // remove() of a bogus string makes the string empty and non-bogus
+ if(isBogus()) {
+ setToEmpty();
+ } else {
+ fShortLength = 0;
+ }
+ return *this;
+}
+
+inline UnicodeString&
+UnicodeString::remove(int32_t start,
+ int32_t _length)
+{
+ if(start <= 0 && _length == INT32_MAX) {
+ // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
+ return remove();
+ }
+ return doReplace(start, _length, NULL, 0, 0);
+}
+
+inline UnicodeString&
+UnicodeString::removeBetween(int32_t start,
+ int32_t limit)
+{ return doReplace(start, limit - start, NULL, 0, 0); }
+
+inline UnicodeString &
+UnicodeString::retainBetween(int32_t start, int32_t limit) {
+ truncate(limit);
+ return doReplace(0, start, NULL, 0, 0);
+}
+
+inline UBool
+UnicodeString::truncate(int32_t targetLength)
+{
+ if(isBogus() && targetLength == 0) {
+ // truncate(0) of a bogus string makes the string empty and non-bogus
+ unBogus();
+ return FALSE;
+ } else if((uint32_t)targetLength < (uint32_t)length()) {
+ setLength(targetLength);
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+inline UnicodeString&
+UnicodeString::reverse()
+{ return doReverse(0, length()); }
+
+inline UnicodeString&
+UnicodeString::reverse(int32_t start,
+ int32_t _length)
+{ return doReverse(start, _length); }
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uobject.h b/src/core/basetypes/icu-ucsdet/include/unicode/uobject.h
new file mode 100644
index 00000000..54ceace6
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uobject.h
@@ -0,0 +1,320 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: uobject.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jun26
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UOBJECT_H__
+#define __UOBJECT_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Common ICU base class UObject.
+ */
+
+/**
+ * @{
+ * \def U_NO_THROW
+ * Define this to define the throw() specification so
+ * certain functions do not throw any exceptions
+ *
+ * UMemory operator new methods should have the throw() specification
+ * appended to them, so that the compiler adds the additional NULL check
+ * before calling constructors. Without, if <code>operator new</code> returns NULL the
+ * constructor is still called, and if the constructor references member
+ * data, (which it typically does), the result is a segmentation violation.
+ *
+ * @stable ICU 4.2
+ */
+#ifndef U_NO_THROW
+#define U_NO_THROW throw()
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/* UClassID-based RTTI */
+/*===========================================================================*/
+
+/**
+ * UClassID is used to identify classes without using the compiler's RTTI.
+ * This was used before C++ compilers consistently supported RTTI.
+ * ICU 4.6 requires compiler RTTI to be turned on.
+ *
+ * Each class hierarchy which needs
+ * to implement polymorphic clone() or operator==() defines two methods,
+ * described in detail below. UClassID values can be compared using
+ * operator==(). Nothing else should be done with them.
+ *
+ * \par
+ * In class hierarchies that implement "poor man's RTTI",
+ * each concrete subclass implements getDynamicClassID() in the same way:
+ *
+ * \code
+ * class Derived {
+ * public:
+ * virtual UClassID getDynamicClassID() const
+ * { return Derived::getStaticClassID(); }
+ * }
+ * \endcode
+ *
+ * Each concrete class implements getStaticClassID() as well, which allows
+ * clients to test for a specific type.
+ *
+ * \code
+ * class Derived {
+ * public:
+ * static UClassID U_EXPORT2 getStaticClassID();
+ * private:
+ * static char fgClassID;
+ * }
+ *
+ * // In Derived.cpp:
+ * UClassID Derived::getStaticClassID()
+ * { return (UClassID)&Derived::fgClassID; }
+ * char Derived::fgClassID = 0; // Value is irrelevant
+ * \endcode
+ * @stable ICU 2.0
+ */
+typedef void* UClassID;
+
+U_NAMESPACE_BEGIN
+
+/**
+ * UMemory is the common ICU base class.
+ * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
+ *
+ * This is primarily to make it possible and simple to override the
+ * C++ memory management by adding new/delete operators to this base class.
+ *
+ * To override ALL ICU memory management, including that from plain C code,
+ * replace the allocation functions declared in cmemory.h
+ *
+ * UMemory does not contain any virtual functions.
+ * Common "boilerplate" functions are defined in UObject.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UMemory {
+public:
+
+/* test versions for debugging shaper heap memory problems */
+#ifdef SHAPER_MEMORY_DEBUG
+ static void * NewArray(int size, int count);
+ static void * GrowArray(void * array, int newSize );
+ static void FreeArray(void * array );
+#endif
+
+#if U_OVERRIDE_CXX_ALLOCATION
+ /**
+ * Override for ICU4C C++ memory management.
+ * simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * they or something else could be used here to implement C++ new/delete
+ * for ICU4C C++ classes
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new(size_t size) U_NO_THROW;
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * See new().
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new[](size_t size) U_NO_THROW;
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * they or something else could be used here to implement C++ new/delete
+ * for ICU4C C++ classes
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete(void *p) U_NO_THROW;
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * See delete().
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete[](void *p) U_NO_THROW;
+
+#if U_HAVE_PLACEMENT_NEW
+ /**
+ * Override for ICU4C C++ memory management for STL.
+ * See new().
+ * @stable ICU 2.6
+ */
+ static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NO_THROW { return ptr; }
+
+ /**
+ * Override for ICU4C C++ memory management for STL.
+ * See delete().
+ * @stable ICU 2.6
+ */
+ static inline void U_EXPORT2 operator delete(void *, void *) U_NO_THROW {}
+#endif /* U_HAVE_PLACEMENT_NEW */
+#if U_HAVE_DEBUG_LOCATION_NEW
+ /**
+ * This method overrides the MFC debug version of the operator new
+ *
+ * @param size The requested memory size
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NO_THROW;
+ /**
+ * This method provides a matching delete for the MFC debug new
+ *
+ * @param p The pointer to the allocated memory
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NO_THROW;
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+#endif /* U_OVERRIDE_CXX_ALLOCATION */
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UMemory &UMemory::operator=(const UMemory &);
+ */
+};
+
+/**
+ * UObject is the common ICU "boilerplate" class.
+ * UObject inherits UMemory (starting with ICU 2.4),
+ * and all other public ICU C++ classes
+ * are derived from UObject (starting with ICU 2.2).
+ *
+ * UObject contains common virtual functions, in particular a virtual destructor.
+ *
+ * The clone() function is not available in UObject because it is not
+ * implemented by all ICU classes.
+ * Many ICU services provide a clone() function for their class trees,
+ * defined on the service's C++ base class, and all subclasses within that
+ * service class tree return a pointer to the service base class
+ * (which itself is a subclass of UObject).
+ * This is because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ *
+ * @stable ICU 2.2
+ */
+class U_COMMON_API UObject : public UMemory {
+public:
+ /**
+ * Destructor.
+ *
+ * @stable ICU 2.2
+ */
+ virtual ~UObject();
+
+ /**
+ * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
+ * The base class implementation returns a dummy value.
+ *
+ * Use compiler RTTI rather than ICU's "poor man's RTTI".
+ * Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI".
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+protected:
+ // the following functions are protected to prevent instantiation and
+ // direct use of UObject itself
+
+ // default constructor
+ // inline UObject() {}
+
+ // copy constructor
+ // inline UObject(const UObject &other) {}
+
+#if 0
+ // TODO Sometime in the future. Implement operator==().
+ // (This comment inserted in 2.2)
+ // some or all of the following "boilerplate" functions may be made public
+ // in a future ICU4C release when all subclasses implement them
+
+ // assignment operator
+ // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
+ // commented out because the implementation is the same as a compiler's default
+ // UObject &operator=(const UObject &other) { return *this; }
+
+ // comparison operators
+ virtual inline UBool operator==(const UObject &other) const { return this==&other; }
+ inline UBool operator!=(const UObject &other) const { return !operator==(other); }
+
+ // clone() commented out from the base class:
+ // some compilers do not support co-variant return types
+ // (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
+ // see also UObject class documentation.
+ // virtual UObject *clone() const;
+#endif
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UObject &UObject::operator=(const UObject &);
+ */
+};
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * This is a simple macro to add ICU RTTI to an ICU object implementation.
+ * This does not go into the header. This should only be used in *.cpp files.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ } \
+ UClassID myClass::getDynamicClassID() const \
+ { return myClass::getStaticClassID(); }
+
+
+/**
+ * This macro adds ICU RTTI to an ICU abstract class implementation.
+ * This macro should be invoked in *.cpp files. The corresponding
+ * header should declare getStaticClassID.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ }
+
+#endif /* U_HIDE_INTERNAL_API */
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/urename.h b/src/core/basetypes/icu-ucsdet/include/unicode/urename.h
new file mode 100644
index 00000000..a8172626
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/urename.h
@@ -0,0 +1,1784 @@
+/*
+*******************************************************************************
+* Copyright (C) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: urename.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Perl script tools/genren.pl written by Vladimir Weinstein
+*
+* Contains data for renaming ICU exports.
+* Gets included by umachine.h
+*
+* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef URENAME_H
+#define URENAME_H
+
+/* U_DISABLE_RENAMING can be defined in the following ways:
+ * - when running configure, e.g.
+ * runConfigureICU Linux --disable-renaming
+ * - by changing the default setting of U_DISABLE_RENAMING in uconfig.h
+ */
+
+#include "unicode/uconfig.h"
+
+#if !U_DISABLE_RENAMING
+
+/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give
+ the platform a chance to define it first.
+ Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined.
+ */
+
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#include "unicode/umachine.h"
+#endif
+
+/* If we still don't have U_ICU_ENTRY_POINT_RENAME use the default. */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#include "unicode/uvernum.h"
+#endif
+
+/* Error out before the following defines cause very strange and unexpected code breakage */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#error U_ICU_ENTRY_POINT_RENAME is not defined - cannot continue. Consider defining U_DISABLE_RENAMING if renaming should not be used.
+#endif
+
+
+/* C exports renaming data */
+
+#define T_CString_int64ToString U_ICU_ENTRY_POINT_RENAME(T_CString_int64ToString)
+#define T_CString_integerToString U_ICU_ENTRY_POINT_RENAME(T_CString_integerToString)
+#define T_CString_stringToInteger U_ICU_ENTRY_POINT_RENAME(T_CString_stringToInteger)
+#define T_CString_toLowerCase U_ICU_ENTRY_POINT_RENAME(T_CString_toLowerCase)
+#define T_CString_toUpperCase U_ICU_ENTRY_POINT_RENAME(T_CString_toUpperCase)
+#define UCNV_FROM_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_ESCAPE)
+#define UCNV_FROM_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SKIP)
+#define UCNV_FROM_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_STOP)
+#define UCNV_FROM_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SUBSTITUTE)
+#define UCNV_TO_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_ESCAPE)
+#define UCNV_TO_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SKIP)
+#define UCNV_TO_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_STOP)
+#define UCNV_TO_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SUBSTITUTE)
+#define UDataMemory_createNewInstance U_ICU_ENTRY_POINT_RENAME(UDataMemory_createNewInstance)
+#define UDataMemory_init U_ICU_ENTRY_POINT_RENAME(UDataMemory_init)
+#define UDataMemory_isLoaded U_ICU_ENTRY_POINT_RENAME(UDataMemory_isLoaded)
+#define UDataMemory_normalizeDataPointer U_ICU_ENTRY_POINT_RENAME(UDataMemory_normalizeDataPointer)
+#define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)
+#define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)
+#define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)
+#define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)
+#define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)
+#define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)
+#define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)
+#define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)
+#define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData)
+#define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data)
+#define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1)
+#define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11)
+#define _LMBCSData16 U_ICU_ENTRY_POINT_RENAME(_LMBCSData16)
+#define _LMBCSData17 U_ICU_ENTRY_POINT_RENAME(_LMBCSData17)
+#define _LMBCSData18 U_ICU_ENTRY_POINT_RENAME(_LMBCSData18)
+#define _LMBCSData19 U_ICU_ENTRY_POINT_RENAME(_LMBCSData19)
+#define _LMBCSData2 U_ICU_ENTRY_POINT_RENAME(_LMBCSData2)
+#define _LMBCSData3 U_ICU_ENTRY_POINT_RENAME(_LMBCSData3)
+#define _LMBCSData4 U_ICU_ENTRY_POINT_RENAME(_LMBCSData4)
+#define _LMBCSData5 U_ICU_ENTRY_POINT_RENAME(_LMBCSData5)
+#define _LMBCSData6 U_ICU_ENTRY_POINT_RENAME(_LMBCSData6)
+#define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8)
+#define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data)
+#define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData)
+#define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData)
+#define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData)
+#define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data)
+#define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData)
+#define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData)
+#define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data)
+#define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)
+#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
+#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
+#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
+#define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)
+#define gTimeZoneFilesInitOnce U_ICU_ENTRY_POINT_RENAME(gTimeZoneFilesInitOnce)
+#define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone)
+#define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close)
+#define izrule_equals U_ICU_ENTRY_POINT_RENAME(izrule_equals)
+#define izrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(izrule_getDSTSavings)
+#define izrule_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(izrule_getDynamicClassID)
+#define izrule_getFinalStart U_ICU_ENTRY_POINT_RENAME(izrule_getFinalStart)
+#define izrule_getFirstStart U_ICU_ENTRY_POINT_RENAME(izrule_getFirstStart)
+#define izrule_getName U_ICU_ENTRY_POINT_RENAME(izrule_getName)
+#define izrule_getNextStart U_ICU_ENTRY_POINT_RENAME(izrule_getNextStart)
+#define izrule_getPreviousStart U_ICU_ENTRY_POINT_RENAME(izrule_getPreviousStart)
+#define izrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(izrule_getRawOffset)
+#define izrule_getStaticClassID U_ICU_ENTRY_POINT_RENAME(izrule_getStaticClassID)
+#define izrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(izrule_isEquivalentTo)
+#define izrule_open U_ICU_ENTRY_POINT_RENAME(izrule_open)
+#define le_close U_ICU_ENTRY_POINT_RENAME(le_close)
+#define le_create U_ICU_ENTRY_POINT_RENAME(le_create)
+#define le_getCharIndices U_ICU_ENTRY_POINT_RENAME(le_getCharIndices)
+#define le_getCharIndicesWithBase U_ICU_ENTRY_POINT_RENAME(le_getCharIndicesWithBase)
+#define le_getGlyphCount U_ICU_ENTRY_POINT_RENAME(le_getGlyphCount)
+#define le_getGlyphPosition U_ICU_ENTRY_POINT_RENAME(le_getGlyphPosition)
+#define le_getGlyphPositions U_ICU_ENTRY_POINT_RENAME(le_getGlyphPositions)
+#define le_getGlyphs U_ICU_ENTRY_POINT_RENAME(le_getGlyphs)
+#define le_layoutChars U_ICU_ENTRY_POINT_RENAME(le_layoutChars)
+#define le_reset U_ICU_ENTRY_POINT_RENAME(le_reset)
+#define locale_getKeywords U_ICU_ENTRY_POINT_RENAME(locale_getKeywords)
+#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart)
+#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default)
+#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default)
+#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun)
+#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun)
+#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun)
+#define pl_close U_ICU_ENTRY_POINT_RENAME(pl_close)
+#define pl_closeFontRuns U_ICU_ENTRY_POINT_RENAME(pl_closeFontRuns)
+#define pl_closeLine U_ICU_ENTRY_POINT_RENAME(pl_closeLine)
+#define pl_closeLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_closeLocaleRuns)
+#define pl_closeValueRuns U_ICU_ENTRY_POINT_RENAME(pl_closeValueRuns)
+#define pl_countLineRuns U_ICU_ENTRY_POINT_RENAME(pl_countLineRuns)
+#define pl_create U_ICU_ENTRY_POINT_RENAME(pl_create)
+#define pl_getAscent U_ICU_ENTRY_POINT_RENAME(pl_getAscent)
+#define pl_getDescent U_ICU_ENTRY_POINT_RENAME(pl_getDescent)
+#define pl_getFontRunCount U_ICU_ENTRY_POINT_RENAME(pl_getFontRunCount)
+#define pl_getFontRunFont U_ICU_ENTRY_POINT_RENAME(pl_getFontRunFont)
+#define pl_getFontRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLastLimit)
+#define pl_getFontRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLimit)
+#define pl_getLeading U_ICU_ENTRY_POINT_RENAME(pl_getLeading)
+#define pl_getLineAscent U_ICU_ENTRY_POINT_RENAME(pl_getLineAscent)
+#define pl_getLineDescent U_ICU_ENTRY_POINT_RENAME(pl_getLineDescent)
+#define pl_getLineLeading U_ICU_ENTRY_POINT_RENAME(pl_getLineLeading)
+#define pl_getLineVisualRun U_ICU_ENTRY_POINT_RENAME(pl_getLineVisualRun)
+#define pl_getLineWidth U_ICU_ENTRY_POINT_RENAME(pl_getLineWidth)
+#define pl_getLocaleRunCount U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunCount)
+#define pl_getLocaleRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLastLimit)
+#define pl_getLocaleRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLimit)
+#define pl_getLocaleRunLocale U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLocale)
+#define pl_getParagraphLevel U_ICU_ENTRY_POINT_RENAME(pl_getParagraphLevel)
+#define pl_getTextDirection U_ICU_ENTRY_POINT_RENAME(pl_getTextDirection)
+#define pl_getValueRunCount U_ICU_ENTRY_POINT_RENAME(pl_getValueRunCount)
+#define pl_getValueRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLastLimit)
+#define pl_getValueRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLimit)
+#define pl_getValueRunValue U_ICU_ENTRY_POINT_RENAME(pl_getValueRunValue)
+#define pl_getVisualRunAscent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunAscent)
+#define pl_getVisualRunDescent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDescent)
+#define pl_getVisualRunDirection U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDirection)
+#define pl_getVisualRunFont U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunFont)
+#define pl_getVisualRunGlyphCount U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphCount)
+#define pl_getVisualRunGlyphToCharMap U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphToCharMap)
+#define pl_getVisualRunGlyphs U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphs)
+#define pl_getVisualRunLeading U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunLeading)
+#define pl_getVisualRunPositions U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunPositions)
+#define pl_isComplex U_ICU_ENTRY_POINT_RENAME(pl_isComplex)
+#define pl_nextLine U_ICU_ENTRY_POINT_RENAME(pl_nextLine)
+#define pl_openEmptyFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyFontRuns)
+#define pl_openEmptyLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyLocaleRuns)
+#define pl_openEmptyValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyValueRuns)
+#define pl_openFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openFontRuns)
+#define pl_openLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openLocaleRuns)
+#define pl_openValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openValueRuns)
+#define pl_reflow U_ICU_ENTRY_POINT_RENAME(pl_reflow)
+#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns)
+#define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns)
+#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns)
+#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems)
+#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource)
+#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias)
+#define res_getArrayItem U_ICU_ENTRY_POINT_RENAME(res_getArrayItem)
+#define res_getBinary U_ICU_ENTRY_POINT_RENAME(res_getBinary)
+#define res_getIntVector U_ICU_ENTRY_POINT_RENAME(res_getIntVector)
+#define res_getPublicType U_ICU_ENTRY_POINT_RENAME(res_getPublicType)
+#define res_getResource U_ICU_ENTRY_POINT_RENAME(res_getResource)
+#define res_getString U_ICU_ENTRY_POINT_RENAME(res_getString)
+#define res_getTableItemByIndex U_ICU_ENTRY_POINT_RENAME(res_getTableItemByIndex)
+#define res_getTableItemByKey U_ICU_ENTRY_POINT_RENAME(res_getTableItemByKey)
+#define res_load U_ICU_ENTRY_POINT_RENAME(res_load)
+#define res_read U_ICU_ENTRY_POINT_RENAME(res_read)
+#define res_unload U_ICU_ENTRY_POINT_RENAME(res_unload)
+#define u_UCharsToChars U_ICU_ENTRY_POINT_RENAME(u_UCharsToChars)
+#define u_austrcpy U_ICU_ENTRY_POINT_RENAME(u_austrcpy)
+#define u_austrncpy U_ICU_ENTRY_POINT_RENAME(u_austrncpy)
+#define u_catclose U_ICU_ENTRY_POINT_RENAME(u_catclose)
+#define u_catgets U_ICU_ENTRY_POINT_RENAME(u_catgets)
+#define u_catopen U_ICU_ENTRY_POINT_RENAME(u_catopen)
+#define u_charAge U_ICU_ENTRY_POINT_RENAME(u_charAge)
+#define u_charDigitValue U_ICU_ENTRY_POINT_RENAME(u_charDigitValue)
+#define u_charDirection U_ICU_ENTRY_POINT_RENAME(u_charDirection)
+#define u_charFromName U_ICU_ENTRY_POINT_RENAME(u_charFromName)
+#define u_charMirror U_ICU_ENTRY_POINT_RENAME(u_charMirror)
+#define u_charName U_ICU_ENTRY_POINT_RENAME(u_charName)
+#define u_charType U_ICU_ENTRY_POINT_RENAME(u_charType)
+#define u_charsToUChars U_ICU_ENTRY_POINT_RENAME(u_charsToUChars)
+#define u_cleanup U_ICU_ENTRY_POINT_RENAME(u_cleanup)
+#define u_countChar32 U_ICU_ENTRY_POINT_RENAME(u_countChar32)
+#define u_digit U_ICU_ENTRY_POINT_RENAME(u_digit)
+#define u_enumCharNames U_ICU_ENTRY_POINT_RENAME(u_enumCharNames)
+#define u_enumCharTypes U_ICU_ENTRY_POINT_RENAME(u_enumCharTypes)
+#define u_errorName U_ICU_ENTRY_POINT_RENAME(u_errorName)
+#define u_fadopt U_ICU_ENTRY_POINT_RENAME(u_fadopt)
+#define u_fclose U_ICU_ENTRY_POINT_RENAME(u_fclose)
+#define u_feof U_ICU_ENTRY_POINT_RENAME(u_feof)
+#define u_fflush U_ICU_ENTRY_POINT_RENAME(u_fflush)
+#define u_fgetConverter U_ICU_ENTRY_POINT_RENAME(u_fgetConverter)
+#define u_fgetNumberFormat U_ICU_ENTRY_POINT_RENAME(u_fgetNumberFormat)
+#define u_fgetc U_ICU_ENTRY_POINT_RENAME(u_fgetc)
+#define u_fgetcodepage U_ICU_ENTRY_POINT_RENAME(u_fgetcodepage)
+#define u_fgetcx U_ICU_ENTRY_POINT_RENAME(u_fgetcx)
+#define u_fgetfile U_ICU_ENTRY_POINT_RENAME(u_fgetfile)
+#define u_fgetlocale U_ICU_ENTRY_POINT_RENAME(u_fgetlocale)
+#define u_fgets U_ICU_ENTRY_POINT_RENAME(u_fgets)
+#define u_file_read U_ICU_ENTRY_POINT_RENAME(u_file_read)
+#define u_file_write U_ICU_ENTRY_POINT_RENAME(u_file_write)
+#define u_file_write_flush U_ICU_ENTRY_POINT_RENAME(u_file_write_flush)
+#define u_finit U_ICU_ENTRY_POINT_RENAME(u_finit)
+#define u_flushDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_flushDefaultConverter)
+#define u_foldCase U_ICU_ENTRY_POINT_RENAME(u_foldCase)
+#define u_fopen U_ICU_ENTRY_POINT_RENAME(u_fopen)
+#define u_fopen_u U_ICU_ENTRY_POINT_RENAME(u_fopen_u)
+#define u_forDigit U_ICU_ENTRY_POINT_RENAME(u_forDigit)
+#define u_formatMessage U_ICU_ENTRY_POINT_RENAME(u_formatMessage)
+#define u_formatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_formatMessageWithError)
+#define u_fprintf U_ICU_ENTRY_POINT_RENAME(u_fprintf)
+#define u_fprintf_u U_ICU_ENTRY_POINT_RENAME(u_fprintf_u)
+#define u_fputc U_ICU_ENTRY_POINT_RENAME(u_fputc)
+#define u_fputs U_ICU_ENTRY_POINT_RENAME(u_fputs)
+#define u_frewind U_ICU_ENTRY_POINT_RENAME(u_frewind)
+#define u_fscanf U_ICU_ENTRY_POINT_RENAME(u_fscanf)
+#define u_fscanf_u U_ICU_ENTRY_POINT_RENAME(u_fscanf_u)
+#define u_fsetcodepage U_ICU_ENTRY_POINT_RENAME(u_fsetcodepage)
+#define u_fsetlocale U_ICU_ENTRY_POINT_RENAME(u_fsetlocale)
+#define u_fsettransliterator U_ICU_ENTRY_POINT_RENAME(u_fsettransliterator)
+#define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen)
+#define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc)
+#define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket)
+#define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass)
+#define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory)
+#define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion)
+#define u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter)
+#define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure)
+#define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment)
+#define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue)
+#define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue)
+#define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue)
+#define u_getMainProperties U_ICU_ENTRY_POINT_RENAME(u_getMainProperties)
+#define u_getNumericValue U_ICU_ENTRY_POINT_RENAME(u_getNumericValue)
+#define u_getPropertyEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyEnum)
+#define u_getPropertyName U_ICU_ENTRY_POINT_RENAME(u_getPropertyName)
+#define u_getPropertyValueEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueEnum)
+#define u_getPropertyValueName U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueName)
+#define u_getTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_getTimeZoneFilesDirectory)
+#define u_getUnicodeProperties U_ICU_ENTRY_POINT_RENAME(u_getUnicodeProperties)
+#define u_getUnicodeVersion U_ICU_ENTRY_POINT_RENAME(u_getUnicodeVersion)
+#define u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion)
+#define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout)
+#define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty)
+#define u_init U_ICU_ENTRY_POINT_RENAME(u_init)
+#define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable)
+#define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart)
+#define u_isIDStart U_ICU_ENTRY_POINT_RENAME(u_isIDStart)
+#define u_isISOControl U_ICU_ENTRY_POINT_RENAME(u_isISOControl)
+#define u_isJavaIDPart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDPart)
+#define u_isJavaIDStart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDStart)
+#define u_isJavaSpaceChar U_ICU_ENTRY_POINT_RENAME(u_isJavaSpaceChar)
+#define u_isMirrored U_ICU_ENTRY_POINT_RENAME(u_isMirrored)
+#define u_isUAlphabetic U_ICU_ENTRY_POINT_RENAME(u_isUAlphabetic)
+#define u_isULowercase U_ICU_ENTRY_POINT_RENAME(u_isULowercase)
+#define u_isUUppercase U_ICU_ENTRY_POINT_RENAME(u_isUUppercase)
+#define u_isUWhiteSpace U_ICU_ENTRY_POINT_RENAME(u_isUWhiteSpace)
+#define u_isWhitespace U_ICU_ENTRY_POINT_RENAME(u_isWhitespace)
+#define u_isalnum U_ICU_ENTRY_POINT_RENAME(u_isalnum)
+#define u_isalnumPOSIX U_ICU_ENTRY_POINT_RENAME(u_isalnumPOSIX)
+#define u_isalpha U_ICU_ENTRY_POINT_RENAME(u_isalpha)
+#define u_isbase U_ICU_ENTRY_POINT_RENAME(u_isbase)
+#define u_isblank U_ICU_ENTRY_POINT_RENAME(u_isblank)
+#define u_iscntrl U_ICU_ENTRY_POINT_RENAME(u_iscntrl)
+#define u_isdefined U_ICU_ENTRY_POINT_RENAME(u_isdefined)
+#define u_isdigit U_ICU_ENTRY_POINT_RENAME(u_isdigit)
+#define u_isgraph U_ICU_ENTRY_POINT_RENAME(u_isgraph)
+#define u_isgraphPOSIX U_ICU_ENTRY_POINT_RENAME(u_isgraphPOSIX)
+#define u_islower U_ICU_ENTRY_POINT_RENAME(u_islower)
+#define u_isprint U_ICU_ENTRY_POINT_RENAME(u_isprint)
+#define u_isprintPOSIX U_ICU_ENTRY_POINT_RENAME(u_isprintPOSIX)
+#define u_ispunct U_ICU_ENTRY_POINT_RENAME(u_ispunct)
+#define u_isspace U_ICU_ENTRY_POINT_RENAME(u_isspace)
+#define u_istitle U_ICU_ENTRY_POINT_RENAME(u_istitle)
+#define u_isupper U_ICU_ENTRY_POINT_RENAME(u_isupper)
+#define u_isxdigit U_ICU_ENTRY_POINT_RENAME(u_isxdigit)
+#define u_locbund_close U_ICU_ENTRY_POINT_RENAME(u_locbund_close)
+#define u_locbund_getNumberFormat U_ICU_ENTRY_POINT_RENAME(u_locbund_getNumberFormat)
+#define u_locbund_init U_ICU_ENTRY_POINT_RENAME(u_locbund_init)
+#define u_memcasecmp U_ICU_ENTRY_POINT_RENAME(u_memcasecmp)
+#define u_memchr U_ICU_ENTRY_POINT_RENAME(u_memchr)
+#define u_memchr32 U_ICU_ENTRY_POINT_RENAME(u_memchr32)
+#define u_memcmp U_ICU_ENTRY_POINT_RENAME(u_memcmp)
+#define u_memcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_memcmpCodePointOrder)
+#define u_memcpy U_ICU_ENTRY_POINT_RENAME(u_memcpy)
+#define u_memmove U_ICU_ENTRY_POINT_RENAME(u_memmove)
+#define u_memrchr U_ICU_ENTRY_POINT_RENAME(u_memrchr)
+#define u_memrchr32 U_ICU_ENTRY_POINT_RENAME(u_memrchr32)
+#define u_memset U_ICU_ENTRY_POINT_RENAME(u_memset)
+#define u_parseMessage U_ICU_ENTRY_POINT_RENAME(u_parseMessage)
+#define u_parseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_parseMessageWithError)
+#define u_printf U_ICU_ENTRY_POINT_RENAME(u_printf)
+#define u_printf_parse U_ICU_ENTRY_POINT_RENAME(u_printf_parse)
+#define u_printf_u U_ICU_ENTRY_POINT_RENAME(u_printf_u)
+#define u_releaseDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_releaseDefaultConverter)
+#define u_scanf_parse U_ICU_ENTRY_POINT_RENAME(u_scanf_parse)
+#define u_setAtomicIncDecFunctions U_ICU_ENTRY_POINT_RENAME(u_setAtomicIncDecFunctions)
+#define u_setDataDirectory U_ICU_ENTRY_POINT_RENAME(u_setDataDirectory)
+#define u_setMemoryFunctions U_ICU_ENTRY_POINT_RENAME(u_setMemoryFunctions)
+#define u_setMutexFunctions U_ICU_ENTRY_POINT_RENAME(u_setMutexFunctions)
+#define u_setTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_setTimeZoneFilesDirectory)
+#define u_shapeArabic U_ICU_ENTRY_POINT_RENAME(u_shapeArabic)
+#define u_snprintf U_ICU_ENTRY_POINT_RENAME(u_snprintf)
+#define u_snprintf_u U_ICU_ENTRY_POINT_RENAME(u_snprintf_u)
+#define u_sprintf U_ICU_ENTRY_POINT_RENAME(u_sprintf)
+#define u_sprintf_u U_ICU_ENTRY_POINT_RENAME(u_sprintf_u)
+#define u_sscanf U_ICU_ENTRY_POINT_RENAME(u_sscanf)
+#define u_sscanf_u U_ICU_ENTRY_POINT_RENAME(u_sscanf_u)
+#define u_strCaseCompare U_ICU_ENTRY_POINT_RENAME(u_strCaseCompare)
+#define u_strCompare U_ICU_ENTRY_POINT_RENAME(u_strCompare)
+#define u_strCompareIter U_ICU_ENTRY_POINT_RENAME(u_strCompareIter)
+#define u_strFindFirst U_ICU_ENTRY_POINT_RENAME(u_strFindFirst)
+#define u_strFindLast U_ICU_ENTRY_POINT_RENAME(u_strFindLast)
+#define u_strFoldCase U_ICU_ENTRY_POINT_RENAME(u_strFoldCase)
+#define u_strFromJavaModifiedUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromJavaModifiedUTF8WithSub)
+#define u_strFromPunycode U_ICU_ENTRY_POINT_RENAME(u_strFromPunycode)
+#define u_strFromUTF32 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32)
+#define u_strFromUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32WithSub)
+#define u_strFromUTF8 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8)
+#define u_strFromUTF8Lenient U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8Lenient)
+#define u_strFromUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8WithSub)
+#define u_strFromWCS U_ICU_ENTRY_POINT_RENAME(u_strFromWCS)
+#define u_strHasMoreChar32Than U_ICU_ENTRY_POINT_RENAME(u_strHasMoreChar32Than)
+#define u_strToJavaModifiedUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToJavaModifiedUTF8)
+#define u_strToLower U_ICU_ENTRY_POINT_RENAME(u_strToLower)
+#define u_strToPunycode U_ICU_ENTRY_POINT_RENAME(u_strToPunycode)
+#define u_strToTitle U_ICU_ENTRY_POINT_RENAME(u_strToTitle)
+#define u_strToUTF32 U_ICU_ENTRY_POINT_RENAME(u_strToUTF32)
+#define u_strToUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF32WithSub)
+#define u_strToUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToUTF8)
+#define u_strToUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF8WithSub)
+#define u_strToUpper U_ICU_ENTRY_POINT_RENAME(u_strToUpper)
+#define u_strToWCS U_ICU_ENTRY_POINT_RENAME(u_strToWCS)
+#define u_strcasecmp U_ICU_ENTRY_POINT_RENAME(u_strcasecmp)
+#define u_strcat U_ICU_ENTRY_POINT_RENAME(u_strcat)
+#define u_strchr U_ICU_ENTRY_POINT_RENAME(u_strchr)
+#define u_strchr32 U_ICU_ENTRY_POINT_RENAME(u_strchr32)
+#define u_strcmp U_ICU_ENTRY_POINT_RENAME(u_strcmp)
+#define u_strcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strcmpCodePointOrder)
+#define u_strcmpFold U_ICU_ENTRY_POINT_RENAME(u_strcmpFold)
+#define u_strcpy U_ICU_ENTRY_POINT_RENAME(u_strcpy)
+#define u_strcspn U_ICU_ENTRY_POINT_RENAME(u_strcspn)
+#define u_strlen U_ICU_ENTRY_POINT_RENAME(u_strlen)
+#define u_strncasecmp U_ICU_ENTRY_POINT_RENAME(u_strncasecmp)
+#define u_strncat U_ICU_ENTRY_POINT_RENAME(u_strncat)
+#define u_strncmp U_ICU_ENTRY_POINT_RENAME(u_strncmp)
+#define u_strncmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strncmpCodePointOrder)
+#define u_strncpy U_ICU_ENTRY_POINT_RENAME(u_strncpy)
+#define u_strpbrk U_ICU_ENTRY_POINT_RENAME(u_strpbrk)
+#define u_strrchr U_ICU_ENTRY_POINT_RENAME(u_strrchr)
+#define u_strrchr32 U_ICU_ENTRY_POINT_RENAME(u_strrchr32)
+#define u_strrstr U_ICU_ENTRY_POINT_RENAME(u_strrstr)
+#define u_strspn U_ICU_ENTRY_POINT_RENAME(u_strspn)
+#define u_strstr U_ICU_ENTRY_POINT_RENAME(u_strstr)
+#define u_strtok_r U_ICU_ENTRY_POINT_RENAME(u_strtok_r)
+#define u_terminateChars U_ICU_ENTRY_POINT_RENAME(u_terminateChars)
+#define u_terminateUChar32s U_ICU_ENTRY_POINT_RENAME(u_terminateUChar32s)
+#define u_terminateUChars U_ICU_ENTRY_POINT_RENAME(u_terminateUChars)
+#define u_terminateWChars U_ICU_ENTRY_POINT_RENAME(u_terminateWChars)
+#define u_tolower U_ICU_ENTRY_POINT_RENAME(u_tolower)
+#define u_totitle U_ICU_ENTRY_POINT_RENAME(u_totitle)
+#define u_toupper U_ICU_ENTRY_POINT_RENAME(u_toupper)
+#define u_uastrcpy U_ICU_ENTRY_POINT_RENAME(u_uastrcpy)
+#define u_uastrncpy U_ICU_ENTRY_POINT_RENAME(u_uastrncpy)
+#define u_unescape U_ICU_ENTRY_POINT_RENAME(u_unescape)
+#define u_unescapeAt U_ICU_ENTRY_POINT_RENAME(u_unescapeAt)
+#define u_versionFromString U_ICU_ENTRY_POINT_RENAME(u_versionFromString)
+#define u_versionFromUString U_ICU_ENTRY_POINT_RENAME(u_versionFromUString)
+#define u_versionToString U_ICU_ENTRY_POINT_RENAME(u_versionToString)
+#define u_vformatMessage U_ICU_ENTRY_POINT_RENAME(u_vformatMessage)
+#define u_vformatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vformatMessageWithError)
+#define u_vfprintf U_ICU_ENTRY_POINT_RENAME(u_vfprintf)
+#define u_vfprintf_u U_ICU_ENTRY_POINT_RENAME(u_vfprintf_u)
+#define u_vfscanf U_ICU_ENTRY_POINT_RENAME(u_vfscanf)
+#define u_vfscanf_u U_ICU_ENTRY_POINT_RENAME(u_vfscanf_u)
+#define u_vparseMessage U_ICU_ENTRY_POINT_RENAME(u_vparseMessage)
+#define u_vparseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vparseMessageWithError)
+#define u_vsnprintf U_ICU_ENTRY_POINT_RENAME(u_vsnprintf)
+#define u_vsnprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsnprintf_u)
+#define u_vsprintf U_ICU_ENTRY_POINT_RENAME(u_vsprintf)
+#define u_vsprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsprintf_u)
+#define u_vsscanf U_ICU_ENTRY_POINT_RENAME(u_vsscanf)
+#define u_vsscanf_u U_ICU_ENTRY_POINT_RENAME(u_vsscanf_u)
+#define u_writeIdenticalLevelRun U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRun)
+#define ubidi_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ubidi_addPropertyStarts)
+#define ubidi_close U_ICU_ENTRY_POINT_RENAME(ubidi_close)
+#define ubidi_countParagraphs U_ICU_ENTRY_POINT_RENAME(ubidi_countParagraphs)
+#define ubidi_countRuns U_ICU_ENTRY_POINT_RENAME(ubidi_countRuns)
+#define ubidi_getBaseDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getBaseDirection)
+#define ubidi_getClass U_ICU_ENTRY_POINT_RENAME(ubidi_getClass)
+#define ubidi_getClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_getClassCallback)
+#define ubidi_getCustomizedClass U_ICU_ENTRY_POINT_RENAME(ubidi_getCustomizedClass)
+#define ubidi_getDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getDirection)
+#define ubidi_getJoiningGroup U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningGroup)
+#define ubidi_getJoiningType U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningType)
+#define ubidi_getLength U_ICU_ENTRY_POINT_RENAME(ubidi_getLength)
+#define ubidi_getLevelAt U_ICU_ENTRY_POINT_RENAME(ubidi_getLevelAt)
+#define ubidi_getLevels U_ICU_ENTRY_POINT_RENAME(ubidi_getLevels)
+#define ubidi_getLogicalIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalIndex)
+#define ubidi_getLogicalMap U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalMap)
+#define ubidi_getLogicalRun U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalRun)
+#define ubidi_getMaxValue U_ICU_ENTRY_POINT_RENAME(ubidi_getMaxValue)
+#define ubidi_getMemory U_ICU_ENTRY_POINT_RENAME(ubidi_getMemory)
+#define ubidi_getMirror U_ICU_ENTRY_POINT_RENAME(ubidi_getMirror)
+#define ubidi_getPairedBracket U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracket)
+#define ubidi_getPairedBracketType U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracketType)
+#define ubidi_getParaLevel U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevel)
+#define ubidi_getParaLevelAtIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevelAtIndex)
+#define ubidi_getParagraph U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraph)
+#define ubidi_getParagraphByIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraphByIndex)
+#define ubidi_getProcessedLength U_ICU_ENTRY_POINT_RENAME(ubidi_getProcessedLength)
+#define ubidi_getReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingMode)
+#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions)
+#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength)
+#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns)
+#define ubidi_getSingleton U_ICU_ENTRY_POINT_RENAME(ubidi_getSingleton)
+#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText)
+#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex)
+#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap)
+#define ubidi_getVisualRun U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualRun)
+#define ubidi_invertMap U_ICU_ENTRY_POINT_RENAME(ubidi_invertMap)
+#define ubidi_isBidiControl U_ICU_ENTRY_POINT_RENAME(ubidi_isBidiControl)
+#define ubidi_isInverse U_ICU_ENTRY_POINT_RENAME(ubidi_isInverse)
+#define ubidi_isJoinControl U_ICU_ENTRY_POINT_RENAME(ubidi_isJoinControl)
+#define ubidi_isMirrored U_ICU_ENTRY_POINT_RENAME(ubidi_isMirrored)
+#define ubidi_isOrderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_isOrderParagraphsLTR)
+#define ubidi_open U_ICU_ENTRY_POINT_RENAME(ubidi_open)
+#define ubidi_openSized U_ICU_ENTRY_POINT_RENAME(ubidi_openSized)
+#define ubidi_orderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_orderParagraphsLTR)
+#define ubidi_reorderLogical U_ICU_ENTRY_POINT_RENAME(ubidi_reorderLogical)
+#define ubidi_reorderVisual U_ICU_ENTRY_POINT_RENAME(ubidi_reorderVisual)
+#define ubidi_setClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_setClassCallback)
+#define ubidi_setContext U_ICU_ENTRY_POINT_RENAME(ubidi_setContext)
+#define ubidi_setInverse U_ICU_ENTRY_POINT_RENAME(ubidi_setInverse)
+#define ubidi_setLine U_ICU_ENTRY_POINT_RENAME(ubidi_setLine)
+#define ubidi_setPara U_ICU_ENTRY_POINT_RENAME(ubidi_setPara)
+#define ubidi_setReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingMode)
+#define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions)
+#define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered)
+#define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse)
+#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
+#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
+#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
+#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current)
+#define ubrk_first U_ICU_ENTRY_POINT_RENAME(ubrk_first)
+#define ubrk_following U_ICU_ENTRY_POINT_RENAME(ubrk_following)
+#define ubrk_getAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_getAvailable)
+#define ubrk_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ubrk_getLocaleByType)
+#define ubrk_getRuleStatus U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatus)
+#define ubrk_getRuleStatusVec U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatusVec)
+#define ubrk_isBoundary U_ICU_ENTRY_POINT_RENAME(ubrk_isBoundary)
+#define ubrk_last U_ICU_ENTRY_POINT_RENAME(ubrk_last)
+#define ubrk_next U_ICU_ENTRY_POINT_RENAME(ubrk_next)
+#define ubrk_open U_ICU_ENTRY_POINT_RENAME(ubrk_open)
+#define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules)
+#define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding)
+#define ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous)
+#define ubrk_refreshUText U_ICU_ENTRY_POINT_RENAME(ubrk_refreshUText)
+#define ubrk_safeClone U_ICU_ENTRY_POINT_RENAME(ubrk_safeClone)
+#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText)
+#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText)
+#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap)
+#define ucache_compareKeys U_ICU_ENTRY_POINT_RENAME(ucache_compareKeys)
+#define ucache_deleteKey U_ICU_ENTRY_POINT_RENAME(ucache_deleteKey)
+#define ucache_hashKeys U_ICU_ENTRY_POINT_RENAME(ucache_hashKeys)
+#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add)
+#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear)
+#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField)
+#define ucal_clone U_ICU_ENTRY_POINT_RENAME(ucal_clone)
+#define ucal_close U_ICU_ENTRY_POINT_RENAME(ucal_close)
+#define ucal_countAvailable U_ICU_ENTRY_POINT_RENAME(ucal_countAvailable)
+#define ucal_equivalentTo U_ICU_ENTRY_POINT_RENAME(ucal_equivalentTo)
+#define ucal_get U_ICU_ENTRY_POINT_RENAME(ucal_get)
+#define ucal_getAttribute U_ICU_ENTRY_POINT_RENAME(ucal_getAttribute)
+#define ucal_getAvailable U_ICU_ENTRY_POINT_RENAME(ucal_getAvailable)
+#define ucal_getCanonicalTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getCanonicalTimeZoneID)
+#define ucal_getDSTSavings U_ICU_ENTRY_POINT_RENAME(ucal_getDSTSavings)
+#define ucal_getDayOfWeekType U_ICU_ENTRY_POINT_RENAME(ucal_getDayOfWeekType)
+#define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone)
+#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference)
+#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange)
+#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale)
+#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit)
+#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType)
+#define ucal_getMillis U_ICU_ENTRY_POINT_RENAME(ucal_getMillis)
+#define ucal_getNow U_ICU_ENTRY_POINT_RENAME(ucal_getNow)
+#define ucal_getTZDataVersion U_ICU_ENTRY_POINT_RENAME(ucal_getTZDataVersion)
+#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName)
+#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID)
+#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID)
+#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate)
+#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType)
+#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition)
+#define ucal_getWindowsTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getWindowsTimeZoneID)
+#define ucal_inDaylightTime U_ICU_ENTRY_POINT_RENAME(ucal_inDaylightTime)
+#define ucal_isSet U_ICU_ENTRY_POINT_RENAME(ucal_isSet)
+#define ucal_isWeekend U_ICU_ENTRY_POINT_RENAME(ucal_isWeekend)
+#define ucal_open U_ICU_ENTRY_POINT_RENAME(ucal_open)
+#define ucal_openCountryTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openCountryTimeZones)
+#define ucal_openTimeZoneIDEnumeration U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZoneIDEnumeration)
+#define ucal_openTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZones)
+#define ucal_roll U_ICU_ENTRY_POINT_RENAME(ucal_roll)
+#define ucal_set U_ICU_ENTRY_POINT_RENAME(ucal_set)
+#define ucal_setAttribute U_ICU_ENTRY_POINT_RENAME(ucal_setAttribute)
+#define ucal_setDate U_ICU_ENTRY_POINT_RENAME(ucal_setDate)
+#define ucal_setDateTime U_ICU_ENTRY_POINT_RENAME(ucal_setDateTime)
+#define ucal_setDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setDefaultTimeZone)
+#define ucal_setGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_setGregorianChange)
+#define ucal_setMillis U_ICU_ENTRY_POINT_RENAME(ucal_setMillis)
+#define ucal_setTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setTimeZone)
+#define ucase_addCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addCaseClosure)
+#define ucase_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ucase_addPropertyStarts)
+#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure)
+#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold)
+#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale)
+#define ucase_getSingleton U_ICU_ENTRY_POINT_RENAME(ucase_getSingleton)
+#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType)
+#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable)
+#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty)
+#define ucase_isCaseSensitive U_ICU_ENTRY_POINT_RENAME(ucase_isCaseSensitive)
+#define ucase_isSoftDotted U_ICU_ENTRY_POINT_RENAME(ucase_isSoftDotted)
+#define ucase_toFullFolding U_ICU_ENTRY_POINT_RENAME(ucase_toFullFolding)
+#define ucase_toFullLower U_ICU_ENTRY_POINT_RENAME(ucase_toFullLower)
+#define ucase_toFullTitle U_ICU_ENTRY_POINT_RENAME(ucase_toFullTitle)
+#define ucase_toFullUpper U_ICU_ENTRY_POINT_RENAME(ucase_toFullUpper)
+#define ucase_tolower U_ICU_ENTRY_POINT_RENAME(ucase_tolower)
+#define ucase_totitle U_ICU_ENTRY_POINT_RENAME(ucase_totitle)
+#define ucase_toupper U_ICU_ENTRY_POINT_RENAME(ucase_toupper)
+#define ucasemap_close U_ICU_ENTRY_POINT_RENAME(ucasemap_close)
+#define ucasemap_getBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_getBreakIterator)
+#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale)
+#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions)
+#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle)
+#define ucasemap_mapUTF8 U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8)
+#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open)
+#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator)
+#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale)
+#define ucasemap_setOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_setOptions)
+#define ucasemap_toTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_toTitle)
+#define ucasemap_utf8FoldCase U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8FoldCase)
+#define ucasemap_utf8ToLower U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToLower)
+#define ucasemap_utf8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToTitle)
+#define ucasemap_utf8ToUpper U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToUpper)
+#define uchar_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uchar_addPropertyStarts)
+#define uchar_swapNames U_ICU_ENTRY_POINT_RENAME(uchar_swapNames)
+#define ucln_cleanupOne U_ICU_ENTRY_POINT_RENAME(ucln_cleanupOne)
+#define ucln_common_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_common_registerCleanup)
+#define ucln_i18n_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_i18n_registerCleanup)
+#define ucln_io_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_io_registerCleanup)
+#define ucln_lib_cleanup U_ICU_ENTRY_POINT_RENAME(ucln_lib_cleanup)
+#define ucln_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_registerCleanup)
+#define ucnv_MBCSFromUChar32 U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUChar32)
+#define ucnv_MBCSFromUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUnicodeWithOffsets)
+#define ucnv_MBCSGetFilteredUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetFilteredUnicodeSetForUnicode)
+#define ucnv_MBCSGetType U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetType)
+#define ucnv_MBCSGetUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetUnicodeSetForUnicode)
+#define ucnv_MBCSIsLeadByte U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSIsLeadByte)
+#define ucnv_MBCSSimpleGetNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSSimpleGetNextUChar)
+#define ucnv_MBCSToUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSToUnicodeWithOffsets)
+#define ucnv_bld_countAvailableConverters U_ICU_ENTRY_POINT_RENAME(ucnv_bld_countAvailableConverters)
+#define ucnv_bld_getAvailableConverter U_ICU_ENTRY_POINT_RENAME(ucnv_bld_getAvailableConverter)
+#define ucnv_canCreateConverter U_ICU_ENTRY_POINT_RENAME(ucnv_canCreateConverter)
+#define ucnv_cbFromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteBytes)
+#define ucnv_cbFromUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteSub)
+#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars)
+#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub)
+#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars)
+#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close)
+#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames)
+#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert)
+#define ucnv_convertEx U_ICU_ENTRY_POINT_RENAME(ucnv_convertEx)
+#define ucnv_countAliases U_ICU_ENTRY_POINT_RENAME(ucnv_countAliases)
+#define ucnv_countAvailable U_ICU_ENTRY_POINT_RENAME(ucnv_countAvailable)
+#define ucnv_countStandards U_ICU_ENTRY_POINT_RENAME(ucnv_countStandards)
+#define ucnv_createAlgorithmicConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createAlgorithmicConverter)
+#define ucnv_createConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createConverter)
+#define ucnv_createConverterFromPackage U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromPackage)
+#define ucnv_createConverterFromSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromSharedData)
+#define ucnv_detectUnicodeSignature U_ICU_ENTRY_POINT_RENAME(ucnv_detectUnicodeSignature)
+#define ucnv_extContinueMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchFromU)
+#define ucnv_extContinueMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchToU)
+#define ucnv_extGetUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_extGetUnicodeSet)
+#define ucnv_extInitialMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchFromU)
+#define ucnv_extInitialMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchToU)
+#define ucnv_extSimpleMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchFromU)
+#define ucnv_extSimpleMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchToU)
+#define ucnv_fixFileSeparator U_ICU_ENTRY_POINT_RENAME(ucnv_fixFileSeparator)
+#define ucnv_flushCache U_ICU_ENTRY_POINT_RENAME(ucnv_flushCache)
+#define ucnv_fromAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_fromAlgorithmic)
+#define ucnv_fromUChars U_ICU_ENTRY_POINT_RENAME(ucnv_fromUChars)
+#define ucnv_fromUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_fromUCountPending)
+#define ucnv_fromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_fromUWriteBytes)
+#define ucnv_fromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode)
+#define ucnv_fromUnicode_UTF8 U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8)
+#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8_OFFSETS_LOGIC)
+#define ucnv_getAlias U_ICU_ENTRY_POINT_RENAME(ucnv_getAlias)
+#define ucnv_getAliases U_ICU_ENTRY_POINT_RENAME(ucnv_getAliases)
+#define ucnv_getAvailableName U_ICU_ENTRY_POINT_RENAME(ucnv_getAvailableName)
+#define ucnv_getCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_getCCSID)
+#define ucnv_getCanonicalName U_ICU_ENTRY_POINT_RENAME(ucnv_getCanonicalName)
+#define ucnv_getCompleteUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getCompleteUnicodeSet)
+#define ucnv_getDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_getDefaultName)
+#define ucnv_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucnv_getDisplayName)
+#define ucnv_getFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getFromUCallBack)
+#define ucnv_getInvalidChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidChars)
+#define ucnv_getInvalidUChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidUChars)
+#define ucnv_getMaxCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMaxCharSize)
+#define ucnv_getMinCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMinCharSize)
+#define ucnv_getName U_ICU_ENTRY_POINT_RENAME(ucnv_getName)
+#define ucnv_getNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_getNextUChar)
+#define ucnv_getNonSurrogateUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getNonSurrogateUnicodeSet)
+#define ucnv_getPlatform U_ICU_ENTRY_POINT_RENAME(ucnv_getPlatform)
+#define ucnv_getStandard U_ICU_ENTRY_POINT_RENAME(ucnv_getStandard)
+#define ucnv_getStandardName U_ICU_ENTRY_POINT_RENAME(ucnv_getStandardName)
+#define ucnv_getStarters U_ICU_ENTRY_POINT_RENAME(ucnv_getStarters)
+#define ucnv_getSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_getSubstChars)
+#define ucnv_getToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getToUCallBack)
+#define ucnv_getType U_ICU_ENTRY_POINT_RENAME(ucnv_getType)
+#define ucnv_getUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getUnicodeSet)
+#define ucnv_incrementRefCount U_ICU_ENTRY_POINT_RENAME(ucnv_incrementRefCount)
+#define ucnv_io_countKnownConverters U_ICU_ENTRY_POINT_RENAME(ucnv_io_countKnownConverters)
+#define ucnv_io_getConverterName U_ICU_ENTRY_POINT_RENAME(ucnv_io_getConverterName)
+#define ucnv_io_stripASCIIForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripASCIIForCompare)
+#define ucnv_io_stripEBCDICForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripEBCDICForCompare)
+#define ucnv_isAmbiguous U_ICU_ENTRY_POINT_RENAME(ucnv_isAmbiguous)
+#define ucnv_isFixedWidth U_ICU_ENTRY_POINT_RENAME(ucnv_isFixedWidth)
+#define ucnv_load U_ICU_ENTRY_POINT_RENAME(ucnv_load)
+#define ucnv_loadSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_loadSharedData)
+#define ucnv_open U_ICU_ENTRY_POINT_RENAME(ucnv_open)
+#define ucnv_openAllNames U_ICU_ENTRY_POINT_RENAME(ucnv_openAllNames)
+#define ucnv_openCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_openCCSID)
+#define ucnv_openPackage U_ICU_ENTRY_POINT_RENAME(ucnv_openPackage)
+#define ucnv_openStandardNames U_ICU_ENTRY_POINT_RENAME(ucnv_openStandardNames)
+#define ucnv_openU U_ICU_ENTRY_POINT_RENAME(ucnv_openU)
+#define ucnv_reset U_ICU_ENTRY_POINT_RENAME(ucnv_reset)
+#define ucnv_resetFromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetFromUnicode)
+#define ucnv_resetToUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetToUnicode)
+#define ucnv_safeClone U_ICU_ENTRY_POINT_RENAME(ucnv_safeClone)
+#define ucnv_setDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_setDefaultName)
+#define ucnv_setFallback U_ICU_ENTRY_POINT_RENAME(ucnv_setFallback)
+#define ucnv_setFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setFromUCallBack)
+#define ucnv_setSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstChars)
+#define ucnv_setSubstString U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstString)
+#define ucnv_setToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setToUCallBack)
+#define ucnv_swap U_ICU_ENTRY_POINT_RENAME(ucnv_swap)
+#define ucnv_swapAliases U_ICU_ENTRY_POINT_RENAME(ucnv_swapAliases)
+#define ucnv_toAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_toAlgorithmic)
+#define ucnv_toUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUChars)
+#define ucnv_toUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_toUCountPending)
+#define ucnv_toUWriteCodePoint U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteCodePoint)
+#define ucnv_toUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteUChars)
+#define ucnv_toUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_toUnicode)
+#define ucnv_unload U_ICU_ENTRY_POINT_RENAME(ucnv_unload)
+#define ucnv_unloadSharedDataIfReady U_ICU_ENTRY_POINT_RENAME(ucnv_unloadSharedDataIfReady)
+#define ucnv_usesFallback U_ICU_ENTRY_POINT_RENAME(ucnv_usesFallback)
+#define ucnvsel_close U_ICU_ENTRY_POINT_RENAME(ucnvsel_close)
+#define ucnvsel_open U_ICU_ENTRY_POINT_RENAME(ucnvsel_open)
+#define ucnvsel_openFromSerialized U_ICU_ENTRY_POINT_RENAME(ucnvsel_openFromSerialized)
+#define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString)
+#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8)
+#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize)
+#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary)
+#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close)
+#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements)
+#define ucol_countAvailable U_ICU_ENTRY_POINT_RENAME(ucol_countAvailable)
+#define ucol_equal U_ICU_ENTRY_POINT_RENAME(ucol_equal)
+#define ucol_equals U_ICU_ENTRY_POINT_RENAME(ucol_equals)
+#define ucol_getAttribute U_ICU_ENTRY_POINT_RENAME(ucol_getAttribute)
+#define ucol_getAvailable U_ICU_ENTRY_POINT_RENAME(ucol_getAvailable)
+#define ucol_getBound U_ICU_ENTRY_POINT_RENAME(ucol_getBound)
+#define ucol_getContractions U_ICU_ENTRY_POINT_RENAME(ucol_getContractions)
+#define ucol_getContractionsAndExpansions U_ICU_ENTRY_POINT_RENAME(ucol_getContractionsAndExpansions)
+#define ucol_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucol_getDisplayName)
+#define ucol_getEquivalentReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getEquivalentReorderCodes)
+#define ucol_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ucol_getFunctionalEquivalent)
+#define ucol_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValues)
+#define ucol_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValuesForLocale)
+#define ucol_getKeywords U_ICU_ENTRY_POINT_RENAME(ucol_getKeywords)
+#define ucol_getLocale U_ICU_ENTRY_POINT_RENAME(ucol_getLocale)
+#define ucol_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucol_getLocaleByType)
+#define ucol_getMaxExpansion U_ICU_ENTRY_POINT_RENAME(ucol_getMaxExpansion)
+#define ucol_getMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_getMaxVariable)
+#define ucol_getOffset U_ICU_ENTRY_POINT_RENAME(ucol_getOffset)
+#define ucol_getReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodes)
+#define ucol_getRules U_ICU_ENTRY_POINT_RENAME(ucol_getRules)
+#define ucol_getRulesEx U_ICU_ENTRY_POINT_RENAME(ucol_getRulesEx)
+#define ucol_getShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_getShortDefinitionString)
+#define ucol_getSortKey U_ICU_ENTRY_POINT_RENAME(ucol_getSortKey)
+#define ucol_getStrength U_ICU_ENTRY_POINT_RENAME(ucol_getStrength)
+#define ucol_getTailoredSet U_ICU_ENTRY_POINT_RENAME(ucol_getTailoredSet)
+#define ucol_getUCAVersion U_ICU_ENTRY_POINT_RENAME(ucol_getUCAVersion)
+#define ucol_getUnsafeSet U_ICU_ENTRY_POINT_RENAME(ucol_getUnsafeSet)
+#define ucol_getVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_getVariableTop)
+#define ucol_getVersion U_ICU_ENTRY_POINT_RENAME(ucol_getVersion)
+#define ucol_greater U_ICU_ENTRY_POINT_RENAME(ucol_greater)
+#define ucol_greaterOrEqual U_ICU_ENTRY_POINT_RENAME(ucol_greaterOrEqual)
+#define ucol_keyHashCode U_ICU_ENTRY_POINT_RENAME(ucol_keyHashCode)
+#define ucol_looksLikeCollationBinary U_ICU_ENTRY_POINT_RENAME(ucol_looksLikeCollationBinary)
+#define ucol_mergeSortkeys U_ICU_ENTRY_POINT_RENAME(ucol_mergeSortkeys)
+#define ucol_next U_ICU_ENTRY_POINT_RENAME(ucol_next)
+#define ucol_nextSortKeyPart U_ICU_ENTRY_POINT_RENAME(ucol_nextSortKeyPart)
+#define ucol_normalizeShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_normalizeShortDefinitionString)
+#define ucol_open U_ICU_ENTRY_POINT_RENAME(ucol_open)
+#define ucol_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ucol_openAvailableLocales)
+#define ucol_openBinary U_ICU_ENTRY_POINT_RENAME(ucol_openBinary)
+#define ucol_openElements U_ICU_ENTRY_POINT_RENAME(ucol_openElements)
+#define ucol_openFromShortString U_ICU_ENTRY_POINT_RENAME(ucol_openFromShortString)
+#define ucol_openRules U_ICU_ENTRY_POINT_RENAME(ucol_openRules)
+#define ucol_prepareShortStringOpen U_ICU_ENTRY_POINT_RENAME(ucol_prepareShortStringOpen)
+#define ucol_previous U_ICU_ENTRY_POINT_RENAME(ucol_previous)
+#define ucol_primaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_primaryOrder)
+#define ucol_reset U_ICU_ENTRY_POINT_RENAME(ucol_reset)
+#define ucol_restoreVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_restoreVariableTop)
+#define ucol_safeClone U_ICU_ENTRY_POINT_RENAME(ucol_safeClone)
+#define ucol_secondaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_secondaryOrder)
+#define ucol_setAttribute U_ICU_ENTRY_POINT_RENAME(ucol_setAttribute)
+#define ucol_setMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_setMaxVariable)
+#define ucol_setOffset U_ICU_ENTRY_POINT_RENAME(ucol_setOffset)
+#define ucol_setReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_setReorderCodes)
+#define ucol_setStrength U_ICU_ENTRY_POINT_RENAME(ucol_setStrength)
+#define ucol_setText U_ICU_ENTRY_POINT_RENAME(ucol_setText)
+#define ucol_setVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_setVariableTop)
+#define ucol_strcoll U_ICU_ENTRY_POINT_RENAME(ucol_strcoll)
+#define ucol_strcollIter U_ICU_ENTRY_POINT_RENAME(ucol_strcollIter)
+#define ucol_strcollUTF8 U_ICU_ENTRY_POINT_RENAME(ucol_strcollUTF8)
+#define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap)
+#define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA)
+#define ucol_tertiaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_tertiaryOrder)
+#define ucsdet_close U_ICU_ENTRY_POINT_RENAME(ucsdet_close)
+#define ucsdet_detect U_ICU_ENTRY_POINT_RENAME(ucsdet_detect)
+#define ucsdet_detectAll U_ICU_ENTRY_POINT_RENAME(ucsdet_detectAll)
+#define ucsdet_enableInputFilter U_ICU_ENTRY_POINT_RENAME(ucsdet_enableInputFilter)
+#define ucsdet_getAllDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getAllDetectableCharsets)
+#define ucsdet_getConfidence U_ICU_ENTRY_POINT_RENAME(ucsdet_getConfidence)
+#define ucsdet_getDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getDetectableCharsets)
+#define ucsdet_getLanguage U_ICU_ENTRY_POINT_RENAME(ucsdet_getLanguage)
+#define ucsdet_getName U_ICU_ENTRY_POINT_RENAME(ucsdet_getName)
+#define ucsdet_getUChars U_ICU_ENTRY_POINT_RENAME(ucsdet_getUChars)
+#define ucsdet_isInputFilterEnabled U_ICU_ENTRY_POINT_RENAME(ucsdet_isInputFilterEnabled)
+#define ucsdet_open U_ICU_ENTRY_POINT_RENAME(ucsdet_open)
+#define ucsdet_setDeclaredEncoding U_ICU_ENTRY_POINT_RENAME(ucsdet_setDeclaredEncoding)
+#define ucsdet_setDetectableCharset U_ICU_ENTRY_POINT_RENAME(ucsdet_setDetectableCharset)
+#define ucsdet_setText U_ICU_ENTRY_POINT_RENAME(ucsdet_setText)
+#define ucurr_countCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_countCurrencies)
+#define ucurr_forLocale U_ICU_ENTRY_POINT_RENAME(ucurr_forLocale)
+#define ucurr_forLocaleAndDate U_ICU_ENTRY_POINT_RENAME(ucurr_forLocaleAndDate)
+#define ucurr_getDefaultFractionDigits U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigits)
+#define ucurr_getDefaultFractionDigitsForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigitsForUsage)
+#define ucurr_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucurr_getKeywordValuesForLocale)
+#define ucurr_getName U_ICU_ENTRY_POINT_RENAME(ucurr_getName)
+#define ucurr_getNumericCode U_ICU_ENTRY_POINT_RENAME(ucurr_getNumericCode)
+#define ucurr_getPluralName U_ICU_ENTRY_POINT_RENAME(ucurr_getPluralName)
+#define ucurr_getRoundingIncrement U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrement)
+#define ucurr_getRoundingIncrementForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrementForUsage)
+#define ucurr_isAvailable U_ICU_ENTRY_POINT_RENAME(ucurr_isAvailable)
+#define ucurr_openISOCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_openISOCurrencies)
+#define ucurr_register U_ICU_ENTRY_POINT_RENAME(ucurr_register)
+#define ucurr_unregister U_ICU_ENTRY_POINT_RENAME(ucurr_unregister)
+#define udat_adoptNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormat)
+#define udat_adoptNumberFormatForFields U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormatForFields)
+#define udat_applyPattern U_ICU_ENTRY_POINT_RENAME(udat_applyPattern)
+#define udat_applyPatternRelative U_ICU_ENTRY_POINT_RENAME(udat_applyPatternRelative)
+#define udat_clone U_ICU_ENTRY_POINT_RENAME(udat_clone)
+#define udat_close U_ICU_ENTRY_POINT_RENAME(udat_close)
+#define udat_countAvailable U_ICU_ENTRY_POINT_RENAME(udat_countAvailable)
+#define udat_countSymbols U_ICU_ENTRY_POINT_RENAME(udat_countSymbols)
+#define udat_format U_ICU_ENTRY_POINT_RENAME(udat_format)
+#define udat_get2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_get2DigitYearStart)
+#define udat_getAvailable U_ICU_ENTRY_POINT_RENAME(udat_getAvailable)
+#define udat_getBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_getBooleanAttribute)
+#define udat_getCalendar U_ICU_ENTRY_POINT_RENAME(udat_getCalendar)
+#define udat_getContext U_ICU_ENTRY_POINT_RENAME(udat_getContext)
+#define udat_getLocaleByType U_ICU_ENTRY_POINT_RENAME(udat_getLocaleByType)
+#define udat_getNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormat)
+#define udat_getNumberFormatForField U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormatForField)
+#define udat_getSymbols U_ICU_ENTRY_POINT_RENAME(udat_getSymbols)
+#define udat_isLenient U_ICU_ENTRY_POINT_RENAME(udat_isLenient)
+#define udat_open U_ICU_ENTRY_POINT_RENAME(udat_open)
+#define udat_parse U_ICU_ENTRY_POINT_RENAME(udat_parse)
+#define udat_parseCalendar U_ICU_ENTRY_POINT_RENAME(udat_parseCalendar)
+#define udat_registerOpener U_ICU_ENTRY_POINT_RENAME(udat_registerOpener)
+#define udat_set2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_set2DigitYearStart)
+#define udat_setBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_setBooleanAttribute)
+#define udat_setCalendar U_ICU_ENTRY_POINT_RENAME(udat_setCalendar)
+#define udat_setContext U_ICU_ENTRY_POINT_RENAME(udat_setContext)
+#define udat_setLenient U_ICU_ENTRY_POINT_RENAME(udat_setLenient)
+#define udat_setNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_setNumberFormat)
+#define udat_setSymbols U_ICU_ENTRY_POINT_RENAME(udat_setSymbols)
+#define udat_toCalendarDateField U_ICU_ENTRY_POINT_RENAME(udat_toCalendarDateField)
+#define udat_toPattern U_ICU_ENTRY_POINT_RENAME(udat_toPattern)
+#define udat_toPatternRelativeDate U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeDate)
+#define udat_toPatternRelativeTime U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeTime)
+#define udat_unregisterOpener U_ICU_ENTRY_POINT_RENAME(udat_unregisterOpener)
+#define udata_checkCommonData U_ICU_ENTRY_POINT_RENAME(udata_checkCommonData)
+#define udata_close U_ICU_ENTRY_POINT_RENAME(udata_close)
+#define udata_closeSwapper U_ICU_ENTRY_POINT_RENAME(udata_closeSwapper)
+#define udata_getHeaderSize U_ICU_ENTRY_POINT_RENAME(udata_getHeaderSize)
+#define udata_getInfo U_ICU_ENTRY_POINT_RENAME(udata_getInfo)
+#define udata_getInfoSize U_ICU_ENTRY_POINT_RENAME(udata_getInfoSize)
+#define udata_getLength U_ICU_ENTRY_POINT_RENAME(udata_getLength)
+#define udata_getMemory U_ICU_ENTRY_POINT_RENAME(udata_getMemory)
+#define udata_getRawMemory U_ICU_ENTRY_POINT_RENAME(udata_getRawMemory)
+#define udata_open U_ICU_ENTRY_POINT_RENAME(udata_open)
+#define udata_openChoice U_ICU_ENTRY_POINT_RENAME(udata_openChoice)
+#define udata_openSwapper U_ICU_ENTRY_POINT_RENAME(udata_openSwapper)
+#define udata_openSwapperForInputData U_ICU_ENTRY_POINT_RENAME(udata_openSwapperForInputData)
+#define udata_printError U_ICU_ENTRY_POINT_RENAME(udata_printError)
+#define udata_readInt16 U_ICU_ENTRY_POINT_RENAME(udata_readInt16)
+#define udata_readInt32 U_ICU_ENTRY_POINT_RENAME(udata_readInt32)
+#define udata_setAppData U_ICU_ENTRY_POINT_RENAME(udata_setAppData)
+#define udata_setCommonData U_ICU_ENTRY_POINT_RENAME(udata_setCommonData)
+#define udata_setFileAccess U_ICU_ENTRY_POINT_RENAME(udata_setFileAccess)
+#define udata_swapDataHeader U_ICU_ENTRY_POINT_RENAME(udata_swapDataHeader)
+#define udata_swapInvStringBlock U_ICU_ENTRY_POINT_RENAME(udata_swapInvStringBlock)
+#define udatpg_addPattern U_ICU_ENTRY_POINT_RENAME(udatpg_addPattern)
+#define udatpg_clone U_ICU_ENTRY_POINT_RENAME(udatpg_clone)
+#define udatpg_close U_ICU_ENTRY_POINT_RENAME(udatpg_close)
+#define udatpg_getAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemFormat)
+#define udatpg_getAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemName)
+#define udatpg_getBaseSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getBaseSkeleton)
+#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern)
+#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions)
+#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat)
+#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal)
+#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton)
+#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton)
+#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open)
+#define udatpg_openBaseSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openBaseSkeletons)
+#define udatpg_openEmpty U_ICU_ENTRY_POINT_RENAME(udatpg_openEmpty)
+#define udatpg_openSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openSkeletons)
+#define udatpg_replaceFieldTypes U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypes)
+#define udatpg_replaceFieldTypesWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypesWithOptions)
+#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat)
+#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName)
+#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat)
+#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal)
+#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap)
+#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close)
+#define udtitvfmt_format U_ICU_ENTRY_POINT_RENAME(udtitvfmt_format)
+#define udtitvfmt_open U_ICU_ENTRY_POINT_RENAME(udtitvfmt_open)
+#define uenum_close U_ICU_ENTRY_POINT_RENAME(uenum_close)
+#define uenum_count U_ICU_ENTRY_POINT_RENAME(uenum_count)
+#define uenum_next U_ICU_ENTRY_POINT_RENAME(uenum_next)
+#define uenum_nextDefault U_ICU_ENTRY_POINT_RENAME(uenum_nextDefault)
+#define uenum_openCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openCharStringsEnumeration)
+#define uenum_openFromStringEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openFromStringEnumeration)
+#define uenum_openUCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openUCharStringsEnumeration)
+#define uenum_reset U_ICU_ENTRY_POINT_RENAME(uenum_reset)
+#define uenum_unext U_ICU_ENTRY_POINT_RENAME(uenum_unext)
+#define uenum_unextDefault U_ICU_ENTRY_POINT_RENAME(uenum_unextDefault)
+#define ufile_close_translit U_ICU_ENTRY_POINT_RENAME(ufile_close_translit)
+#define ufile_fill_uchar_buffer U_ICU_ENTRY_POINT_RENAME(ufile_fill_uchar_buffer)
+#define ufile_flush_io U_ICU_ENTRY_POINT_RENAME(ufile_flush_io)
+#define ufile_flush_translit U_ICU_ENTRY_POINT_RENAME(ufile_flush_translit)
+#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch)
+#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32)
+#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou)
+#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close)
+#define ufmt_defaultCPToUnicode U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode)
+#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue)
+#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex)
+#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength)
+#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate)
+#define ufmt_getDecNumChars U_ICU_ENTRY_POINT_RENAME(ufmt_getDecNumChars)
+#define ufmt_getDouble U_ICU_ENTRY_POINT_RENAME(ufmt_getDouble)
+#define ufmt_getInt64 U_ICU_ENTRY_POINT_RENAME(ufmt_getInt64)
+#define ufmt_getLong U_ICU_ENTRY_POINT_RENAME(ufmt_getLong)
+#define ufmt_getObject U_ICU_ENTRY_POINT_RENAME(ufmt_getObject)
+#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType)
+#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars)
+#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric)
+#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit)
+#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open)
+#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou)
+#define ufmt_uto64 U_ICU_ENTRY_POINT_RENAME(ufmt_uto64)
+#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop)
+#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance)
+#define ugender_getListGender U_ICU_ENTRY_POINT_RENAME(ugender_getListGender)
+#define uhash_close U_ICU_ENTRY_POINT_RENAME(uhash_close)
+#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString)
+#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars)
+#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars)
+#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong)
+#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet)
+#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars)
+#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString)
+#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count)
+#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable)
+#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet)
+#define uhash_equals U_ICU_ENTRY_POINT_RENAME(uhash_equals)
+#define uhash_equalsScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_equalsScriptSet)
+#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find)
+#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get)
+#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti)
+#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString)
+#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars)
+#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars)
+#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong)
+#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet)
+#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars)
+#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString)
+#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
+#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
+#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init)
+#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput)
+#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi)
+#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove)
+#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei)
+#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement)
+#define uhash_open U_ICU_ENTRY_POINT_RENAME(uhash_open)
+#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize)
+#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put)
+#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti)
+#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove)
+#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll)
+#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement)
+#define uhash_removei U_ICU_ENTRY_POINT_RENAME(uhash_removei)
+#define uhash_setKeyComparator U_ICU_ENTRY_POINT_RENAME(uhash_setKeyComparator)
+#define uhash_setKeyDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setKeyDeleter)
+#define uhash_setKeyHasher U_ICU_ENTRY_POINT_RENAME(uhash_setKeyHasher)
+#define uhash_setResizePolicy U_ICU_ENTRY_POINT_RENAME(uhash_setResizePolicy)
+#define uhash_setValueComparator U_ICU_ENTRY_POINT_RENAME(uhash_setValueComparator)
+#define uhash_setValueDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setValueDeleter)
+#define uidna_IDNToASCII U_ICU_ENTRY_POINT_RENAME(uidna_IDNToASCII)
+#define uidna_IDNToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_IDNToUnicode)
+#define uidna_close U_ICU_ENTRY_POINT_RENAME(uidna_close)
+#define uidna_compare U_ICU_ENTRY_POINT_RENAME(uidna_compare)
+#define uidna_labelToASCII U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII)
+#define uidna_labelToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII_UTF8)
+#define uidna_labelToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicode)
+#define uidna_labelToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicodeUTF8)
+#define uidna_nameToASCII U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII)
+#define uidna_nameToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII_UTF8)
+#define uidna_nameToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicode)
+#define uidna_nameToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicodeUTF8)
+#define uidna_openUTS46 U_ICU_ENTRY_POINT_RENAME(uidna_openUTS46)
+#define uidna_toASCII U_ICU_ENTRY_POINT_RENAME(uidna_toASCII)
+#define uidna_toUnicode U_ICU_ENTRY_POINT_RENAME(uidna_toUnicode)
+#define uiter_current32 U_ICU_ENTRY_POINT_RENAME(uiter_current32)
+#define uiter_getState U_ICU_ENTRY_POINT_RENAME(uiter_getState)
+#define uiter_next32 U_ICU_ENTRY_POINT_RENAME(uiter_next32)
+#define uiter_previous32 U_ICU_ENTRY_POINT_RENAME(uiter_previous32)
+#define uiter_setCharacterIterator U_ICU_ENTRY_POINT_RENAME(uiter_setCharacterIterator)
+#define uiter_setReplaceable U_ICU_ENTRY_POINT_RENAME(uiter_setReplaceable)
+#define uiter_setState U_ICU_ENTRY_POINT_RENAME(uiter_setState)
+#define uiter_setString U_ICU_ENTRY_POINT_RENAME(uiter_setString)
+#define uiter_setUTF16BE U_ICU_ENTRY_POINT_RENAME(uiter_setUTF16BE)
+#define uiter_setUTF8 U_ICU_ENTRY_POINT_RENAME(uiter_setUTF8)
+#define uldn_close U_ICU_ENTRY_POINT_RENAME(uldn_close)
+#define uldn_getContext U_ICU_ENTRY_POINT_RENAME(uldn_getContext)
+#define uldn_getDialectHandling U_ICU_ENTRY_POINT_RENAME(uldn_getDialectHandling)
+#define uldn_getLocale U_ICU_ENTRY_POINT_RENAME(uldn_getLocale)
+#define uldn_keyDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyDisplayName)
+#define uldn_keyValueDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyValueDisplayName)
+#define uldn_languageDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_languageDisplayName)
+#define uldn_localeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_localeDisplayName)
+#define uldn_open U_ICU_ENTRY_POINT_RENAME(uldn_open)
+#define uldn_openForContext U_ICU_ENTRY_POINT_RENAME(uldn_openForContext)
+#define uldn_regionDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_regionDisplayName)
+#define uldn_scriptCodeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptCodeDisplayName)
+#define uldn_scriptDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptDisplayName)
+#define uldn_variantDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_variantDisplayName)
+#define ulist_addItemBeginList U_ICU_ENTRY_POINT_RENAME(ulist_addItemBeginList)
+#define ulist_addItemEndList U_ICU_ENTRY_POINT_RENAME(ulist_addItemEndList)
+#define ulist_close_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_close_keyword_values_iterator)
+#define ulist_containsString U_ICU_ENTRY_POINT_RENAME(ulist_containsString)
+#define ulist_count_keyword_values U_ICU_ENTRY_POINT_RENAME(ulist_count_keyword_values)
+#define ulist_createEmptyList U_ICU_ENTRY_POINT_RENAME(ulist_createEmptyList)
+#define ulist_deleteList U_ICU_ENTRY_POINT_RENAME(ulist_deleteList)
+#define ulist_getListFromEnum U_ICU_ENTRY_POINT_RENAME(ulist_getListFromEnum)
+#define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize)
+#define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext)
+#define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value)
+#define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList)
+#define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator)
+#define uloc_acceptLanguage U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguage)
+#define uloc_acceptLanguageFromHTTP U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguageFromHTTP)
+#define uloc_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(uloc_addLikelySubtags)
+#define uloc_canonicalize U_ICU_ENTRY_POINT_RENAME(uloc_canonicalize)
+#define uloc_countAvailable U_ICU_ENTRY_POINT_RENAME(uloc_countAvailable)
+#define uloc_forLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_forLanguageTag)
+#define uloc_getAvailable U_ICU_ENTRY_POINT_RENAME(uloc_getAvailable)
+#define uloc_getBaseName U_ICU_ENTRY_POINT_RENAME(uloc_getBaseName)
+#define uloc_getCharacterOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getCharacterOrientation)
+#define uloc_getCountry U_ICU_ENTRY_POINT_RENAME(uloc_getCountry)
+#define uloc_getCurrentCountryID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentCountryID)
+#define uloc_getCurrentLanguageID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentLanguageID)
+#define uloc_getDefault U_ICU_ENTRY_POINT_RENAME(uloc_getDefault)
+#define uloc_getDisplayCountry U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayCountry)
+#define uloc_getDisplayKeyword U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeyword)
+#define uloc_getDisplayKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeywordValue)
+#define uloc_getDisplayLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayLanguage)
+#define uloc_getDisplayName U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayName)
+#define uloc_getDisplayScript U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScript)
+#define uloc_getDisplayScriptInContext U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScriptInContext)
+#define uloc_getDisplayVariant U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayVariant)
+#define uloc_getISO3Country U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Country)
+#define uloc_getISO3Language U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Language)
+#define uloc_getISOCountries U_ICU_ENTRY_POINT_RENAME(uloc_getISOCountries)
+#define uloc_getISOLanguages U_ICU_ENTRY_POINT_RENAME(uloc_getISOLanguages)
+#define uloc_getKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getKeywordValue)
+#define uloc_getLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLCID)
+#define uloc_getLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getLanguage)
+#define uloc_getLineOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getLineOrientation)
+#define uloc_getLocaleForLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLocaleForLCID)
+#define uloc_getName U_ICU_ENTRY_POINT_RENAME(uloc_getName)
+#define uloc_getParent U_ICU_ENTRY_POINT_RENAME(uloc_getParent)
+#define uloc_getScript U_ICU_ENTRY_POINT_RENAME(uloc_getScript)
+#define uloc_getTableStringWithFallback U_ICU_ENTRY_POINT_RENAME(uloc_getTableStringWithFallback)
+#define uloc_getVariant U_ICU_ENTRY_POINT_RENAME(uloc_getVariant)
+#define uloc_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uloc_isRightToLeft)
+#define uloc_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(uloc_minimizeSubtags)
+#define uloc_openKeywordList U_ICU_ENTRY_POINT_RENAME(uloc_openKeywordList)
+#define uloc_openKeywords U_ICU_ENTRY_POINT_RENAME(uloc_openKeywords)
+#define uloc_setDefault U_ICU_ENTRY_POINT_RENAME(uloc_setDefault)
+#define uloc_setKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_setKeywordValue)
+#define uloc_toLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_toLanguageTag)
+#define uloc_toLegacyKey U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyKey)
+#define uloc_toLegacyType U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyType)
+#define uloc_toUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleKey)
+#define uloc_toUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleType)
+#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close)
+#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion)
+#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter)
+#define ulocdata_getExemplarSet U_ICU_ENTRY_POINT_RENAME(ulocdata_getExemplarSet)
+#define ulocdata_getLocaleDisplayPattern U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleDisplayPattern)
+#define ulocdata_getLocaleSeparator U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleSeparator)
+#define ulocdata_getMeasurementSystem U_ICU_ENTRY_POINT_RENAME(ulocdata_getMeasurementSystem)
+#define ulocdata_getNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_getNoSubstitute)
+#define ulocdata_getPaperSize U_ICU_ENTRY_POINT_RENAME(ulocdata_getPaperSize)
+#define ulocdata_open U_ICU_ENTRY_POINT_RENAME(ulocdata_open)
+#define ulocdata_setNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_setNoSubstitute)
+#define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry)
+#define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage)
+#define ulocimp_getScript U_ICU_ENTRY_POINT_RENAME(ulocimp_getScript)
+#define ulocimp_toBcpKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKey)
+#define ulocimp_toBcpType U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpType)
+#define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey)
+#define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType)
+#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey)
+#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType)
+#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern)
+#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe)
+#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone)
+#define umsg_close U_ICU_ENTRY_POINT_RENAME(umsg_close)
+#define umsg_format U_ICU_ENTRY_POINT_RENAME(umsg_format)
+#define umsg_getLocale U_ICU_ENTRY_POINT_RENAME(umsg_getLocale)
+#define umsg_open U_ICU_ENTRY_POINT_RENAME(umsg_open)
+#define umsg_parse U_ICU_ENTRY_POINT_RENAME(umsg_parse)
+#define umsg_setLocale U_ICU_ENTRY_POINT_RENAME(umsg_setLocale)
+#define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern)
+#define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat)
+#define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse)
+#define umtx_condBroadcast U_ICU_ENTRY_POINT_RENAME(umtx_condBroadcast)
+#define umtx_condSignal U_ICU_ENTRY_POINT_RENAME(umtx_condSignal)
+#define umtx_condWait U_ICU_ENTRY_POINT_RENAME(umtx_condWait)
+#define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock)
+#define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock)
+#define uniset_getUnicode32Instance U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance)
+#define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append)
+#define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close)
+#define unorm2_composePair U_ICU_ENTRY_POINT_RENAME(unorm2_composePair)
+#define unorm2_getCombiningClass U_ICU_ENTRY_POINT_RENAME(unorm2_getCombiningClass)
+#define unorm2_getDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getDecomposition)
+#define unorm2_getInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getInstance)
+#define unorm2_getNFCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFCInstance)
+#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance)
+#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance)
+#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance)
+#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance)
+#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition)
+#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter)
+#define unorm2_hasBoundaryBefore U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryBefore)
+#define unorm2_isInert U_ICU_ENTRY_POINT_RENAME(unorm2_isInert)
+#define unorm2_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm2_isNormalized)
+#define unorm2_normalize U_ICU_ENTRY_POINT_RENAME(unorm2_normalize)
+#define unorm2_normalizeSecondAndAppend U_ICU_ENTRY_POINT_RENAME(unorm2_normalizeSecondAndAppend)
+#define unorm2_openFiltered U_ICU_ENTRY_POINT_RENAME(unorm2_openFiltered)
+#define unorm2_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm2_quickCheck)
+#define unorm2_spanQuickCheckYes U_ICU_ENTRY_POINT_RENAME(unorm2_spanQuickCheckYes)
+#define unorm2_swap U_ICU_ENTRY_POINT_RENAME(unorm2_swap)
+#define unorm_compare U_ICU_ENTRY_POINT_RENAME(unorm_compare)
+#define unorm_concatenate U_ICU_ENTRY_POINT_RENAME(unorm_concatenate)
+#define unorm_getFCD16 U_ICU_ENTRY_POINT_RENAME(unorm_getFCD16)
+#define unorm_getQuickCheck U_ICU_ENTRY_POINT_RENAME(unorm_getQuickCheck)
+#define unorm_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm_isNormalized)
+#define unorm_isNormalizedWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_isNormalizedWithOptions)
+#define unorm_next U_ICU_ENTRY_POINT_RENAME(unorm_next)
+#define unorm_normalize U_ICU_ENTRY_POINT_RENAME(unorm_normalize)
+#define unorm_previous U_ICU_ENTRY_POINT_RENAME(unorm_previous)
+#define unorm_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm_quickCheck)
+#define unorm_quickCheckWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_quickCheckWithOptions)
+#define unum_applyPattern U_ICU_ENTRY_POINT_RENAME(unum_applyPattern)
+#define unum_clone U_ICU_ENTRY_POINT_RENAME(unum_clone)
+#define unum_close U_ICU_ENTRY_POINT_RENAME(unum_close)
+#define unum_countAvailable U_ICU_ENTRY_POINT_RENAME(unum_countAvailable)
+#define unum_format U_ICU_ENTRY_POINT_RENAME(unum_format)
+#define unum_formatDecimal U_ICU_ENTRY_POINT_RENAME(unum_formatDecimal)
+#define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble)
+#define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency)
+#define unum_formatInt64 U_ICU_ENTRY_POINT_RENAME(unum_formatInt64)
+#define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable)
+#define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute)
+#define unum_getAvailable U_ICU_ENTRY_POINT_RENAME(unum_getAvailable)
+#define unum_getContext U_ICU_ENTRY_POINT_RENAME(unum_getContext)
+#define unum_getDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_getDoubleAttribute)
+#define unum_getLocaleByType U_ICU_ENTRY_POINT_RENAME(unum_getLocaleByType)
+#define unum_getSymbol U_ICU_ENTRY_POINT_RENAME(unum_getSymbol)
+#define unum_getTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_getTextAttribute)
+#define unum_open U_ICU_ENTRY_POINT_RENAME(unum_open)
+#define unum_parse U_ICU_ENTRY_POINT_RENAME(unum_parse)
+#define unum_parseDecimal U_ICU_ENTRY_POINT_RENAME(unum_parseDecimal)
+#define unum_parseDouble U_ICU_ENTRY_POINT_RENAME(unum_parseDouble)
+#define unum_parseDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_parseDoubleCurrency)
+#define unum_parseInt64 U_ICU_ENTRY_POINT_RENAME(unum_parseInt64)
+#define unum_parseToUFormattable U_ICU_ENTRY_POINT_RENAME(unum_parseToUFormattable)
+#define unum_setAttribute U_ICU_ENTRY_POINT_RENAME(unum_setAttribute)
+#define unum_setContext U_ICU_ENTRY_POINT_RENAME(unum_setContext)
+#define unum_setDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_setDoubleAttribute)
+#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol)
+#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute)
+#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern)
+#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close)
+#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription)
+#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName)
+#define unumsys_getRadix U_ICU_ENTRY_POINT_RENAME(unumsys_getRadix)
+#define unumsys_isAlgorithmic U_ICU_ENTRY_POINT_RENAME(unumsys_isAlgorithmic)
+#define unumsys_open U_ICU_ENTRY_POINT_RENAME(unumsys_open)
+#define unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames)
+#define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName)
+#define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close)
+#define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open)
+#define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType)
+#define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select)
+#define uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary)
+#define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary)
+#define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration)
+#define uplug_getContext U_ICU_ENTRY_POINT_RENAME(uplug_getContext)
+#define uplug_getCurrentLevel U_ICU_ENTRY_POINT_RENAME(uplug_getCurrentLevel)
+#define uplug_getLibrary U_ICU_ENTRY_POINT_RENAME(uplug_getLibrary)
+#define uplug_getLibraryName U_ICU_ENTRY_POINT_RENAME(uplug_getLibraryName)
+#define uplug_getPlugInternal U_ICU_ENTRY_POINT_RENAME(uplug_getPlugInternal)
+#define uplug_getPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLevel)
+#define uplug_getPlugLoadStatus U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLoadStatus)
+#define uplug_getPlugName U_ICU_ENTRY_POINT_RENAME(uplug_getPlugName)
+#define uplug_getPluginFile U_ICU_ENTRY_POINT_RENAME(uplug_getPluginFile)
+#define uplug_getSymbolName U_ICU_ENTRY_POINT_RENAME(uplug_getSymbolName)
+#define uplug_init U_ICU_ENTRY_POINT_RENAME(uplug_init)
+#define uplug_loadPlugFromEntrypoint U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromEntrypoint)
+#define uplug_loadPlugFromLibrary U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromLibrary)
+#define uplug_nextPlug U_ICU_ENTRY_POINT_RENAME(uplug_nextPlug)
+#define uplug_openLibrary U_ICU_ENTRY_POINT_RENAME(uplug_openLibrary)
+#define uplug_removePlug U_ICU_ENTRY_POINT_RENAME(uplug_removePlug)
+#define uplug_setContext U_ICU_ENTRY_POINT_RENAME(uplug_setContext)
+#define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel)
+#define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName)
+#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload)
+#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource)
+#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts)
+#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy)
+#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic)
+#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower)
+#define uprv_calloc U_ICU_ENTRY_POINT_RENAME(uprv_calloc)
+#define uprv_ceil U_ICU_ENTRY_POINT_RENAME(uprv_ceil)
+#define uprv_compareASCIIPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareASCIIPropertyNames)
+#define uprv_compareEBCDICPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareEBCDICPropertyNames)
+#define uprv_compareInvAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvAscii)
+#define uprv_compareInvEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdic)
+#define uprv_compareInvEbcdicAsAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdicAsAscii)
+#define uprv_convertToLCID U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCID)
+#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix)
+#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii)
+#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic)
+#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus)
+#define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault)
+#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding)
+#define uprv_decContextGetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetStatus)
+#define uprv_decContextRestoreStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextRestoreStatus)
+#define uprv_decContextSaveStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSaveStatus)
+#define uprv_decContextSetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetRounding)
+#define uprv_decContextSetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatus)
+#define uprv_decContextSetStatusFromString U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromString)
+#define uprv_decContextSetStatusFromStringQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromStringQuiet)
+#define uprv_decContextSetStatusQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusQuiet)
+#define uprv_decContextStatusToString U_ICU_ENTRY_POINT_RENAME(uprv_decContextStatusToString)
+#define uprv_decContextTestSavedStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestSavedStatus)
+#define uprv_decContextTestStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestStatus)
+#define uprv_decContextZeroStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextZeroStatus)
+#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs)
+#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd)
+#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd)
+#define uprv_decNumberClass U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClass)
+#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString)
+#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare)
+#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal)
+#define uprv_decNumberCompareTotal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotal)
+#define uprv_decNumberCompareTotalMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotalMag)
+#define uprv_decNumberCopy U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopy)
+#define uprv_decNumberCopyAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyAbs)
+#define uprv_decNumberCopyNegate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyNegate)
+#define uprv_decNumberCopySign U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopySign)
+#define uprv_decNumberDivide U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivide)
+#define uprv_decNumberDivideInteger U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivideInteger)
+#define uprv_decNumberExp U_ICU_ENTRY_POINT_RENAME(uprv_decNumberExp)
+#define uprv_decNumberFMA U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFMA)
+#define uprv_decNumberFromInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromInt32)
+#define uprv_decNumberFromString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromString)
+#define uprv_decNumberFromUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromUInt32)
+#define uprv_decNumberGetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberGetBCD)
+#define uprv_decNumberInvert U_ICU_ENTRY_POINT_RENAME(uprv_decNumberInvert)
+#define uprv_decNumberIsNormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsNormal)
+#define uprv_decNumberIsSubnormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsSubnormal)
+#define uprv_decNumberLn U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLn)
+#define uprv_decNumberLog10 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLog10)
+#define uprv_decNumberLogB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLogB)
+#define uprv_decNumberMax U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMax)
+#define uprv_decNumberMaxMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMaxMag)
+#define uprv_decNumberMin U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMin)
+#define uprv_decNumberMinMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinMag)
+#define uprv_decNumberMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinus)
+#define uprv_decNumberMultiply U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMultiply)
+#define uprv_decNumberNextMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextMinus)
+#define uprv_decNumberNextPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextPlus)
+#define uprv_decNumberNextToward U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextToward)
+#define uprv_decNumberNormalize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNormalize)
+#define uprv_decNumberOr U_ICU_ENTRY_POINT_RENAME(uprv_decNumberOr)
+#define uprv_decNumberPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPlus)
+#define uprv_decNumberPower U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPower)
+#define uprv_decNumberQuantize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberQuantize)
+#define uprv_decNumberReduce U_ICU_ENTRY_POINT_RENAME(uprv_decNumberReduce)
+#define uprv_decNumberRemainder U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainder)
+#define uprv_decNumberRemainderNear U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainderNear)
+#define uprv_decNumberRescale U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRescale)
+#define uprv_decNumberRotate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRotate)
+#define uprv_decNumberSameQuantum U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSameQuantum)
+#define uprv_decNumberScaleB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberScaleB)
+#define uprv_decNumberSetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSetBCD)
+#define uprv_decNumberShift U_ICU_ENTRY_POINT_RENAME(uprv_decNumberShift)
+#define uprv_decNumberSquareRoot U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSquareRoot)
+#define uprv_decNumberSubtract U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSubtract)
+#define uprv_decNumberToEngString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToEngString)
+#define uprv_decNumberToInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToInt32)
+#define uprv_decNumberToIntegralExact U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralExact)
+#define uprv_decNumberToIntegralValue U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralValue)
+#define uprv_decNumberToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToString)
+#define uprv_decNumberToUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToUInt32)
+#define uprv_decNumberTrim U_ICU_ENTRY_POINT_RENAME(uprv_decNumberTrim)
+#define uprv_decNumberVersion U_ICU_ENTRY_POINT_RENAME(uprv_decNumberVersion)
+#define uprv_decNumberXor U_ICU_ENTRY_POINT_RENAME(uprv_decNumberXor)
+#define uprv_decNumberZero U_ICU_ENTRY_POINT_RENAME(uprv_decNumberZero)
+#define uprv_deleteConditionalCE32 U_ICU_ENTRY_POINT_RENAME(uprv_deleteConditionalCE32)
+#define uprv_deleteUObject U_ICU_ENTRY_POINT_RENAME(uprv_deleteUObject)
+#define uprv_dl_close U_ICU_ENTRY_POINT_RENAME(uprv_dl_close)
+#define uprv_dl_open U_ICU_ENTRY_POINT_RENAME(uprv_dl_open)
+#define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func)
+#define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy)
+#define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii)
+#define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii)
+#define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower)
+#define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs)
+#define uprv_floor U_ICU_ENTRY_POINT_RENAME(uprv_floor)
+#define uprv_fmax U_ICU_ENTRY_POINT_RENAME(uprv_fmax)
+#define uprv_fmin U_ICU_ENTRY_POINT_RENAME(uprv_fmin)
+#define uprv_fmod U_ICU_ENTRY_POINT_RENAME(uprv_fmod)
+#define uprv_free U_ICU_ENTRY_POINT_RENAME(uprv_free)
+#define uprv_getCharNameCharacters U_ICU_ENTRY_POINT_RENAME(uprv_getCharNameCharacters)
+#define uprv_getDefaultCodepage U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultCodepage)
+#define uprv_getDefaultLocaleID U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultLocaleID)
+#define uprv_getInfinity U_ICU_ENTRY_POINT_RENAME(uprv_getInfinity)
+#define uprv_getMaxCharNameLength U_ICU_ENTRY_POINT_RENAME(uprv_getMaxCharNameLength)
+#define uprv_getMaxValues U_ICU_ENTRY_POINT_RENAME(uprv_getMaxValues)
+#define uprv_getNaN U_ICU_ENTRY_POINT_RENAME(uprv_getNaN)
+#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime)
+#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName)
+#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime)
+#define uprv_haveProperties U_ICU_ENTRY_POINT_RENAME(uprv_haveProperties)
+#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator)
+#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter)
+#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite)
+#define uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString)
+#define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString)
+#define uprv_isNaN U_ICU_ENTRY_POINT_RENAME(uprv_isNaN)
+#define uprv_isNegativeInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isNegativeInfinity)
+#define uprv_isPositiveInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isPositiveInfinity)
+#define uprv_itou U_ICU_ENTRY_POINT_RENAME(uprv_itou)
+#define uprv_log U_ICU_ENTRY_POINT_RENAME(uprv_log)
+#define uprv_malloc U_ICU_ENTRY_POINT_RENAME(uprv_malloc)
+#define uprv_mapFile U_ICU_ENTRY_POINT_RENAME(uprv_mapFile)
+#define uprv_max U_ICU_ENTRY_POINT_RENAME(uprv_max)
+#define uprv_maxMantissa U_ICU_ENTRY_POINT_RENAME(uprv_maxMantissa)
+#define uprv_maximumPtr U_ICU_ENTRY_POINT_RENAME(uprv_maximumPtr)
+#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min)
+#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf)
+#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency)
+#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute)
+#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow)
+#define uprv_pow10 U_ICU_ENTRY_POINT_RENAME(uprv_pow10)
+#define uprv_realloc U_ICU_ENTRY_POINT_RENAME(uprv_realloc)
+#define uprv_round U_ICU_ENTRY_POINT_RENAME(uprv_round)
+#define uprv_sortArray U_ICU_ENTRY_POINT_RENAME(uprv_sortArray)
+#define uprv_stableBinarySearch U_ICU_ENTRY_POINT_RENAME(uprv_stableBinarySearch)
+#define uprv_strCompare U_ICU_ENTRY_POINT_RENAME(uprv_strCompare)
+#define uprv_strdup U_ICU_ENTRY_POINT_RENAME(uprv_strdup)
+#define uprv_stricmp U_ICU_ENTRY_POINT_RENAME(uprv_stricmp)
+#define uprv_strndup U_ICU_ENTRY_POINT_RENAME(uprv_strndup)
+#define uprv_strnicmp U_ICU_ENTRY_POINT_RENAME(uprv_strnicmp)
+#define uprv_syntaxError U_ICU_ENTRY_POINT_RENAME(uprv_syntaxError)
+#define uprv_timezone U_ICU_ENTRY_POINT_RENAME(uprv_timezone)
+#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper)
+#define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc)
+#define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname)
+#define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset)
+#define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator)
+#define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator)
+#define uprv_unmapFile U_ICU_ENTRY_POINT_RENAME(uprv_unmapFile)
+#define upvec_cloneArray U_ICU_ENTRY_POINT_RENAME(upvec_cloneArray)
+#define upvec_close U_ICU_ENTRY_POINT_RENAME(upvec_close)
+#define upvec_compact U_ICU_ENTRY_POINT_RENAME(upvec_compact)
+#define upvec_compactToUTrie2Handler U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2Handler)
+#define upvec_compactToUTrie2WithRowIndexes U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2WithRowIndexes)
+#define upvec_getArray U_ICU_ENTRY_POINT_RENAME(upvec_getArray)
+#define upvec_getRow U_ICU_ENTRY_POINT_RENAME(upvec_getRow)
+#define upvec_getValue U_ICU_ENTRY_POINT_RENAME(upvec_getValue)
+#define upvec_open U_ICU_ENTRY_POINT_RENAME(upvec_open)
+#define upvec_setValue U_ICU_ENTRY_POINT_RENAME(upvec_setValue)
+#define uregex_appendReplacement U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacement)
+#define uregex_appendReplacementUText U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacementUText)
+#define uregex_appendTail U_ICU_ENTRY_POINT_RENAME(uregex_appendTail)
+#define uregex_appendTailUText U_ICU_ENTRY_POINT_RENAME(uregex_appendTailUText)
+#define uregex_clone U_ICU_ENTRY_POINT_RENAME(uregex_clone)
+#define uregex_close U_ICU_ENTRY_POINT_RENAME(uregex_close)
+#define uregex_end U_ICU_ENTRY_POINT_RENAME(uregex_end)
+#define uregex_end64 U_ICU_ENTRY_POINT_RENAME(uregex_end64)
+#define uregex_find U_ICU_ENTRY_POINT_RENAME(uregex_find)
+#define uregex_find64 U_ICU_ENTRY_POINT_RENAME(uregex_find64)
+#define uregex_findNext U_ICU_ENTRY_POINT_RENAME(uregex_findNext)
+#define uregex_flags U_ICU_ENTRY_POINT_RENAME(uregex_flags)
+#define uregex_getFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_getFindProgressCallback)
+#define uregex_getMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_getMatchCallback)
+#define uregex_getStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_getStackLimit)
+#define uregex_getText U_ICU_ENTRY_POINT_RENAME(uregex_getText)
+#define uregex_getTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_getTimeLimit)
+#define uregex_getUText U_ICU_ENTRY_POINT_RENAME(uregex_getUText)
+#define uregex_group U_ICU_ENTRY_POINT_RENAME(uregex_group)
+#define uregex_groupCount U_ICU_ENTRY_POINT_RENAME(uregex_groupCount)
+#define uregex_groupUText U_ICU_ENTRY_POINT_RENAME(uregex_groupUText)
+#define uregex_groupUTextDeep U_ICU_ENTRY_POINT_RENAME(uregex_groupUTextDeep)
+#define uregex_hasAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasAnchoringBounds)
+#define uregex_hasTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasTransparentBounds)
+#define uregex_hitEnd U_ICU_ENTRY_POINT_RENAME(uregex_hitEnd)
+#define uregex_lookingAt U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt)
+#define uregex_lookingAt64 U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt64)
+#define uregex_matches U_ICU_ENTRY_POINT_RENAME(uregex_matches)
+#define uregex_matches64 U_ICU_ENTRY_POINT_RENAME(uregex_matches64)
+#define uregex_open U_ICU_ENTRY_POINT_RENAME(uregex_open)
+#define uregex_openC U_ICU_ENTRY_POINT_RENAME(uregex_openC)
+#define uregex_openUText U_ICU_ENTRY_POINT_RENAME(uregex_openUText)
+#define uregex_pattern U_ICU_ENTRY_POINT_RENAME(uregex_pattern)
+#define uregex_patternUText U_ICU_ENTRY_POINT_RENAME(uregex_patternUText)
+#define uregex_refreshUText U_ICU_ENTRY_POINT_RENAME(uregex_refreshUText)
+#define uregex_regionEnd U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd)
+#define uregex_regionEnd64 U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd64)
+#define uregex_regionStart U_ICU_ENTRY_POINT_RENAME(uregex_regionStart)
+#define uregex_regionStart64 U_ICU_ENTRY_POINT_RENAME(uregex_regionStart64)
+#define uregex_replaceAll U_ICU_ENTRY_POINT_RENAME(uregex_replaceAll)
+#define uregex_replaceAllUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceAllUText)
+#define uregex_replaceFirst U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirst)
+#define uregex_replaceFirstUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirstUText)
+#define uregex_requireEnd U_ICU_ENTRY_POINT_RENAME(uregex_requireEnd)
+#define uregex_reset U_ICU_ENTRY_POINT_RENAME(uregex_reset)
+#define uregex_reset64 U_ICU_ENTRY_POINT_RENAME(uregex_reset64)
+#define uregex_setFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_setFindProgressCallback)
+#define uregex_setMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_setMatchCallback)
+#define uregex_setRegion U_ICU_ENTRY_POINT_RENAME(uregex_setRegion)
+#define uregex_setRegion64 U_ICU_ENTRY_POINT_RENAME(uregex_setRegion64)
+#define uregex_setRegionAndStart U_ICU_ENTRY_POINT_RENAME(uregex_setRegionAndStart)
+#define uregex_setStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_setStackLimit)
+#define uregex_setText U_ICU_ENTRY_POINT_RENAME(uregex_setText)
+#define uregex_setTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_setTimeLimit)
+#define uregex_setUText U_ICU_ENTRY_POINT_RENAME(uregex_setUText)
+#define uregex_split U_ICU_ENTRY_POINT_RENAME(uregex_split)
+#define uregex_splitUText U_ICU_ENTRY_POINT_RENAME(uregex_splitUText)
+#define uregex_start U_ICU_ENTRY_POINT_RENAME(uregex_start)
+#define uregex_start64 U_ICU_ENTRY_POINT_RENAME(uregex_start64)
+#define uregex_ucstr_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_ucstr_unescape_charAt)
+#define uregex_useAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_useAnchoringBounds)
+#define uregex_useTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_useTransparentBounds)
+#define uregex_utext_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_utext_unescape_charAt)
+#define uregion_areEqual U_ICU_ENTRY_POINT_RENAME(uregion_areEqual)
+#define uregion_contains U_ICU_ENTRY_POINT_RENAME(uregion_contains)
+#define uregion_getAvailable U_ICU_ENTRY_POINT_RENAME(uregion_getAvailable)
+#define uregion_getContainedRegions U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegions)
+#define uregion_getContainedRegionsOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegionsOfType)
+#define uregion_getContainingRegion U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegion)
+#define uregion_getContainingRegionOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegionOfType)
+#define uregion_getNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getNumericCode)
+#define uregion_getPreferredValues U_ICU_ENTRY_POINT_RENAME(uregion_getPreferredValues)
+#define uregion_getRegionCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionCode)
+#define uregion_getRegionFromCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromCode)
+#define uregion_getRegionFromNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromNumericCode)
+#define uregion_getType U_ICU_ENTRY_POINT_RENAME(uregion_getType)
+#define ures_close U_ICU_ENTRY_POINT_RENAME(ures_close)
+#define ures_copyResb U_ICU_ENTRY_POINT_RENAME(ures_copyResb)
+#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems)
+#define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource)
+#define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource)
+#define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary)
+#define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex)
+#define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey)
+#define ures_getByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getByKeyWithFallback)
+#define ures_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ures_getFunctionalEquivalent)
+#define ures_getInt U_ICU_ENTRY_POINT_RENAME(ures_getInt)
+#define ures_getIntVector U_ICU_ENTRY_POINT_RENAME(ures_getIntVector)
+#define ures_getKey U_ICU_ENTRY_POINT_RENAME(ures_getKey)
+#define ures_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ures_getKeywordValues)
+#define ures_getLocale U_ICU_ENTRY_POINT_RENAME(ures_getLocale)
+#define ures_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ures_getLocaleByType)
+#define ures_getLocaleInternal U_ICU_ENTRY_POINT_RENAME(ures_getLocaleInternal)
+#define ures_getName U_ICU_ENTRY_POINT_RENAME(ures_getName)
+#define ures_getNextResource U_ICU_ENTRY_POINT_RENAME(ures_getNextResource)
+#define ures_getNextString U_ICU_ENTRY_POINT_RENAME(ures_getNextString)
+#define ures_getSize U_ICU_ENTRY_POINT_RENAME(ures_getSize)
+#define ures_getString U_ICU_ENTRY_POINT_RENAME(ures_getString)
+#define ures_getStringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getStringByIndex)
+#define ures_getStringByKey U_ICU_ENTRY_POINT_RENAME(ures_getStringByKey)
+#define ures_getStringByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getStringByKeyWithFallback)
+#define ures_getType U_ICU_ENTRY_POINT_RENAME(ures_getType)
+#define ures_getUInt U_ICU_ENTRY_POINT_RENAME(ures_getUInt)
+#define ures_getUTF8String U_ICU_ENTRY_POINT_RENAME(ures_getUTF8String)
+#define ures_getUTF8StringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByIndex)
+#define ures_getUTF8StringByKey U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByKey)
+#define ures_getVersion U_ICU_ENTRY_POINT_RENAME(ures_getVersion)
+#define ures_getVersionByKey U_ICU_ENTRY_POINT_RENAME(ures_getVersionByKey)
+#define ures_getVersionNumber U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumber)
+#define ures_getVersionNumberInternal U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumberInternal)
+#define ures_hasNext U_ICU_ENTRY_POINT_RENAME(ures_hasNext)
+#define ures_initStackObject U_ICU_ENTRY_POINT_RENAME(ures_initStackObject)
+#define ures_open U_ICU_ENTRY_POINT_RENAME(ures_open)
+#define ures_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ures_openAvailableLocales)
+#define ures_openDirect U_ICU_ENTRY_POINT_RENAME(ures_openDirect)
+#define ures_openFillIn U_ICU_ENTRY_POINT_RENAME(ures_openFillIn)
+#define ures_openU U_ICU_ENTRY_POINT_RENAME(ures_openU)
+#define ures_resetIterator U_ICU_ENTRY_POINT_RENAME(ures_resetIterator)
+#define ures_swap U_ICU_ENTRY_POINT_RENAME(ures_swap)
+#define uscript_breaksBetweenLetters U_ICU_ENTRY_POINT_RENAME(uscript_breaksBetweenLetters)
+#define uscript_closeRun U_ICU_ENTRY_POINT_RENAME(uscript_closeRun)
+#define uscript_getCode U_ICU_ENTRY_POINT_RENAME(uscript_getCode)
+#define uscript_getName U_ICU_ENTRY_POINT_RENAME(uscript_getName)
+#define uscript_getSampleString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleString)
+#define uscript_getSampleUnicodeString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleUnicodeString)
+#define uscript_getScript U_ICU_ENTRY_POINT_RENAME(uscript_getScript)
+#define uscript_getScriptExtensions U_ICU_ENTRY_POINT_RENAME(uscript_getScriptExtensions)
+#define uscript_getShortName U_ICU_ENTRY_POINT_RENAME(uscript_getShortName)
+#define uscript_getUsage U_ICU_ENTRY_POINT_RENAME(uscript_getUsage)
+#define uscript_hasScript U_ICU_ENTRY_POINT_RENAME(uscript_hasScript)
+#define uscript_isCased U_ICU_ENTRY_POINT_RENAME(uscript_isCased)
+#define uscript_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uscript_isRightToLeft)
+#define uscript_nextRun U_ICU_ENTRY_POINT_RENAME(uscript_nextRun)
+#define uscript_openRun U_ICU_ENTRY_POINT_RENAME(uscript_openRun)
+#define uscript_resetRun U_ICU_ENTRY_POINT_RENAME(uscript_resetRun)
+#define uscript_setRunText U_ICU_ENTRY_POINT_RENAME(uscript_setRunText)
+#define usearch_close U_ICU_ENTRY_POINT_RENAME(usearch_close)
+#define usearch_first U_ICU_ENTRY_POINT_RENAME(usearch_first)
+#define usearch_following U_ICU_ENTRY_POINT_RENAME(usearch_following)
+#define usearch_getAttribute U_ICU_ENTRY_POINT_RENAME(usearch_getAttribute)
+#define usearch_getBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_getBreakIterator)
+#define usearch_getCollator U_ICU_ENTRY_POINT_RENAME(usearch_getCollator)
+#define usearch_getMatchedLength U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedLength)
+#define usearch_getMatchedStart U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedStart)
+#define usearch_getMatchedText U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedText)
+#define usearch_getOffset U_ICU_ENTRY_POINT_RENAME(usearch_getOffset)
+#define usearch_getPattern U_ICU_ENTRY_POINT_RENAME(usearch_getPattern)
+#define usearch_getText U_ICU_ENTRY_POINT_RENAME(usearch_getText)
+#define usearch_handleNextCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handleNextCanonical)
+#define usearch_handleNextExact U_ICU_ENTRY_POINT_RENAME(usearch_handleNextExact)
+#define usearch_handlePreviousCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousCanonical)
+#define usearch_handlePreviousExact U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousExact)
+#define usearch_last U_ICU_ENTRY_POINT_RENAME(usearch_last)
+#define usearch_next U_ICU_ENTRY_POINT_RENAME(usearch_next)
+#define usearch_open U_ICU_ENTRY_POINT_RENAME(usearch_open)
+#define usearch_openFromCollator U_ICU_ENTRY_POINT_RENAME(usearch_openFromCollator)
+#define usearch_preceding U_ICU_ENTRY_POINT_RENAME(usearch_preceding)
+#define usearch_previous U_ICU_ENTRY_POINT_RENAME(usearch_previous)
+#define usearch_reset U_ICU_ENTRY_POINT_RENAME(usearch_reset)
+#define usearch_search U_ICU_ENTRY_POINT_RENAME(usearch_search)
+#define usearch_searchBackwards U_ICU_ENTRY_POINT_RENAME(usearch_searchBackwards)
+#define usearch_setAttribute U_ICU_ENTRY_POINT_RENAME(usearch_setAttribute)
+#define usearch_setBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_setBreakIterator)
+#define usearch_setCollator U_ICU_ENTRY_POINT_RENAME(usearch_setCollator)
+#define usearch_setOffset U_ICU_ENTRY_POINT_RENAME(usearch_setOffset)
+#define usearch_setPattern U_ICU_ENTRY_POINT_RENAME(usearch_setPattern)
+#define usearch_setText U_ICU_ENTRY_POINT_RENAME(usearch_setText)
+#define uset_add U_ICU_ENTRY_POINT_RENAME(uset_add)
+#define uset_addAll U_ICU_ENTRY_POINT_RENAME(uset_addAll)
+#define uset_addAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_addAllCodePoints)
+#define uset_addRange U_ICU_ENTRY_POINT_RENAME(uset_addRange)
+#define uset_addString U_ICU_ENTRY_POINT_RENAME(uset_addString)
+#define uset_applyIntPropertyValue U_ICU_ENTRY_POINT_RENAME(uset_applyIntPropertyValue)
+#define uset_applyPattern U_ICU_ENTRY_POINT_RENAME(uset_applyPattern)
+#define uset_applyPropertyAlias U_ICU_ENTRY_POINT_RENAME(uset_applyPropertyAlias)
+#define uset_charAt U_ICU_ENTRY_POINT_RENAME(uset_charAt)
+#define uset_clear U_ICU_ENTRY_POINT_RENAME(uset_clear)
+#define uset_clone U_ICU_ENTRY_POINT_RENAME(uset_clone)
+#define uset_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(uset_cloneAsThawed)
+#define uset_close U_ICU_ENTRY_POINT_RENAME(uset_close)
+#define uset_closeOver U_ICU_ENTRY_POINT_RENAME(uset_closeOver)
+#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact)
+#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement)
+#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll)
+#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains)
+#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll)
+#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints)
+#define uset_containsNone U_ICU_ENTRY_POINT_RENAME(uset_containsNone)
+#define uset_containsRange U_ICU_ENTRY_POINT_RENAME(uset_containsRange)
+#define uset_containsSome U_ICU_ENTRY_POINT_RENAME(uset_containsSome)
+#define uset_containsString U_ICU_ENTRY_POINT_RENAME(uset_containsString)
+#define uset_equals U_ICU_ENTRY_POINT_RENAME(uset_equals)
+#define uset_freeze U_ICU_ENTRY_POINT_RENAME(uset_freeze)
+#define uset_getItem U_ICU_ENTRY_POINT_RENAME(uset_getItem)
+#define uset_getItemCount U_ICU_ENTRY_POINT_RENAME(uset_getItemCount)
+#define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange)
+#define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount)
+#define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet)
+#define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf)
+#define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty)
+#define uset_isFrozen U_ICU_ENTRY_POINT_RENAME(uset_isFrozen)
+#define uset_open U_ICU_ENTRY_POINT_RENAME(uset_open)
+#define uset_openEmpty U_ICU_ENTRY_POINT_RENAME(uset_openEmpty)
+#define uset_openPattern U_ICU_ENTRY_POINT_RENAME(uset_openPattern)
+#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions)
+#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove)
+#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll)
+#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings)
+#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange)
+#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString)
+#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern)
+#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain)
+#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll)
+#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize)
+#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains)
+#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set)
+#define uset_setSerializedToOne U_ICU_ENTRY_POINT_RENAME(uset_setSerializedToOne)
+#define uset_size U_ICU_ENTRY_POINT_RENAME(uset_size)
+#define uset_span U_ICU_ENTRY_POINT_RENAME(uset_span)
+#define uset_spanBack U_ICU_ENTRY_POINT_RENAME(uset_spanBack)
+#define uset_spanBackUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanBackUTF8)
+#define uset_spanUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanUTF8)
+#define uset_toPattern U_ICU_ENTRY_POINT_RENAME(uset_toPattern)
+#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable)
+#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8)
+#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString)
+#define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check)
+#define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8)
+#define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString)
+#define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone)
+#define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close)
+#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars)
+#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales)
+#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet)
+#define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks)
+#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet)
+#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet)
+#define uspoof_getRecommendedSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedSet)
+#define uspoof_getRecommendedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedUnicodeSet)
+#define uspoof_getRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getRestrictionLevel)
+#define uspoof_getSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeleton)
+#define uspoof_getSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUTF8)
+#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString)
+#define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open)
+#define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized)
+#define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource)
+#define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize)
+#define uspoof_setAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedChars)
+#define uspoof_setAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedLocales)
+#define uspoof_setAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedUnicodeSet)
+#define uspoof_setChecks U_ICU_ENTRY_POINT_RENAME(uspoof_setChecks)
+#define uspoof_setRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_setRestrictionLevel)
+#define uspoof_swap U_ICU_ENTRY_POINT_RENAME(uspoof_swap)
+#define usprep_close U_ICU_ENTRY_POINT_RENAME(usprep_close)
+#define usprep_open U_ICU_ENTRY_POINT_RENAME(usprep_open)
+#define usprep_openByType U_ICU_ENTRY_POINT_RENAME(usprep_openByType)
+#define usprep_prepare U_ICU_ENTRY_POINT_RENAME(usprep_prepare)
+#define usprep_swap U_ICU_ENTRY_POINT_RENAME(usprep_swap)
+#define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN)
+#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN)
+#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN)
+#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold)
+#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower)
+#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle)
+#define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper)
+#define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map)
+#define ustrcase_setTempCaseMapLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_setTempCaseMapLocale)
+#define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At)
+#define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone)
+#define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close)
+#define utext_copy U_ICU_ENTRY_POINT_RENAME(utext_copy)
+#define utext_current32 U_ICU_ENTRY_POINT_RENAME(utext_current32)
+#define utext_equals U_ICU_ENTRY_POINT_RENAME(utext_equals)
+#define utext_extract U_ICU_ENTRY_POINT_RENAME(utext_extract)
+#define utext_freeze U_ICU_ENTRY_POINT_RENAME(utext_freeze)
+#define utext_getNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getNativeIndex)
+#define utext_getPreviousNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getPreviousNativeIndex)
+#define utext_hasMetaData U_ICU_ENTRY_POINT_RENAME(utext_hasMetaData)
+#define utext_isLengthExpensive U_ICU_ENTRY_POINT_RENAME(utext_isLengthExpensive)
+#define utext_isWritable U_ICU_ENTRY_POINT_RENAME(utext_isWritable)
+#define utext_moveIndex32 U_ICU_ENTRY_POINT_RENAME(utext_moveIndex32)
+#define utext_nativeLength U_ICU_ENTRY_POINT_RENAME(utext_nativeLength)
+#define utext_next32 U_ICU_ENTRY_POINT_RENAME(utext_next32)
+#define utext_next32From U_ICU_ENTRY_POINT_RENAME(utext_next32From)
+#define utext_openCharacterIterator U_ICU_ENTRY_POINT_RENAME(utext_openCharacterIterator)
+#define utext_openConstUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openConstUnicodeString)
+#define utext_openReplaceable U_ICU_ENTRY_POINT_RENAME(utext_openReplaceable)
+#define utext_openUChars U_ICU_ENTRY_POINT_RENAME(utext_openUChars)
+#define utext_openUTF8 U_ICU_ENTRY_POINT_RENAME(utext_openUTF8)
+#define utext_openUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openUnicodeString)
+#define utext_previous32 U_ICU_ENTRY_POINT_RENAME(utext_previous32)
+#define utext_previous32From U_ICU_ENTRY_POINT_RENAME(utext_previous32From)
+#define utext_replace U_ICU_ENTRY_POINT_RENAME(utext_replace)
+#define utext_setNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_setNativeIndex)
+#define utext_setup U_ICU_ENTRY_POINT_RENAME(utext_setup)
+#define utf8_appendCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_appendCharSafeBody)
+#define utf8_back1SafeBody U_ICU_ENTRY_POINT_RENAME(utf8_back1SafeBody)
+#define utf8_countTrailBytes U_ICU_ENTRY_POINT_RENAME(utf8_countTrailBytes)
+#define utf8_nextCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_nextCharSafeBody)
+#define utf8_prevCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_prevCharSafeBody)
+#define utmscale_fromInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_fromInt64)
+#define utmscale_getTimeScaleValue U_ICU_ENTRY_POINT_RENAME(utmscale_getTimeScaleValue)
+#define utmscale_toInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_toInt64)
+#define utrace_cleanup U_ICU_ENTRY_POINT_RENAME(utrace_cleanup)
+#define utrace_data U_ICU_ENTRY_POINT_RENAME(utrace_data)
+#define utrace_entry U_ICU_ENTRY_POINT_RENAME(utrace_entry)
+#define utrace_exit U_ICU_ENTRY_POINT_RENAME(utrace_exit)
+#define utrace_format U_ICU_ENTRY_POINT_RENAME(utrace_format)
+#define utrace_functionName U_ICU_ENTRY_POINT_RENAME(utrace_functionName)
+#define utrace_getFunctions U_ICU_ENTRY_POINT_RENAME(utrace_getFunctions)
+#define utrace_getLevel U_ICU_ENTRY_POINT_RENAME(utrace_getLevel)
+#define utrace_level U_ICU_ENTRY_POINT_RENAME(utrace_level)
+#define utrace_setFunctions U_ICU_ENTRY_POINT_RENAME(utrace_setFunctions)
+#define utrace_setLevel U_ICU_ENTRY_POINT_RENAME(utrace_setLevel)
+#define utrace_vformat U_ICU_ENTRY_POINT_RENAME(utrace_vformat)
+#define utrans_clone U_ICU_ENTRY_POINT_RENAME(utrans_clone)
+#define utrans_close U_ICU_ENTRY_POINT_RENAME(utrans_close)
+#define utrans_countAvailableIDs U_ICU_ENTRY_POINT_RENAME(utrans_countAvailableIDs)
+#define utrans_getAvailableID U_ICU_ENTRY_POINT_RENAME(utrans_getAvailableID)
+#define utrans_getID U_ICU_ENTRY_POINT_RENAME(utrans_getID)
+#define utrans_getSourceSet U_ICU_ENTRY_POINT_RENAME(utrans_getSourceSet)
+#define utrans_getUnicodeID U_ICU_ENTRY_POINT_RENAME(utrans_getUnicodeID)
+#define utrans_open U_ICU_ENTRY_POINT_RENAME(utrans_open)
+#define utrans_openIDs U_ICU_ENTRY_POINT_RENAME(utrans_openIDs)
+#define utrans_openInverse U_ICU_ENTRY_POINT_RENAME(utrans_openInverse)
+#define utrans_openU U_ICU_ENTRY_POINT_RENAME(utrans_openU)
+#define utrans_register U_ICU_ENTRY_POINT_RENAME(utrans_register)
+#define utrans_rep_caseContextIterator U_ICU_ENTRY_POINT_RENAME(utrans_rep_caseContextIterator)
+#define utrans_setFilter U_ICU_ENTRY_POINT_RENAME(utrans_setFilter)
+#define utrans_stripRules U_ICU_ENTRY_POINT_RENAME(utrans_stripRules)
+#define utrans_toRules U_ICU_ENTRY_POINT_RENAME(utrans_toRules)
+#define utrans_trans U_ICU_ENTRY_POINT_RENAME(utrans_trans)
+#define utrans_transIncremental U_ICU_ENTRY_POINT_RENAME(utrans_transIncremental)
+#define utrans_transIncrementalUChars U_ICU_ENTRY_POINT_RENAME(utrans_transIncrementalUChars)
+#define utrans_transUChars U_ICU_ENTRY_POINT_RENAME(utrans_transUChars)
+#define utrans_transliterator_cleanup U_ICU_ENTRY_POINT_RENAME(utrans_transliterator_cleanup)
+#define utrans_unregister U_ICU_ENTRY_POINT_RENAME(utrans_unregister)
+#define utrans_unregisterID U_ICU_ENTRY_POINT_RENAME(utrans_unregisterID)
+#define utrie2_clone U_ICU_ENTRY_POINT_RENAME(utrie2_clone)
+#define utrie2_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(utrie2_cloneAsThawed)
+#define utrie2_close U_ICU_ENTRY_POINT_RENAME(utrie2_close)
+#define utrie2_enum U_ICU_ENTRY_POINT_RENAME(utrie2_enum)
+#define utrie2_enumForLeadSurrogate U_ICU_ENTRY_POINT_RENAME(utrie2_enumForLeadSurrogate)
+#define utrie2_freeze U_ICU_ENTRY_POINT_RENAME(utrie2_freeze)
+#define utrie2_fromUTrie U_ICU_ENTRY_POINT_RENAME(utrie2_fromUTrie)
+#define utrie2_get32 U_ICU_ENTRY_POINT_RENAME(utrie2_get32)
+#define utrie2_get32FromLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_get32FromLeadSurrogateCodeUnit)
+#define utrie2_getVersion U_ICU_ENTRY_POINT_RENAME(utrie2_getVersion)
+#define utrie2_internalU8NextIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8NextIndex)
+#define utrie2_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8PrevIndex)
+#define utrie2_isFrozen U_ICU_ENTRY_POINT_RENAME(utrie2_isFrozen)
+#define utrie2_open U_ICU_ENTRY_POINT_RENAME(utrie2_open)
+#define utrie2_openDummy U_ICU_ENTRY_POINT_RENAME(utrie2_openDummy)
+#define utrie2_openFromSerialized U_ICU_ENTRY_POINT_RENAME(utrie2_openFromSerialized)
+#define utrie2_serialize U_ICU_ENTRY_POINT_RENAME(utrie2_serialize)
+#define utrie2_set32 U_ICU_ENTRY_POINT_RENAME(utrie2_set32)
+#define utrie2_set32ForLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_set32ForLeadSurrogateCodeUnit)
+#define utrie2_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie2_setRange32)
+#define utrie2_swap U_ICU_ENTRY_POINT_RENAME(utrie2_swap)
+#define utrie2_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie2_swapAnyVersion)
+#define utrie_clone U_ICU_ENTRY_POINT_RENAME(utrie_clone)
+#define utrie_close U_ICU_ENTRY_POINT_RENAME(utrie_close)
+#define utrie_defaultGetFoldingOffset U_ICU_ENTRY_POINT_RENAME(utrie_defaultGetFoldingOffset)
+#define utrie_enum U_ICU_ENTRY_POINT_RENAME(utrie_enum)
+#define utrie_get32 U_ICU_ENTRY_POINT_RENAME(utrie_get32)
+#define utrie_getData U_ICU_ENTRY_POINT_RENAME(utrie_getData)
+#define utrie_open U_ICU_ENTRY_POINT_RENAME(utrie_open)
+#define utrie_serialize U_ICU_ENTRY_POINT_RENAME(utrie_serialize)
+#define utrie_set32 U_ICU_ENTRY_POINT_RENAME(utrie_set32)
+#define utrie_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie_setRange32)
+#define utrie_swap U_ICU_ENTRY_POINT_RENAME(utrie_swap)
+#define utrie_unserialize U_ICU_ENTRY_POINT_RENAME(utrie_unserialize)
+#define utrie_unserializeDummy U_ICU_ENTRY_POINT_RENAME(utrie_unserializeDummy)
+#define vzone_clone U_ICU_ENTRY_POINT_RENAME(vzone_clone)
+#define vzone_close U_ICU_ENTRY_POINT_RENAME(vzone_close)
+#define vzone_countTransitionRules U_ICU_ENTRY_POINT_RENAME(vzone_countTransitionRules)
+#define vzone_equals U_ICU_ENTRY_POINT_RENAME(vzone_equals)
+#define vzone_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(vzone_getDynamicClassID)
+#define vzone_getLastModified U_ICU_ENTRY_POINT_RENAME(vzone_getLastModified)
+#define vzone_getNextTransition U_ICU_ENTRY_POINT_RENAME(vzone_getNextTransition)
+#define vzone_getOffset U_ICU_ENTRY_POINT_RENAME(vzone_getOffset)
+#define vzone_getOffset2 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset2)
+#define vzone_getOffset3 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset3)
+#define vzone_getPreviousTransition U_ICU_ENTRY_POINT_RENAME(vzone_getPreviousTransition)
+#define vzone_getRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_getRawOffset)
+#define vzone_getStaticClassID U_ICU_ENTRY_POINT_RENAME(vzone_getStaticClassID)
+#define vzone_getTZURL U_ICU_ENTRY_POINT_RENAME(vzone_getTZURL)
+#define vzone_hasSameRules U_ICU_ENTRY_POINT_RENAME(vzone_hasSameRules)
+#define vzone_inDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_inDaylightTime)
+#define vzone_openData U_ICU_ENTRY_POINT_RENAME(vzone_openData)
+#define vzone_openID U_ICU_ENTRY_POINT_RENAME(vzone_openID)
+#define vzone_setLastModified U_ICU_ENTRY_POINT_RENAME(vzone_setLastModified)
+#define vzone_setRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_setRawOffset)
+#define vzone_setTZURL U_ICU_ENTRY_POINT_RENAME(vzone_setTZURL)
+#define vzone_useDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_useDaylightTime)
+#define vzone_write U_ICU_ENTRY_POINT_RENAME(vzone_write)
+#define vzone_writeFromStart U_ICU_ENTRY_POINT_RENAME(vzone_writeFromStart)
+#define vzone_writeSimple U_ICU_ENTRY_POINT_RENAME(vzone_writeSimple)
+#define zrule_close U_ICU_ENTRY_POINT_RENAME(zrule_close)
+#define zrule_equals U_ICU_ENTRY_POINT_RENAME(zrule_equals)
+#define zrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(zrule_getDSTSavings)
+#define zrule_getName U_ICU_ENTRY_POINT_RENAME(zrule_getName)
+#define zrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(zrule_getRawOffset)
+#define zrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(zrule_isEquivalentTo)
+#define ztrans_adoptFrom U_ICU_ENTRY_POINT_RENAME(ztrans_adoptFrom)
+#define ztrans_adoptTo U_ICU_ENTRY_POINT_RENAME(ztrans_adoptTo)
+#define ztrans_clone U_ICU_ENTRY_POINT_RENAME(ztrans_clone)
+#define ztrans_close U_ICU_ENTRY_POINT_RENAME(ztrans_close)
+#define ztrans_equals U_ICU_ENTRY_POINT_RENAME(ztrans_equals)
+#define ztrans_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getDynamicClassID)
+#define ztrans_getFrom U_ICU_ENTRY_POINT_RENAME(ztrans_getFrom)
+#define ztrans_getStaticClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getStaticClassID)
+#define ztrans_getTime U_ICU_ENTRY_POINT_RENAME(ztrans_getTime)
+#define ztrans_getTo U_ICU_ENTRY_POINT_RENAME(ztrans_getTo)
+#define ztrans_open U_ICU_ENTRY_POINT_RENAME(ztrans_open)
+#define ztrans_openEmpty U_ICU_ENTRY_POINT_RENAME(ztrans_openEmpty)
+#define ztrans_setFrom U_ICU_ENTRY_POINT_RENAME(ztrans_setFrom)
+#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime)
+#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo)
+
+#endif
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uset.h b/src/core/basetypes/icu-ucsdet/include/unicode/uset.h
new file mode 100644
index 00000000..eb3c9e6a
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uset.h
@@ -0,0 +1,1126 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uset.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002mar07
+* created by: Markus W. Scherer
+*
+* C version of UnicodeSet.
+*/
+
+
+/**
+ * \file
+ * \brief C API: Unicode Set
+ *
+ * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
+ */
+
+#ifndef __USET_H__
+#define __USET_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/localpointer.h"
+
+#ifndef UCNV_H
+struct USet;
+/**
+ * A UnicodeSet. Use the uset_* API to manipulate. Create with
+ * uset_open*, and destroy with uset_close.
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+#endif
+
+/**
+ * Bitmask values to be passed to uset_openPatternOptions() or
+ * uset_applyPattern() taking an option parameter.
+ * @stable ICU 2.4
+ */
+enum {
+ /**
+ * Ignore white space within patterns unless quoted or escaped.
+ * @stable ICU 2.4
+ */
+ USET_IGNORE_SPACE = 1,
+
+ /**
+ * Enable case insensitive matching. E.g., "[ab]" with this flag
+ * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
+ * match all except 'a', 'A', 'b', and 'B'. This performs a full
+ * closure over case mappings, e.g. U+017F for s.
+ *
+ * The resulting set is a superset of the input for the code points but
+ * not for the strings.
+ * It performs a case mapping closure of the code points and adds
+ * full case folding strings for the code points, and reduces strings of
+ * the original set to their full case folding equivalents.
+ *
+ * This is designed for case-insensitive matches, for example
+ * in regular expressions. The full code point case closure allows checking of
+ * an input character directly against the closure set.
+ * Strings are matched by comparing the case-folded form from the closure
+ * set with an incremental case folding of the string in question.
+ *
+ * The closure set will also contain single code points if the original
+ * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
+ * This is not necessary (that is, redundant) for the above matching method
+ * but results in the same closure sets regardless of whether the original
+ * set contained the code point or a string.
+ *
+ * @stable ICU 2.4
+ */
+ USET_CASE_INSENSITIVE = 2,
+
+ /**
+ * Enable case insensitive matching. E.g., "[ab]" with this flag
+ * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
+ * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
+ * title-, and uppercase mappings as well as the case folding
+ * of each existing element in the set.
+ * @stable ICU 3.2
+ */
+ USET_ADD_CASE_MAPPINGS = 4
+};
+
+/**
+ * Argument values for whether span() and similar functions continue while
+ * the current character is contained vs. not contained in the set.
+ *
+ * The functionality is straightforward for sets with only single code points,
+ * without strings (which is the common case):
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.
+ * - span() and spanBack() partition any string the same way when
+ * alternating between span(USET_SPAN_NOT_CONTAINED) and
+ * span(either "contained" condition).
+ * - Using a complemented (inverted) set and the opposite span conditions
+ * yields the same results.
+ *
+ * When a set contains multi-code point strings, then these statements may not
+ * be true, depending on the strings in the set (for example, whether they
+ * overlap with each other) and the string that is processed.
+ * For a set with strings:
+ * - The complement of the set contains the opposite set of code points,
+ * but the same set of strings.
+ * Therefore, complementing both the set and the span conditions
+ * may yield different results.
+ * - When starting spans at different positions in a string
+ * (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
+ * because a set string may start before the later position.
+ * - span(USET_SPAN_SIMPLE) may be shorter than
+ * span(USET_SPAN_CONTAINED) because it will not recursively try
+ * all possible paths.
+ * For example, with a set which contains the three strings "xy", "xya" and "ax",
+ * span("xyax", USET_SPAN_CONTAINED) will return 4 but
+ * span("xyax", USET_SPAN_SIMPLE) will return 3.
+ * span(USET_SPAN_SIMPLE) will never be longer than
+ * span(USET_SPAN_CONTAINED).
+ * - With either "contained" condition, span() and spanBack() may partition
+ * a string in different ways.
+ * For example, with a set which contains the two strings "ab" and "ba",
+ * and when processing the string "aba",
+ * span() will yield contained/not-contained boundaries of { 0, 2, 3 }
+ * while spanBack() will yield boundaries of { 0, 1, 3 }.
+ *
+ * Note: If it is important to get the same boundaries whether iterating forward
+ * or backward through a string, then either only span() should be used and
+ * the boundaries cached for backward operation, or an ICU BreakIterator
+ * could be used.
+ *
+ * Note: Unpaired surrogates are treated like surrogate code points.
+ * Similarly, set strings match only on code point boundaries,
+ * never in the middle of a surrogate pair.
+ * Illegal UTF-8 sequences are treated like U+FFFD.
+ * When processing UTF-8 strings, malformed set strings
+ * (strings with unpaired surrogates which cannot be converted to UTF-8)
+ * are ignored.
+ *
+ * @stable ICU 3.8
+ */
+typedef enum USetSpanCondition {
+ /**
+ * Continues a span() while there is no set element at the current position.
+ * Increments by one code point at a time.
+ * Stops before the first set element (character or string).
+ * (For code points only, this is like while contains(current)==FALSE).
+ *
+ * When span() returns, the substring between where it started and the position
+ * it returned consists only of characters that are not in the set,
+ * and none of its strings overlap with the span.
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_NOT_CONTAINED = 0,
+ /**
+ * Spans the longest substring that is a concatenation of set elements (characters or strings).
+ * (For characters only, this is like while contains(current)==TRUE).
+ *
+ * When span() returns, the substring between where it started and the position
+ * it returned consists only of set elements (characters or strings) that are in the set.
+ *
+ * If a set contains strings, then the span will be the longest substring for which there
+ * exists at least one non-overlapping concatenation of set elements (characters or strings).
+ * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
+ * (Java/ICU/Perl regex stops at the first match of an OR.)
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_CONTAINED = 1,
+ /**
+ * Continues a span() while there is a set element at the current position.
+ * Increments by the longest matching element at each position.
+ * (For characters only, this is like while contains(current)==TRUE).
+ *
+ * When span() returns, the substring between where it started and the position
+ * it returned consists only of set elements (characters or strings) that are in the set.
+ *
+ * If a set only contains single characters, then this is the same
+ * as USET_SPAN_CONTAINED.
+ *
+ * If a set contains strings, then the span will be the longest substring
+ * with a match at each position with the longest single set element (character or string).
+ *
+ * Use this span condition together with other longest-match algorithms,
+ * such as ICU converters (ucnv_getUnicodeSet()).
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_SIMPLE = 2,
+ /**
+ * One more than the last span condition.
+ * @stable ICU 3.8
+ */
+ USET_SPAN_CONDITION_COUNT
+} USetSpanCondition;
+
+enum {
+ /**
+ * Capacity of USerializedSet::staticArray.
+ * Enough for any single-code point set.
+ * Also provides padding for nice sizeof(USerializedSet).
+ * @stable ICU 2.4
+ */
+ USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
+};
+
+/**
+ * A serialized form of a Unicode set. Limited manipulations are
+ * possible directly on a serialized set. See below.
+ * @stable ICU 2.4
+ */
+typedef struct USerializedSet {
+ /**
+ * The serialized Unicode Set.
+ * @stable ICU 2.4
+ */
+ const uint16_t *array;
+ /**
+ * The length of the array that contains BMP characters.
+ * @stable ICU 2.4
+ */
+ int32_t bmpLength;
+ /**
+ * The total length of the array.
+ * @stable ICU 2.4
+ */
+ int32_t length;
+ /**
+ * A small buffer for the array to reduce memory allocations.
+ * @stable ICU 2.4
+ */
+ uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
+} USerializedSet;
+
+/*********************************************************************
+ * USet API
+ *********************************************************************/
+
+/**
+ * Create an empty USet object.
+ * Equivalent to uset_open(1, 0).
+ * @return a newly created USet. The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 4.2
+ */
+U_STABLE USet* U_EXPORT2
+uset_openEmpty(void);
+
+/**
+ * Creates a USet object that contains the range of characters
+ * start..end, inclusive. If <code>start > end</code>
+ * then an empty set is created (same as using uset_openEmpty()).
+ * @param start first character of the range, inclusive
+ * @param end last character of the range, inclusive
+ * @return a newly created USet. The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_open(UChar32 start, UChar32 end);
+
+/**
+ * Creates a set from the given pattern. See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_openPattern(const UChar* pattern, int32_t patternLength,
+ UErrorCode* ec);
+
+/**
+ * Creates a set from the given pattern. See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode* ec);
+
+/**
+ * Disposes of the storage used by a USet object. This function should
+ * be called exactly once for objects returned by uset_open().
+ * @param set the object to dispose of
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_close(USet* set);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUSetPointer
+ * "Smart pointer" class, closes a USet via uset_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns a copy of this object.
+ * If this set is frozen, then the clone will be frozen as well.
+ * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
+ * @param set the original set
+ * @return the newly allocated copy of the set
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_STABLE USet * U_EXPORT2
+uset_clone(const USet *set);
+
+/**
+ * Determines whether the set has been frozen (made immutable) or not.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return TRUE/FALSE for whether the set has been frozen
+ * @see uset_freeze
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_STABLE UBool U_EXPORT2
+uset_isFrozen(const USet *set);
+
+/**
+ * Freeze the set (make it immutable).
+ * Once frozen, it cannot be unfrozen and is therefore thread-safe
+ * until it is deleted.
+ * See the ICU4J Freezable interface for details.
+ * Freezing the set may also make some operations faster, for example
+ * uset_contains() and uset_span().
+ * A frozen set will not be modified. (It remains frozen.)
+ * @param set the set
+ * @return the same set, now frozen
+ * @see uset_isFrozen
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_STABLE void U_EXPORT2
+uset_freeze(USet *set);
+
+/**
+ * Clone the set and make the clone mutable.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return the mutable clone
+ * @see uset_freeze
+ * @see uset_isFrozen
+ * @see uset_clone
+ * @stable ICU 3.8
+ */
+U_STABLE USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set);
+
+/**
+ * Causes the USet object to represent the range <code>start - end</code>.
+ * If <code>start > end</code> then this USet is set to an empty range.
+ * A frozen set will not be modified.
+ * @param set the object to set to the given range
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_set(USet* set,
+ UChar32 start, UChar32 end);
+
+/**
+ * Modifies the set to represent the set specified by the given
+ * pattern. See the UnicodeSet class description for the syntax of
+ * the pattern language. See also the User Guide chapter about UnicodeSet.
+ * <em>Empties the set passed before applying the pattern.</em>
+ * A frozen set will not be modified.
+ * @param set The set to which the pattern is to be applied.
+ * @param pattern A pointer to UChar string specifying what characters are in the set.
+ * The character at pattern[0] must be a '['.
+ * @param patternLength The length of the UChar string. -1 if NUL terminated.
+ * @param options A bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param status Returns an error if the pattern cannot be parsed.
+ * @return Upon successful parse, the value is either
+ * the index of the character after the closing ']'
+ * of the parsed pattern.
+ * If the status code indicates failure, then the return value
+ * is the index of the error in the source.
+ *
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uset_applyPattern(USet *set,
+ const UChar *pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode *status);
+
+/**
+ * Modifies the set to contain those code points which have the given value
+ * for the given binary or enumerated property, as returned by
+ * u_getIntPropertyValue. Prior contents of this set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the property
+ *
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+ *
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
+ * u_getIntPropertyMaxValue(prop), with one exception. If prop is
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+ * rather a mask value produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_applyIntPropertyValue(USet* set,
+ UProperty prop, int32_t value, UErrorCode* ec);
+
+/**
+ * Modifies the set to contain those code points which have the
+ * given value for the given property. Prior contents of this
+ * set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the given
+ * property and value alias
+ *
+ * @param prop a string specifying a property alias, either short or long.
+ * The name is matched loosely. See PropertyAliases.txt for names and a
+ * description of loose matching. If the value string is empty, then this
+ * string is interpreted as either a General_Category value alias, a Script
+ * value alias, a binary property alias, or a special ID. Special IDs are
+ * matched loosely and correspond to the following sets:
+ *
+ * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ASCII" = [\\u0000-\\u007F],
+ * "Assigned" = [:^Cn:].
+ *
+ * @param propLength the length of the prop, or -1 if NULL
+ *
+ * @param value a string specifying a value alias, either short or long.
+ * The name is matched loosely. See PropertyValueAliases.txt for names
+ * and a description of loose matching. In addition to aliases listed,
+ * numeric values and canonical combining classes may be expressed
+ * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string
+ * may also be empty.
+ *
+ * @param valueLength the length of the value, or -1 if NULL
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_applyPropertyAlias(USet* set,
+ const UChar *prop, int32_t propLength,
+ const UChar *value, int32_t valueLength,
+ UErrorCode* ec);
+
+/**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern.
+ *
+ * @param pattern a string specifying the pattern
+ * @param patternLength the length of the pattern, or -1 if NULL
+ * @param pos the given position
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
+ int32_t pos);
+
+/**
+ * Returns a string representation of this set. If the result of
+ * calling this function is passed to a uset_openPattern(), it
+ * will produce another set that is equal to this one.
+ * @param set the set
+ * @param result the string to receive the rules, may be NULL
+ * @param resultCapacity the capacity of result, may be 0 if result is NULL
+ * @param escapeUnprintable if TRUE then convert unprintable
+ * character to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @param ec error code.
+ * @return length of string, possibly larger than resultCapacity
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_toPattern(const USet* set,
+ UChar* result, int32_t resultCapacity,
+ UBool escapeUnprintable,
+ UErrorCode* ec);
+
+/**
+ * Adds the given character to the given USet. After this call,
+ * uset_contains(set, c) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param c the character to add
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_add(USet* set, UChar32 c);
+
+/**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present. This operation effectively
+ * modifies this set so that its value is the <i>union</i> of the two
+ * sets. The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to which to add the set
+ * @param additionalSet the source set whose elements are to be added to this set.
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uset_addAll(USet* set, const USet *additionalSet);
+
+/**
+ * Adds the given range of characters to the given USet. After this call,
+ * uset_contains(set, start, end) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param start the first character of the range to add, inclusive
+ * @param end the last character of the range to add, inclusive
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uset_addRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Adds the given string to the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the string to add
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_addString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
+ * If this set already any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the source string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Removes the given character from the given USet. After this call,
+ * uset_contains(set, c) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the character
+ * @param c the character to remove
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_remove(USet* set, UChar32 c);
+
+/**
+ * Removes the given range of characters from the given USet. After this call,
+ * uset_contains(set, start, end) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param start the first character of the range to remove, inclusive
+ * @param end the last character of the range to remove, inclusive
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Removes the given string to the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the string to remove
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_removeString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Removes from this set all of its elements that are contained in the
+ * specified set. This operation effectively modifies this
+ * set so that its value is the <i>asymmetric set difference</i> of
+ * the two sets.
+ * A frozen set will not be modified.
+ * @param set the object from which the elements are to be removed
+ * @param removeSet the object that defines which elements will be
+ * removed from this set
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeAll(USet* set, const USet* removeSet);
+
+/**
+ * Retain only the elements in this set that are contained in the
+ * specified range. If <code>start > end</code> then an empty range is
+ * retained, leaving the set empty. This is equivalent to
+ * a boolean logic AND, or a set INTERSECTION.
+ * A frozen set will not be modified.
+ *
+ * @param set the object for which to retain only the specified range
+ * @param start first character, inclusive, of range to be retained
+ * to this set.
+ * @param end last character, inclusive, of range to be retained
+ * to this set.
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_retain(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Retains only the elements in this set that are contained in the
+ * specified set. In other words, removes from this set all of
+ * its elements that are not contained in the specified set. This
+ * operation effectively modifies this set so that its value is
+ * the <i>intersection</i> of the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perform the retain
+ * @param retain set that defines which elements this set will retain
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_retainAll(USet* set, const USet* retain);
+
+/**
+ * Reallocate this objects internal structures to take up the least
+ * possible space, without changing this object's value.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perfrom the compact
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_compact(USet* set);
+
+/**
+ * Inverts this set. This operation modifies this set so that
+ * its value is its complement. This operation does not affect
+ * the multicharacter strings, if any.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_complement(USet* set);
+
+/**
+ * Complements in this set all elements contained in the specified
+ * set. Any character in the other set will be removed if it is
+ * in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param set the set with which to complement
+ * @param complement set that defines which elements will be xor'ed
+ * from this set.
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_complementAll(USet* set, const USet* complement);
+
+/**
+ * Removes all of the elements from this set. This set will be
+ * empty after this call returns.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_clear(USet* set);
+
+/**
+ * Close this set over the given attribute. For the attribute
+ * USET_CASE, the result is to modify this set so that:
+ *
+ * 1. For each character or string 'a' in this set, all strings or
+ * characters 'b' such that foldCase(a) == foldCase(b) are added
+ * to this set.
+ *
+ * 2. For each string 'e' in the resulting set, if e !=
+ * foldCase(e), 'e' will be removed.
+ *
+ * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
+ *
+ * (Here foldCase(x) refers to the operation u_strFoldCase, and a
+ * == b denotes that the contents are the same, not pointer
+ * comparison.)
+ *
+ * A frozen set will not be modified.
+ *
+ * @param set the set
+ *
+ * @param attributes bitmask for attributes to close over.
+ * Currently only the USET_CASE bit is supported. Any undefined bits
+ * are ignored.
+ * @stable ICU 4.2
+ */
+U_STABLE void U_EXPORT2
+uset_closeOver(USet* set, int32_t attributes);
+
+/**
+ * Remove all strings from this set.
+ *
+ * @param set the set
+ * @stable ICU 4.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeAllStrings(USet* set);
+
+/**
+ * Returns TRUE if the given USet contains no characters and no
+ * strings.
+ * @param set the set
+ * @return true if set is empty
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_isEmpty(const USet* set);
+
+/**
+ * Returns TRUE if the given USet contains the given character.
+ * This function works faster with a frozen set.
+ * @param set the set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_contains(const USet* set, UChar32 c);
+
+/**
+ * Returns TRUE if the given USet contains all characters c
+ * where start <= c && c <= end.
+ * @param set the set
+ * @param start the first character of the range to test, inclusive
+ * @param end the last character of the range to test, inclusive
+ * @return TRUE if set contains the range
+ * @stable ICU 2.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsRange(const USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Returns TRUE if the given USet contains the given string.
+ * @param set the set
+ * @param str the string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if set contains str
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsString(const USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Returns the index of the given character within this set, where
+ * the set is ordered by ascending code point. If the character
+ * is not in this set, return -1. The inverse of this method is
+ * <code>charAt()</code>.
+ * @param set the set
+ * @param c the character to obtain the index for
+ * @return an index from 0..size()-1, or -1
+ * @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+uset_indexOf(const USet* set, UChar32 c);
+
+/**
+ * Returns the character at the given index within this set, where
+ * the set is ordered by ascending code point. If the index is
+ * out of range, return (UChar32)-1. The inverse of this method is
+ * <code>indexOf()</code>.
+ * @param set the set
+ * @param charIndex an index from 0..size()-1 to obtain the char for
+ * @return the character at the given index, or (UChar32)-1.
+ * @stable ICU 3.2
+ */
+U_STABLE UChar32 U_EXPORT2
+uset_charAt(const USet* set, int32_t charIndex);
+
+/**
+ * Returns the number of characters and strings contained in the given
+ * USet.
+ * @param set the set
+ * @return a non-negative integer counting the characters and strings
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_size(const USet* set);
+
+/**
+ * Returns the number of items in this set. An item is either a range
+ * of characters or a single multicharacter string.
+ * @param set the set
+ * @return a non-negative integer counting the character ranges
+ * and/or strings contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getItemCount(const USet* set);
+
+/**
+ * Returns an item of this set. An item is either a range of
+ * characters or a single multicharacter string.
+ * @param set the set
+ * @param itemIndex a non-negative integer in the range 0..
+ * uset_getItemCount(set)-1
+ * @param start pointer to variable to receive first character
+ * in range, inclusive
+ * @param end pointer to variable to receive last character in range,
+ * inclusive
+ * @param str buffer to receive the string, may be NULL
+ * @param strCapacity capacity of str, or 0 if str is NULL
+ * @param ec error code
+ * @return the length of the string (>= 2), or 0 if the item is a
+ * range, in which case it is the range *start..*end, or -1 if
+ * itemIndex is out of range
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getItem(const USet* set, int32_t itemIndex,
+ UChar32* start, UChar32* end,
+ UChar* str, int32_t strCapacity,
+ UErrorCode* ec);
+
+/**
+ * Returns true if set1 contains all the characters and strings
+ * of set2. It answers the question, 'Is set1 a superset of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsAll(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if this set contains all the characters
+ * of the given string. This is does not check containment of grapheme
+ * clusters, like uset_containsString.
+ * @param set set of characters to be checked for containment
+ * @param str string containing codepoints to be checked for containment
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if the test condition is met
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Returns true if set1 contains none of the characters and strings
+ * of set2. It answers the question, 'Is set1 a disjoint set of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsNone(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if set1 contains some of the characters and strings
+ * of set2. It answers the question, 'Does set1 and set2 have an intersection?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsSome(const USet* set1, const USet* set2);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_STABLE int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_STABLE int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_STABLE int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_STABLE int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns true if set1 contains all of the characters and strings
+ * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_equals(const USet* set1, const USet* set2);
+
+/*********************************************************************
+ * Serialized set API
+ *********************************************************************/
+
+/**
+ * Serializes this set into an array of 16-bit integers. Serialization
+ * (currently) only records the characters in the set; multicharacter
+ * strings are ignored.
+ *
+ * The array
+ * has following format (each line is one 16-bit integer):
+ *
+ * length = (n+2*m) | (m!=0?0x8000:0)
+ * bmpLength = n; present if m!=0
+ * bmp[0]
+ * bmp[1]
+ * ...
+ * bmp[n-1]
+ * supp-high[0]
+ * supp-low[0]
+ * supp-high[1]
+ * supp-low[1]
+ * ...
+ * supp-high[m-1]
+ * supp-low[m-1]
+ *
+ * The array starts with a header. After the header are n bmp
+ * code points, then m supplementary code points. Either n or m
+ * or both may be zero. n+2*m is always <= 0x7FFF.
+ *
+ * If there are no supplementary characters (if m==0) then the
+ * header is one 16-bit integer, 'length', with value n.
+ *
+ * If there are supplementary characters (if m!=0) then the header
+ * is two 16-bit integers. The first, 'length', has value
+ * (n+2*m)|0x8000. The second, 'bmpLength', has value n.
+ *
+ * After the header the code points are stored in ascending order.
+ * Supplementary code points are stored as most significant 16
+ * bits followed by least significant 16 bits.
+ *
+ * @param set the set
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
+ * May be NULL only if destCapacity is zero.
+ * @param destCapacity size of dest, or zero. Must not be negative.
+ * @param pErrorCode pointer to the error code. Will be set to
+ * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to
+ * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
+ * @return the total length of the serialized format, including
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+ * than U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
+
+/**
+ * Given a serialized array, fill in the given serialized set object.
+ * @param fillSet pointer to result
+ * @param src pointer to start of array
+ * @param srcLength length of array
+ * @return true if the given array is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
+
+/**
+ * Set the USerializedSet to contain the given character (and nothing
+ * else).
+ * @param fillSet pointer to result
+ * @param c The codepoint to set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
+
+/**
+ * Returns TRUE if the given USerializedSet contains the given
+ * character.
+ * @param set the serialized set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_serializedContains(const USerializedSet* set, UChar32 c);
+
+/**
+ * Returns the number of disjoint ranges of characters contained in
+ * the given serialized set. Ignores any strings contained in the
+ * set.
+ * @param set the serialized set
+ * @return a non-negative integer counting the character ranges
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getSerializedRangeCount(const USerializedSet* set);
+
+/**
+ * Returns a range of characters contained in the given serialized
+ * set.
+ * @param set the serialized set
+ * @param rangeIndex a non-negative integer in the range 0..
+ * uset_getSerializedRangeCount(set)-1
+ * @param pStart pointer to variable to receive first character
+ * in range, inclusive
+ * @param pEnd pointer to variable to receive last character in range,
+ * inclusive
+ * @return true if rangeIndex is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
+ UChar32* pStart, UChar32* pEnd);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ustring.h b/src/core/basetypes/icu-ucsdet/include/unicode/ustring.h
new file mode 100644
index 00000000..6d141e8d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ustring.h
@@ -0,0 +1,1700 @@
+/*
+**********************************************************************
+* Copyright (C) 1998-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ustring.h
+*
+* Modification History:
+*
+* Date Name Description
+* 12/07/98 bertrand Creation.
+******************************************************************************
+*/
+
+#ifndef USTRING_H
+#define USTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uiter.h"
+
+/**
+ * \def UBRK_TYPEDEF_UBREAK_ITERATOR
+ * @internal
+ */
+
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
+ typedef struct UBreakIterator UBreakIterator;
+#endif
+
+/**
+ * \file
+ * \brief C API: Unicode string handling functions
+ *
+ * These C API functions provide general Unicode string handling.
+ *
+ * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
+ * functions. (For example, they do not check for bad arguments like NULL string pointers.)
+ * In some cases, only the thread-safe variant of such a function is implemented here
+ * (see u_strtok_r()).
+ *
+ * Other functions provide more Unicode-specific functionality like locale-specific
+ * upper/lower-casing and string comparison in code point order.
+ *
+ * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
+ * UTF-16 encodes each Unicode code point with either one or two UChar code units.
+ * (This is the default form of Unicode, and a forward-compatible extension of the original,
+ * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
+ * in 1996.)
+ *
+ * Some APIs accept a 32-bit UChar32 value for a single code point.
+ *
+ * ICU also handles 16-bit Unicode text with unpaired surrogates.
+ * Such text is not well-formed UTF-16.
+ * Code-point-related functions treat unpaired surrogates as surrogate code points,
+ * i.e., as separate units.
+ *
+ * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
+ * it is much more efficient even for random access because the code unit values
+ * for single-unit characters vs. lead units vs. trail units are completely disjoint.
+ * This means that it is easy to determine character (code point) boundaries from
+ * random offsets in the string.
+ *
+ * Unicode (UTF-16) string processing is optimized for the single-unit case.
+ * Although it is important to support supplementary characters
+ * (which use pairs of lead/trail code units called "surrogates"),
+ * their occurrence is rare. Almost all characters in modern use require only
+ * a single UChar code unit (i.e., their code point values are <=0xffff).
+ *
+ * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html).
+ * For a discussion of the handling of unpaired surrogates see also
+ * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
+ */
+
+/**
+ * \defgroup ustring_ustrlen String Length
+ * \ingroup ustring_strlen
+ */
+/*@{*/
+/**
+ * Determine the length of an array of UChar.
+ *
+ * @param s The array of UChars, NULL (U+0000) terminated.
+ * @return The number of UChars in <code>chars</code>, minus the terminator.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strlen(const UChar *s);
+/*@}*/
+
+/**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h).
+ *
+ * @param s The input string.
+ * @param length The number of UChar code units to be checked, or -1 to count all
+ * code points before the first NUL (U+0000).
+ * @return The number of code points in the specified code units.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_countChar32(const UChar *s, int32_t length);
+
+/**
+ * Check if the string contains more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in the entire string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length is known
+ * (not -1 for NUL-termination) and falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (u_countChar32(s, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param s The input string.
+ * @param length The length of the string, or -1 if it is NUL-terminated.
+ * @param number The number of code points in the string is compared against
+ * the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ * than 'number'. Same as (u_countChar32(s, length)>number).
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
+
+/**
+ * Concatenate two ustrings. Appends a copy of <code>src</code>,
+ * including the null terminator, to <code>dst</code>. The initial copied
+ * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strcat(UChar *dst,
+ const UChar *src);
+
+/**
+ * Concatenate two ustrings.
+ * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
+ * Adds a terminating NUL.
+ * If src is too long, then only <code>n-1</code> characters will be copied
+ * before the terminating NUL.
+ * If <code>n&lt;=0</code> then dst is not modified.
+ *
+ * @param dst The destination string.
+ * @param src The source string (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to append; no-op if <=0.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strncat(UChar *dst,
+ const UChar *src,
+ int32_t n);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strrstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar * U_EXPORT2
+u_strchr32(const UChar *s, UChar32 c);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memchr32
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c);
+
+/**
+ * Locates the first occurrence in the string <code>string</code> of any of the characters
+ * in the string <code>matchSet</code>.
+ * Works just like C's strpbrk but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return A pointer to the character in <code>string</code> that matches one of the
+ * characters in <code>matchSet</code>, or NULL if no such character is found.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar * U_EXPORT2
+u_strpbrk(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
+ * Works just like C's strcspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do not
+ * occur in <code>matchSet</code>.
+ * @see u_strspn
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that occur somewhere in <code>matchSet</code>.
+ * Works just like C's strspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do
+ * occur in <code>matchSet</code>.
+ * @see u_strcspn
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * The string tokenizer API allows an application to break a string into
+ * tokens. Unlike strtok(), the saveState (the current pointer within the
+ * original string) is maintained in saveState. In the first call, the
+ * argument src is a pointer to the string. In subsequent calls to
+ * return successive tokens of that string, src must be specified as
+ * NULL. The value saveState is set by this function to maintain the
+ * function's position within the string, and on each subsequent call
+ * you must give this argument the same variable. This function does
+ * handle surrogate pairs. This function is similar to the strtok_r()
+ * the POSIX Threads Extension (1003.1c-1995) version.
+ *
+ * @param src String containing token(s). This string will be modified.
+ * After the first call to u_strtok_r(), this argument must
+ * be NULL to get to the next token.
+ * @param delim Set of delimiter characters (Unicode code points).
+ * @param saveState The current pointer within the original string,
+ * which is set by this function. The saveState
+ * parameter should the address of a local variable of type
+ * UChar *. (i.e. defined "Uhar *myLocalSaveState" and use
+ * &myLocalSaveState for this parameter).
+ * @return A pointer to the next token found in src, or NULL
+ * when there are no more tokens.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar * U_EXPORT2
+u_strtok_r(UChar *src,
+ const UChar *delim,
+ UChar **saveState);
+
+/**
+ * Compare two Unicode strings for bitwise equality (code unit order).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcmp(const UChar *s1,
+ const UChar *s2);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * See u_strCompare for details.
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
+
+/**
+ * Compare two Unicode strings (binary order).
+ *
+ * The comparison can be done in code unit order or in code point order.
+ * They differ only in UTF-16 when
+ * comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This functions works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param codePointOrder Choose between code unit order (FALSE)
+ * and code point order (TRUE).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ UBool codePointOrder);
+
+/**
+ * Compare two Unicode strings (binary order)
+ * as presented by UCharIterator objects.
+ * Works otherwise just like u_strCompare().
+ *
+ * Both iterators are reset to their start positions.
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ *
+ * @param iter1 First source string iterator.
+ * @param iter2 Second source string iterator.
+ * @param codePointOrder Choose between code unit order (FALSE)
+ * and code point order (TRUE).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see u_strCompare
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also unistr.h and unorm.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+#endif
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to
+ * u_strCompare(u_strFoldCase(s1, options),
+ * u_strFoldCase(s2, options),
+ * (options&U_COMPARE_CODE_POINT_ORDER)!=0).
+ *
+ * The comparison can be done in UTF-16 code unit order or in code point order.
+ * They differ only when comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This functions works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCaseCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Compare two ustrings for bitwise equality.
+ * Compares at most <code>n</code> characters.
+ *
+ * @param ucs1 A string to compare (can be NULL/invalid if n<=0).
+ * @param ucs2 A string to compare (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to compare; always returns 0 if n<=0.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncmp(const UChar *ucs1,
+ const UChar *ucs2,
+ int32_t n);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
+ * u_strFoldCase(s2, at most n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters each string to case-fold and then compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
+ * u_strFoldCase(s2, n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param length The number of characters in each string to case-fold and then compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
+
+/**
+ * Copy a ustring. Adds a null terminator.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strcpy(UChar *dst,
+ const UChar *src);
+
+/**
+ * Copy a ustring.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to copy; no-op if <=0.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strncpy(UChar *dst,
+ const UChar *src,
+ int32_t n);
+
+#if !UCONFIG_NO_CONVERSION
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Adds a null terminator.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
+ const char *src );
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
+ const char *src,
+ int32_t n);
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Adds a null terminator.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
+ const UChar *src );
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
+ const UChar *src,
+ int32_t n );
+
+#endif
+
+/**
+ * Synonym for memcpy(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string (can be NULL/invalid if count<=0)
+ * @param count The number of characters to copy; no-op if <=0
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memcpy(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Synonym for memmove(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string (can be NULL/invalid if count<=0)
+ * @param count The number of characters to move; no-op if <=0
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memmove(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
+ *
+ * @param dest The destination string.
+ * @param c The character to initialize the string.
+ * @param count The maximum number of characters to set.
+ * @return A pointer to <code>dest</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memset(UChar *dest, UChar c, int32_t count);
+
+/**
+ * Compare the first <code>count</code> UChars of each buffer.
+ *
+ * @param buf1 The first string to compare.
+ * @param buf2 The second string to compare.
+ * @param count The maximum number of UChars to compare.
+ * @return When buf1 < buf2, a negative number is returned.
+ * When buf1 == buf2, 0 is returned.
+ * When buf1 > buf2, a positive number is returned.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param count The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strFindFirst
+ */
+U_STABLE UChar* U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar* U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strFindLast
+ */
+U_STABLE UChar* U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strFindLast
+ */
+U_STABLE UChar* U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Unicode String literals in C.
+ * We need one macro to declare a variable for the string
+ * and to statically preinitialize it if possible,
+ * and a second macro to dynamically intialize such a string variable if necessary.
+ *
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * A pair of macros for a single string must be used with the same
+ * parameters.
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * <code>NUL</code>, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ *
+ * Usage:
+ * <pre>
+ * U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
+ * U_STRING_DECL(ustringVar2, "jumps 5%", 8);
+ * static UBool didInit=FALSE;
+ *
+ * int32_t function() {
+ * if(!didInit) {
+ * U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
+ * U_STRING_INIT(ustringVar2, "jumps 5%", 8);
+ * didInit=TRUE;
+ * }
+ * return u_strcmp(ustringVar1, ustringVar2);
+ * }
+ * </pre>
+ *
+ * Note that the macros will NOT consistently work if their argument is another <code>#define</code>.
+ * The following will not work on all platforms, don't use it.
+ *
+ * <pre>
+ * #define GLUCK "Mr. Gluck"
+ * U_STRING_DECL(var, GLUCK, 9)
+ * U_STRING_INIT(var, GLUCK, 9)
+ * </pre>
+ *
+ * Instead, use the string literal "Mr. Gluck" as the argument to both macro
+ * calls.
+ *
+ *
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16)
+# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#else
+# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
+#endif
+
+/**
+ * Unescape a string of characters and write the resulting
+ * Unicode characters to the destination buffer. The following escape
+ * sequences are recognized:
+ *
+ * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh 8 hex digits
+ * \\xhh 1-2 hex digits
+ * \\x{h...} 1-8 hex digits
+ * \\ooo 1-3 octal digits; o in [0-7]
+ * \\cX control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped. For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string. An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * The above characters are recognized in the compiler's codepage,
+ * that is, they are coded as 'u', '\\', etc. Characters that are
+ * not parts of escape sequences are converted using u_charsToUChars().
+ *
+ * This function is similar to UnicodeString::unescape() but not
+ * identical to it. The latter takes a source UnicodeString, so it
+ * does escape recognition but no conversion.
+ *
+ * @param src a zero-terminated string of invariant characters
+ * @param dest pointer to buffer to receive converted and unescaped
+ * text and, if there is room, a zero terminator. May be NULL for
+ * preflighting, in which case no UChars will be written, but the
+ * return value will still be valid. On error, an empty string is
+ * stored here (if possible).
+ * @param destCapacity the number of UChars that may be written at
+ * dest. Ignored if dest == NULL.
+ * @return the length of unescaped string.
+ * @see u_unescapeAt
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_unescape(const char *src,
+ UChar *dest, int32_t destCapacity);
+
+U_CDECL_BEGIN
+/**
+ * Callback function for u_unescapeAt() that returns a character of
+ * the source text given an offset and a context pointer. The context
+ * pointer will be whatever is passed into u_unescapeAt().
+ *
+ * @param offset pointer to the offset that will be passed to u_unescapeAt().
+ * @param context an opaque pointer passed directly into u_unescapeAt()
+ * @return the character represented by the escape sequence at
+ * offset
+ * @see u_unescapeAt
+ * @stable ICU 2.0
+ */
+typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
+U_CDECL_END
+
+/**
+ * Unescape a single sequence. The character at offset-1 is assumed
+ * (without checking) to be a backslash. This method takes a callback
+ * pointer to a function that returns the UChar at a given offset. By
+ * varying this callback, ICU functions are able to unescape char*
+ * strings, UnicodeString objects, and UFILE pointers.
+ *
+ * If offset is out of range, or if the escape sequence is ill-formed,
+ * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape()
+ * for a list of recognized sequences.
+ *
+ * @param charAt callback function that returns a UChar of the source
+ * text given an offset and a context pointer.
+ * @param offset pointer to the offset that will be passed to charAt.
+ * The offset value will be updated upon return to point after the
+ * last parsed character of the escape sequence. On error the offset
+ * is unchanged.
+ * @param length the number of characters in the source text. The
+ * last character of the source text is considered to be at offset
+ * length-1.
+ * @param context an opaque pointer passed directly into charAt.
+ * @return the character represented by the escape sequence at
+ * offset, or (UChar32)0xFFFFFFFF on error.
+ * @see u_unescape()
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_unescapeAt(UNESCAPE_CHAR_AT charAt,
+ int32_t *offset,
+ int32_t length,
+ void *context);
+
+/**
+ * Uppercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToUpper(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+/**
+ * Lowercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToLower(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (NULL), then a standard titlecase
+ * break iterator is opened.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UBreakIterator *titleIter,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-folds the characters in a string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strFoldCase(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
+/**
+ * Convert a UTF-16 string to a wchar_t string.
+ * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
+ * this function simply calls the fast, dedicated function for that.
+ * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE wchar_t* U_EXPORT2
+u_strToWCS(wchar_t *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+/**
+ * Convert a wchar_t string to UTF-16.
+ * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
+ * this function simply calls the fast, dedicated function for that.
+ * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromWCS(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const wchar_t *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
+
+/**
+ * Convert a UTF-16 string to UTF-8.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strToUTF8WithSub
+ * @see u_strFromUTF8
+ */
+U_STABLE char* U_EXPORT2
+u_strToUTF8(char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strFromUTF8WithSub
+ * @see u_strFromUTF8Lenient
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF8(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-8.
+ *
+ * Same as u_strToUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF8
+ * @see u_strFromUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE char* U_EXPORT2
+u_strToUTF8WithSub(char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ *
+ * Same as u_strFromUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8Lenient
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF8WithSub(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ *
+ * Same as u_strFromUTF8() except that this function is designed to be very fast,
+ * which it achieves by being lenient about malformed UTF-8 sequences.
+ * This function is intended for use in environments where UTF-8 text is
+ * expected to be well-formed.
+ *
+ * Its semantics are:
+ * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
+ * - The function will not read beyond the input string, nor write beyond
+ * the destCapacity.
+ * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
+ * be well-formed UTF-16.
+ * The function will resynchronize to valid code point boundaries
+ * within a small number of code points after an illegal sequence.
+ * - Non-shortest forms are not detected and will result in "spoofing" output.
+ *
+ * For further performance improvement, if srcLength is given (>=0),
+ * then it must be destCapacity>=srcLength.
+ *
+ * There is no inverse u_strToUTF8Lenient() function because there is practically
+ * no performance gain from not checking that a UTF-16 string is well-formed.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * Unlike for other ICU functions, if srcLength>=0 then it
+ * must be destCapacity>=srcLength.
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * Unlike for other ICU functions, if srcLength>=0 but
+ * destCapacity<srcLength, then *pDestLength will be set to srcLength
+ * (and U_BUFFER_OVERFLOW_ERROR will be set)
+ * regardless of the actual result length.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8WithSub
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFromUTF8Lenient(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-32.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF32WithSub
+ * @see u_strFromUTF32
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32* U_EXPORT2
+u_strToUTF32(UChar32 *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-32 string to UTF-16.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF32WithSub
+ * @see u_strToUTF32
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF32(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar32 *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-32.
+ *
+ * Same as u_strToUTF32() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF32
+ * @see u_strFromUTF32WithSub
+ * @stable ICU 4.2
+ */
+U_STABLE UChar32* U_EXPORT2
+u_strToUTF32WithSub(UChar32 *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-32 string to UTF-16.
+ *
+ * Same as u_strFromUTF32() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF32
+ * @see u_strToUTF32WithSub
+ * @stable ICU 4.2
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF32WithSub(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar32 *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a 16-bit Unicode string to Java Modified UTF-8.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8
+ *
+ * This function behaves according to the documentation for Java DataOutput.writeUTF()
+ * except that it does not encode the output length in the destination buffer
+ * and does not have an output length restriction.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String)
+ *
+ * The input string need not be well-formed UTF-16.
+ * (Therefore there is no subchar parameter.)
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @stable ICU 4.4
+ * @see u_strToUTF8WithSub
+ * @see u_strFromJavaModifiedUTF8WithSub
+ */
+U_STABLE char* U_EXPORT2
+u_strToJavaModifiedUTF8(
+ char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
+ * If the input string is not well-formed and no substitution char is specified,
+ * then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * This function behaves according to the documentation for Java DataInput.readUTF()
+ * except that it takes a length parameter rather than
+ * interpreting the first two input bytes as the length.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF()
+ *
+ * The output string may not be well-formed UTF-16.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8WithSub
+ * @see u_strFromUTF8Lenient
+ * @see u_strToJavaModifiedUTF8
+ * @stable ICU 4.4
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromJavaModifiedUTF8WithSub(
+ UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utf.h b/src/core/basetypes/icu-ucsdet/include/unicode/utf.h
new file mode 100644
index 00000000..f5954fe9
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utf.h
@@ -0,0 +1,223 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep09
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Code point macros
+ *
+ * This file defines macros for checking whether a code point is
+ * a surrogate or a non-character etc.
+ *
+ * The UChar and UChar32 data types for Unicode code units and code points
+ * are defined in umachine.h because they can be machine-dependent.
+ *
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h
+ * and itself includes utf8.h and utf16.h after some
+ * common definitions.
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be
+ * included explicitly if their definitions are used.
+ *
+ * utf8.h and utf16.h define macros for efficiently getting code points
+ * in and out of UTF-8/16 strings.
+ * utf16.h macros have "U16_" prefixes.
+ * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
+ *
+ * ICU mostly processes 16-bit Unicode strings.
+ * Most of the time, such strings are well-formed UTF-16.
+ * Single, unpaired surrogates must be handled as well, and are treated in ICU
+ * like regular code points where possible.
+ * (Pairs of surrogate code points are indistinguishable from supplementary
+ * code points encoded as pairs of supplementary code units.)
+ *
+ * In fact, almost all Unicode code points in normal text (>99%)
+ * are on the BMP (<=U+ffff) and even <=U+d7ff.
+ * ICU functions handle supplementary code points (U+10000..U+10ffff)
+ * but are optimized for the much more frequently occurring BMP code points.
+ *
+ * umachine.h defines UChar to be an unsigned 16-bit integer.
+ * Where available, UChar is defined to be a char16_t
+ * or a wchar_t (if that is an unsigned 16-bit type), otherwise uint16_t.
+ *
+ * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.
+ *
+ * utf.h defines a small number of C macros for single Unicode code points.
+ * These are simple checks for surrogates and non-characters.
+ * For actual Unicode character properties see uchar.h.
+ *
+ * By default, string operations must be done with error checking in case
+ * a string is not well-formed UTF-16.
+ * The macros will detect if a surrogate code unit is unpaired
+ * (lead unit without trail unit or vice versa) and just return the unit itself
+ * as the code point.
+ *
+ * The regular "safe" macros require that the initial, passed-in string index
+ * is within bounds. They only check the index when they read more than one
+ * code unit. This is usually done with code similar to the following loop:
+ * <pre>while(i<length) {
+ * U16_NEXT(s, i, length, c);
+ * // use c
+ * }</pre>
+ *
+ * When it is safe to assume that text is well-formed UTF-16
+ * (does not contain single, unpaired surrogates), then one can use
+ * U16_..._UNSAFE macros.
+ * These do not check for proper code unit sequences or truncated text and may
+ * yield wrong results or even cause a crash if they are used with "malformed"
+ * text.
+ * In practice, U16_..._UNSAFE macros will produce slightly less code but
+ * should not be faster because the processing is only different when a
+ * surrogate code unit is detected, which will be rare.
+ *
+ * Similarly for UTF-8, there are "safe" macros without a suffix,
+ * and U8_..._UNSAFE versions.
+ * The performance differences are much larger here because UTF-8 provides so
+ * many opportunities for malformed sequences.
+ * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
+ * and are fast, while the safe UTF-8 macros call functions for all but the
+ * trivial (ASCII) cases.
+ * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common
+ * characters inline as well.)
+ *
+ * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
+ * code point values (0..U+10ffff). They are indicated with negative values instead.
+ *
+ * For more information see the ICU User Guide Strings chapter
+ * (http://userguide.icu-project.org/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ *
+ * @stable ICU 2.4
+ */
+
+#ifndef __UTF_H__
+#define __UTF_H__
+
+#include "unicode/umachine.h"
+/* include the utfXX.h after the following definitions */
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Is this code point a Unicode noncharacter?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_NONCHAR(c) \
+ ((c)>=0xfdd0 && \
+ ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+ (uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is c a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_CHAR(c) \
+ ((uint32_t)(c)<0xd800 || \
+ ((uint32_t)(c)>0xdfff && \
+ (uint32_t)(c)<=0x10ffff && \
+ !U_IS_UNICODE_NONCHAR(c)))
+
+/**
+ * Is this code point a BMP code point (U+0000..U+ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.8
+ */
+#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
+
+/**
+ * Is this code point a supplementary code point (U+10000..U+10ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.8
+ */
+#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
+
+/**
+ * Is this code point a lead surrogate (U+d800..U+dbff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code point a trail surrogate (U+dc00..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code point a surrogate (U+d800..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a trail surrogate?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 4.2
+ */
+#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
+
+/* include the utfXX.h ------------------------------------------------------ */
+
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
+#include "unicode/utf_old.h"
+
+#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */
+
+#endif /* __UTF_H__ */
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utf16.h b/src/core/basetypes/icu-ucsdet/include/unicode/utf16.h
new file mode 100644
index 00000000..bdd88a8b
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utf16.h
@@ -0,0 +1,623 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf16.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep09
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 16-bit Unicode handling macros
+ *
+ * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://userguide.icu-project.org/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF16_H__
+#define __UTF16_H__
+
+#include "unicode/umachine.h"
+#ifndef __UTF_H__
+# include "unicode/utf.h"
+#endif
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
+
+/**
+ * Is this code unit a lead surrogate (U+d800..U+dbff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code unit a trail surrogate (U+dc00..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code unit a surrogate (U+d800..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a trail surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 4.2
+ */
+#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
+
+/**
+ * Helper constant for U16_GET_SUPPLEMENTARY.
+ * @internal
+ */
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/**
+ * Get a supplementary code point value (U+10000..U+10ffff)
+ * from its lead and trail surrogates.
+ * The result is undefined if the input values are not
+ * lead and trail surrogates.
+ *
+ * @param lead lead surrogate (U+d800..U+dbff)
+ * @param trail trail surrogate (U+dc00..U+dfff)
+ * @return supplementary code point (U+10000..U+10ffff)
+ * @stable ICU 2.4
+ */
+#define U16_GET_SUPPLEMENTARY(lead, trail) \
+ (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
+
+
+/**
+ * Get the lead surrogate (0xd800..0xdbff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return lead surrogate (U+d800..U+dbff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/**
+ * Get the trail surrogate (0xdc00..0xdfff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return trail surrogate (U+dc00..U+dfff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/**
+ * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
+ * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
+ * @param c 32-bit code point
+ * @return 1 or 2
+ * @stable ICU 2.4
+ */
+#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/**
+ * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+ * @return 2
+ * @stable ICU 2.4
+ */
+#define U16_MAX_LENGTH 2
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ * The result is undefined if the offset points to a single, unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_GET
+ * @stable ICU 2.4
+ */
+#define U16_GET_UNSAFE(s, i, c) { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
+ } else { \
+ (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
+ } \
+ } \
+}
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to a single, unpaired surrogate, then that itself
+ * will be returned as the code point.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_GET(s, start, i, length, c) { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } \
+ } else { \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } \
+ } \
+ } \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset points to a single, unpaired lead surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_NEXT
+ * @stable ICU 2.4
+ */
+#define U16_NEXT_UNSAFE(s, i, c) { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_LEAD(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
+ } \
+}
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_NEXT(s, i, length, c) { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_LEAD(c)) { \
+ uint16_t __c2; \
+ if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } \
+ } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U16_APPEND
+ * @stable ICU 2.4
+ */
+#define U16_APPEND_UNSAFE(s, i, c) { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a surrogate pair is written, checks for sufficient space in the string.
+ * If the code point is not valid or a trail surrogate does not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset, must be i<capacity
+ * @param capacity size of the string buffer
+ * @param c code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U16_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_APPEND(s, i, capacity, c, isError) { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } else /* c>0x10ffff or not enough space */ { \
+ (isError)=TRUE; \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_FWD_1
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1_UNSAFE(s, i) { \
+ if(U16_IS_LEAD((s)[(i)++])) { \
+ ++(i); \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @see U16_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1(s, i, length) { \
+ if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_FWD_N
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U16_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param n number of code points to skip
+ * @see U16_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N(s, i, length, n) { \
+ int32_t __N=(n); \
+ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
+ U16_FWD_1(s, i, length); \
+ --__N; \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START_UNSAFE(s, i) { \
+ if(U16_IS_TRAIL((s)[i])) { \
+ --(i); \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i
+ * @see U16_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START(s, start, i) { \
+ if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+ --(i); \
+ } \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind a single, unpaired trail surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_PREV
+ * @stable ICU 2.4
+ */
+#define U16_PREV_UNSAFE(s, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_TRAIL(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_PREV(s, start, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_TRAIL(c)) { \
+ uint16_t __c2; \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_BACK_1
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1_UNSAFE(s, i) { \
+ if(U16_IS_TRAIL((s)[--(i)])) { \
+ --(i); \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @see U16_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1(s, start, i) { \
+ if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+ --(i); \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_BACK_N
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U16_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start start of string
+ * @param i string offset, must be start<i
+ * @param n number of code points to skip
+ * @see U16_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N(s, start, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0 && (i)>(start)) { \
+ U16_BACK_1(s, start, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \
+ if(U16_IS_LEAD((s)[(i)-1])) { \
+ ++(i); \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, start<=i<=length
+ * @param length int32_t string length
+ * @see U16_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT(s, start, i, length) { \
+ if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+}
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utf8.h b/src/core/basetypes/icu-ucsdet/include/unicode/utf8.h
new file mode 100644
index 00000000..7bd0b0e8
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utf8.h
@@ -0,0 +1,824 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf8.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 8-bit Unicode handling macros
+ *
+ * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://userguide.icu-project.org/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF8_H__
+#define __UTF8_H__
+
+#include "unicode/umachine.h"
+#ifndef __UTF_H__
+# include "unicode/utf.h"
+#endif
+
+/* internal definitions ----------------------------------------------------- */
+
+/**
+ * \var utf8_countTrailBytes
+ * Internal array with numbers of trail bytes for any given byte used in
+ * lead byte position.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable,
+ * and should not be hidden when other internal functions are hidden (otherwise
+ * public macros would fail to compile).
+ * @internal
+ */
+#ifdef U_UTF8_IMPL
+U_EXPORT const uint8_t
+#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)
+U_CFUNC const uint8_t
+#else
+U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/
+#endif
+utf8_countTrailBytes[256];
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte.
+ * Returns 0 for 0..0xbf as well as for 0xfe and 0xff.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * Note: Beginning with ICU 50, the implementation uses a multi-condition expression
+ * which was shown in 2012 (on x86-64) to compile to fast, branch-free code.
+ * leadByte is evaluated multiple times.
+ *
+ * The pre-ICU 50 implementation used the exported array utf8_countTrailBytes:
+ * #define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[leadByte])
+ * leadByte was evaluated exactly once.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES(leadByte) \
+ ((uint8_t)(leadByte)<0xf0 ? \
+ ((uint8_t)(leadByte)>=0xc0)+((uint8_t)(leadByte)>=0xe0) : \
+ (uint8_t)(leadByte)<0xfe ? 3+((uint8_t)(leadByte)>=0xf8)+((uint8_t)(leadByte)>=0xfc) : 0)
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
+ * The maximum supported lead byte is 0xf4 corresponding to U+10FFFF.
+ * leadByte might be evaluated multiple times.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
+ (((leadByte)>=0xc0)+((leadByte)>=0xe0)+((leadByte)>=0xf0))
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ * @internal
+ */
+#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/**
+ * Function for handling "next code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_STABLE UChar32 U_EXPORT2
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "append code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_STABLE int32_t U_EXPORT2
+utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
+
+/**
+ * Function for handling "previous code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_STABLE UChar32 U_EXPORT2
+utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "skip backward one code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_STABLE int32_t U_EXPORT2
+utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_SINGLE(c) (((c)&0x80)==0)
+
+/**
+ * Is this code unit (byte) a UTF-8 lead byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
+
+/**
+ * Is this code unit (byte) a UTF-8 trail byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
+
+/**
+ * How many code units (bytes) are used for the UTF-8 encoding
+ * of this Unicode code point?
+ * @param c 32-bit code point
+ * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+ * @stable ICU 2.4
+ */
+#define U8_LENGTH(c) \
+ ((uint32_t)(c)<=0x7f ? 1 : \
+ ((uint32_t)(c)<=0x7ff ? 2 : \
+ ((uint32_t)(c)<=0xd7ff ? 3 : \
+ ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
+ ((uint32_t)(c)<=0xffff ? 3 : 4)\
+ ) \
+ ) \
+ ) \
+ )
+
+/**
+ * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
+ * @return 4
+ * @stable ICU 2.4
+ */
+#define U8_MAX_LENGTH 4
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * The result is undefined if the offset points to an illegal UTF-8
+ * byte sequence.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_GET
+ * @stable ICU 2.4
+ */
+#define U8_GET_UNSAFE(s, i, c) { \
+ int32_t _u8_get_unsafe_index=(int32_t)(i); \
+ U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
+ U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
+}
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to a negative value.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_GET(s, start, i, length, c) { \
+ int32_t _u8_get_index=(i); \
+ U8_SET_CP_START(s, start, _u8_get_index); \
+ U8_NEXT(s, _u8_get_index, length, c); \
+}
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_GET() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_GET
+ * @stable ICU 51
+ */
+#define U8_GET_OR_FFFD(s, start, i, length, c) { \
+ int32_t _u8_get_index=(i); \
+ U8_SET_CP_START(s, start, _u8_get_index); \
+ U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * The result is undefined if the offset points to a trail byte
+ * or an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_NEXT
+ * @stable ICU 2.4
+ */
+#define U8_NEXT_UNSAFE(s, i, c) { \
+ (c)=(uint8_t)(s)[(i)++]; \
+ if((c)>=0x80) { \
+ if((c)<0xe0) { \
+ (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
+ } else if((c)<0xf0) { \
+ /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+ (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
+ (i)+=2; \
+ } else { \
+ (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
+ (i)+=3; \
+ } \
+ } \
+}
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_NEXT(s, i, length, c) { \
+ (c)=(uint8_t)(s)[(i)++]; \
+ if((c)>=0x80) { \
+ uint8_t __t1, __t2; \
+ if( /* handle U+1000..U+CFFF inline */ \
+ (0xe0<(c) && (c)<=0xec) && \
+ (((i)+1)<(length) || (length)<0) && \
+ (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
+ (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
+ ) { \
+ /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+ (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
+ (i)+=2; \
+ } else if( /* handle U+0080..U+07FF inline */ \
+ ((c)<0xe0 && (c)>=0xc2) && \
+ ((i)!=(length)) && \
+ (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
+ ) { \
+ (c)=(((c)&0x1f)<<6)|__t1; \
+ ++(i); \
+ } else { \
+ /* function call for "complicated" and error cases */ \
+ (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \
+ } \
+ } \
+}
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_NEXT() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_NEXT
+ * @stable ICU 51
+ */
+#define U8_NEXT_OR_FFFD(s, i, length, c) { \
+ (c)=(uint8_t)(s)[(i)++]; \
+ if((c)>=0x80) { \
+ uint8_t __t1, __t2; \
+ if( /* handle U+1000..U+CFFF inline */ \
+ (0xe0<(c) && (c)<=0xec) && \
+ (((i)+1)<(length) || (length)<0) && \
+ (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
+ (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
+ ) { \
+ /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+ (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
+ (i)+=2; \
+ } else if( /* handle U+0080..U+07FF inline */ \
+ ((c)<0xe0 && (c)>=0xc2) && \
+ ((i)!=(length)) && \
+ (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
+ ) { \
+ (c)=(((c)&0x1f)<<6)|__t1; \
+ ++(i); \
+ } else { \
+ /* function call for "complicated" and error cases */ \
+ (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \
+ } \
+ } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const uint8_t * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U8_APPEND
+ * @stable ICU 2.4
+ */
+#define U8_APPEND_UNSAFE(s, i, c) { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ if((uint32_t)(c)<=0x7ff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+ } else { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+ } else { \
+ (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+ } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a non-ASCII code point is written, checks for sufficient space in the string.
+ * If the code point is not valid or trail bytes do not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s const uint8_t * string buffer
+ * @param i int32_t string offset, must be i<capacity
+ * @param capacity int32_t size of the string buffer
+ * @param c UChar32 code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U8_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_APPEND(s, i, capacity, c, isError) { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
+ (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+ } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
+ (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+ (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+ } else { \
+ (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_FWD_1
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1_UNSAFE(s, i) { \
+ (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((uint8_t)(s)[i]); \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @see U8_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1(s, i, length) { \
+ uint8_t __b=(uint8_t)(s)[(i)++]; \
+ if(U8_IS_LEAD(__b)) { \
+ uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
+ if((i)+__count>(length) && (length)>=0) { \
+ __count=(uint8_t)((length)-(i)); \
+ } \
+ while(__count>0 && U8_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ --__count; \
+ } \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_FWD_N
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U8_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param n number of code points to skip
+ * @see U8_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N(s, i, length, n) { \
+ int32_t __N=(n); \
+ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
+ U8_FWD_1(s, i, length); \
+ --__N; \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START_UNSAFE(s, i) { \
+ while(U8_IS_TRAIL((s)[i])) { --(i); } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i
+ * @see U8_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START(s, start, i) { \
+ if(U8_IS_TRAIL((s)[(i)])) { \
+ (i)=utf8_back1SafeBody(s, start, (i)); \
+ } \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_PREV
+ * @stable ICU 2.4
+ */
+#define U8_PREV_UNSAFE(s, i, c) { \
+ (c)=(uint8_t)(s)[--(i)]; \
+ if(U8_IS_TRAIL(c)) { \
+ uint8_t __b, __count=1, __shift=6; \
+\
+ /* c is a trail byte */ \
+ (c)&=0x3f; \
+ for(;;) { \
+ __b=(uint8_t)(s)[--(i)]; \
+ if(__b>=0xc0) { \
+ U8_MASK_LEAD_BYTE(__b, __count); \
+ (c)|=(UChar32)__b<<__shift; \
+ break; \
+ } else { \
+ (c)|=(UChar32)(__b&0x3f)<<__shift; \
+ ++__count; \
+ __shift+=6; \
+ } \
+ } \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_PREV(s, start, i, c) { \
+ (c)=(uint8_t)(s)[--(i)]; \
+ if((c)>=0x80) { \
+ (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_PREV() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_PREV
+ * @stable ICU 51
+ */
+#define U8_PREV_OR_FFFD(s, start, i, c) { \
+ (c)=(uint8_t)(s)[--(i)]; \
+ if((c)>=0x80) { \
+ (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_BACK_1
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1_UNSAFE(s, i) { \
+ while(U8_IS_TRAIL((s)[--(i)])) {} \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @see U8_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1(s, start, i) { \
+ if(U8_IS_TRAIL((s)[--(i)])) { \
+ (i)=utf8_back1SafeBody(s, start, (i)); \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_BACK_N
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U8_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t index of the start of the string
+ * @param i int32_t string offset, must be start<i
+ * @param n number of code points to skip
+ * @see U8_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N(s, start, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0 && (i)>(start)) { \
+ U8_BACK_1(s, start, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
+ U8_BACK_1_UNSAFE(s, i); \
+ U8_FWD_1_UNSAFE(s, i); \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i<=length
+ * @param length int32_t string length
+ * @see U8_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT(s, start, i, length) { \
+ if((start)<(i) && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
+ U8_BACK_1(s, start, i); \
+ U8_FWD_1(s, i, length); \
+ } \
+}
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utf_old.h b/src/core/basetypes/icu-ucsdet/include/unicode/utf_old.h
new file mode 100644
index 00000000..f9125b1d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utf_old.h
@@ -0,0 +1,1169 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf_old.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002sep21
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Deprecated macros for Unicode string handling
+ */
+
+/**
+ *
+ * The macros in utf_old.h are all deprecated and their use discouraged.
+ * Some of the design principles behind the set of UTF macros
+ * have changed or proved impractical.
+ * Almost all of the old "UTF macros" are at least renamed.
+ * If you are looking for a new equivalent to an old macro, please see the
+ * comment at the old one.
+ *
+ * Brief summary of reasons for deprecation:
+ * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing)
+ * was impractical.
+ * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing)
+ * was of little use and impractical.
+ * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE
+ * selection framework: UTF32_ macros (all trivial)
+ * and UTF_ default and intermediate macros (all aliases).
+ * - The selection framework also caused many macro aliases.
+ * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2).
+ * - Change of language in Unicode standard:
+ * Growing distinction between internal x-bit Unicode strings and external UTF-x
+ * forms, with the former more lenient.
+ * Suggests renaming of UTF16_ macros to U16_.
+ * - The prefix "UTF_" without a width number confused some users.
+ * - "Safe" append macros needed the addition of an error indicator output.
+ * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values
+ * to indicate error conditions.
+ * - The use of the "_CHAR" infix for code point operations confused some users.
+ *
+ * More details:
+ *
+ * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32
+ * for string processing, and among unsafe/safe/strict default macros for that.
+ *
+ * It proved nearly impossible to write non-trivial, high-performance code
+ * that is UTF-generic.
+ * Unsafe default macros would be dangerous for default string processing,
+ * and the main reason for the "strict" versions disappeared:
+ * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal.
+ * The only other conditions that "strict" checked for were non-characters,
+ * which are valid during processing. Only during text input/output should they
+ * be checked, and at that time other well-formedness checks may be
+ * necessary or useful as well.
+ * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR
+ * or U_IS_UNICODE_CHAR.
+ *
+ * The old UTF8_..._SAFE macros also used some normal Unicode code points
+ * to indicate malformed sequences.
+ * The new UTF8_ macros without suffix use negative values instead.
+ *
+ * The entire contents of utf32.h was moved here without replacement
+ * because all those macros were trivial and
+ * were meaningful only in the framework of choosing the UTF size.
+ *
+ * See Jitterbug 2150 and its discussion on the ICU mailing list
+ * in September 2002.
+ *
+ * <hr>
+ *
+ * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation:
+ *
+ * <p>The original concept for these files was for ICU to allow
+ * in principle to set which UTF (UTF-8/16/32) is used internally
+ * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type
+ * accordingly. UTF-16 was the default.</p>
+ *
+ * <p>This concept has been abandoned.
+ * A lot of the ICU source code assumes UChar strings are in UTF-16.
+ * This is especially true for low-level code like
+ * conversion, normalization, and collation.
+ * The utf.h header enforces the default of UTF-16.
+ * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p>
+ *
+ * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p>
+ *
+ * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.</p>
+ *
+ * <p>utf.h also defines a number of C macros for handling single Unicode code points and
+ * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual
+ * implementations of those macros and then aliases one set of them (for UTF-16) for general use.
+ * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while
+ * the general alias macros always begin with UTF_...</p>
+ *
+ * <p>Many string operations can be done with or without error checking.
+ * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe"
+ * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause
+ * program failures if the strings are not well-formed. The safe macros have an additional, boolean
+ * parameter "strict". If strict is FALSE, then only illegal sequences are detected.
+ * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates).
+ * Safe macros return special error code points for illegal/irregular sequences:
+ * Typically, U+ffff, or values that would result in a code unit sequence of the same length
+ * as the erroneous input sequence.<br>
+ * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and
+ * they do not have start/length parameters for boundary checking.</p>
+ *
+ * <p>Here, the macros are aliased in two steps:
+ * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are
+ * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures.
+ * Then, in a second step, the default, general alias macros are set to use either the unsafe or
+ * the safe/not strict (default) or the safe/strict macro;
+ * these general macros do not have a strictness parameter.</p>
+ *
+ * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict.
+ * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for
+ * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p>
+ *
+ * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix.
+ * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias.
+ * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds,
+ * then the _UNSAFE version may be used.
+ * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p>
+ *
+ * <hr>
+ *
+ * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead.
+ */
+
+#ifndef __UTF_OLD_H__
+#define __UTF_OLD_H__
+
+#ifndef U_HIDE_DEPRECATED_API
+
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/* Formerly utf.h, part 1 --------------------------------------------------- */
+
+#ifdef U_USE_UTF_DEPRECATES
+/**
+ * Unicode string and array offset and index type.
+ * ICU always counts Unicode code units (UChars) for
+ * string offsets, indexes, and lengths, not Unicode code points.
+ *
+ * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release.
+ */
+typedef int32_t UTextOffset;
+#endif
+
+/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF_SIZE 16
+
+/**
+ * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations
+ * with strict=FALSE.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_SAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_UNSAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_STRICT
+
+/**
+ * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
+ * which need 1 or 2 bytes in UTF-8:
+ * \code
+ * U+0015 = NAK = Negative Acknowledge, C0 control character
+ * U+009f = highest C1 control character
+ * \endcode
+ *
+ * These are used by UTF8_..._SAFE macros so that they can return an error value
+ * that needs the same number of code units (bytes) as were seen by
+ * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_1 0x15
+
+/**
+ * See documentation on UTF8_ERROR_VALUE_1 for details.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_2 0x9f
+
+/**
+ * Error value for all UTFs. This code point value will be set by macros with error
+ * checking if an error is detected.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ERROR_VALUE 0xffff
+
+/**
+ * Is a given 32-bit code an error value
+ * as returned by one of the macros for any UTF?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_ERROR(c) \
+ (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
+
+/**
+ * This is a combined macro: Is c a valid Unicode value _and_ not an error code?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_VALID(c) \
+ (UTF_IS_UNICODE_CHAR(c) && \
+ (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
+
+/**
+ * Is this code unit or code point a surrogate (U+d800..U+dfff)?
+ * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h.
+ */
+#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
+
+/**
+ * Is a given 32-bit code point a Unicode noncharacter?
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_NONCHAR(c) \
+ ((c)>=0xfdd0 && \
+ ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+ (uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is a given 32-bit value a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_CHAR(c) \
+ ((uint32_t)(c)<0xd800 || \
+ ((uint32_t)(c)>0xdfff && \
+ (uint32_t)(c)<=0x10ffff && \
+ !UTF_IS_UNICODE_NONCHAR(c)))
+
+/* Formerly utf8.h ---------------------------------------------------------- */
+
+/**
+ * Count the trail bytes for a UTF-8 lead byte.
+ * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h.
+ */
+#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h.
+ */
+#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/** Is this this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */
+#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
+/** Is this this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */
+#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
+/** Is this this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */
+#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
+
+/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */
+#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
+
+/**
+ * Given the lead character, how many bytes are taken by this code point.
+ * ICU does not deal with code points >0x10ffff
+ * unless necessary for advancing in the byte stream.
+ *
+ * These length macros take into account that for values >0x10ffff
+ * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff
+ * with 3 bytes.
+ * Code point comparisons need to be in uint32_t because UChar32
+ * may be a signed type, and negative values must be recognized.
+ *
+ * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf.h.
+ */
+#if 1
+# define UTF8_CHAR_LENGTH(c) \
+ ((uint32_t)(c)<=0x7f ? 1 : \
+ ((uint32_t)(c)<=0x7ff ? 2 : \
+ ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
+ ) \
+ )
+#else
+# define UTF8_CHAR_LENGTH(c) \
+ ((uint32_t)(c)<=0x7f ? 1 : \
+ ((uint32_t)(c)<=0x7ff ? 2 : \
+ ((uint32_t)(c)<=0xffff ? 3 : \
+ ((uint32_t)(c)<=0x10ffff ? 4 : \
+ ((uint32_t)(c)<=0x3ffffff ? 5 : \
+ ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
+ ) \
+ ) \
+ ) \
+ ) \
+ )
+#endif
+
+/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */
+#define UTF8_MAX_CHAR_LENGTH 4
+
+/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
+
+/** @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */
+#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
+ int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
+ UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
+ UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
+}
+
+/** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */
+#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+ int32_t _utf8_get_char_safe_index=(int32_t)(i); \
+ UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
+ UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */
+#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[(i)++]; \
+ if((uint8_t)((c)-0xc0)<0x35) { \
+ uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
+ UTF8_MASK_LEAD_BYTE(c, __count); \
+ switch(__count) { \
+ /* each following branch falls through to the next one */ \
+ case 3: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ case 2: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ case 1: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ /* no other branches to optimize switch() */ \
+ break; \
+ } \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */
+#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ if((uint32_t)(c)<=0x7ff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+ } else { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+ } else { \
+ (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF8_FWD_1_UNSAFE(s, i) { \
+ (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF8_FWD_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF8_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
+ while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
+}
+
+/** @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */
+#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+ (c)=(s)[(i)++]; \
+ if((c)>=0x80) { \
+ if(UTF8_IS_LEAD(c)) { \
+ (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
+ } else { \
+ (c)=UTF8_ERROR_VALUE_1; \
+ } \
+ } \
+}
+
+/** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */
+#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */
+#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */
+#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */
+#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */
+#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(UTF8_IS_TRAIL(c)) { \
+ uint8_t __b, __count=1, __shift=6; \
+\
+ /* c is a trail byte */ \
+ (c)&=0x3f; \
+ for(;;) { \
+ __b=(s)[--(i)]; \
+ if(__b>=0xc0) { \
+ UTF8_MASK_LEAD_BYTE(__b, __count); \
+ (c)|=(UChar32)__b<<__shift; \
+ break; \
+ } else { \
+ (c)|=(UChar32)(__b&0x3f)<<__shift; \
+ ++__count; \
+ __shift+=6; \
+ } \
+ } \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF8_BACK_1_UNSAFE(s, i) { \
+ while(UTF8_IS_TRAIL((s)[--(i)])) {} \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF8_BACK_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF8_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+ UTF8_BACK_1_UNSAFE(s, i); \
+ UTF8_FWD_1_UNSAFE(s, i); \
+}
+
+/** @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */
+#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+ (c)=(s)[--(i)]; \
+ if((c)>=0x80) { \
+ if((c)<=0xbf) { \
+ (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
+ } else { \
+ (c)=UTF8_ERROR_VALUE_1; \
+ } \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */
+#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */
+#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf16.h --------------------------------------------------------- */
+
+/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */
+#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
+
+/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */
+#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
+
+/** Assuming c is a surrogate, is it a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */
+#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
+
+/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */
+#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/** Get the UTF-32 value from the surrogate code units. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */
+#define UTF16_GET_PAIR_VALUE(first, second) \
+ (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
+
+/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
+
+/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */
+#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */
+#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */
+#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
+
+/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */
+#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
+
+/** @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */
+#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/** @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */
+#define UTF16_MAX_CHAR_LENGTH 2
+
+/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF16_ARRAY_SIZE(size) (size)
+
+/**
+ * Get a single code point from an offset that points to any
+ * of the code units that belong to that code point.
+ * Assume 0<=i<length.
+ *
+ * This could be used for iteration together with
+ * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(),
+ * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and
+ * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that.
+ * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h.
+ */
+#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[i]; \
+ if(UTF_IS_SURROGATE(c)) { \
+ if(UTF_IS_SURROGATE_FIRST(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
+ } else { \
+ (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
+ } \
+ } \
+}
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+ (c)=(s)[i]; \
+ if(UTF_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(UTF_IS_SURROGATE_FIRST(c)) { \
+ if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched first surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else { \
+ if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched second surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[(i)++]; \
+ if(UTF_IS_FIRST_SURROGATE(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF16_FWD_1_UNSAFE(s, i) { \
+ if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
+ ++(i); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF16_FWD_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF16_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
+ if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
+ --(i); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+ (c)=(s)[(i)++]; \
+ if(UTF_IS_FIRST_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched first surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ /* unmatched second surrogate or other non-character */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else if((uint32_t)(c)<=0x10ffff) { \
+ if((i)+1<(length)) { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } else /* not enough space */ { \
+ (s)[(i)++]=UTF_ERROR_VALUE; \
+ } \
+ } else /* c>0x10ffff, write error value */ { \
+ (s)[(i)++]=UTF_ERROR_VALUE; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(UTF_IS_SECOND_SURROGATE(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF16_BACK_1_UNSAFE(s, i) { \
+ if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
+ --(i); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF16_BACK_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF16_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+ if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
+ ++(i); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+ (c)=(s)[--(i)]; \
+ if(UTF_IS_SECOND_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched second surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ /* unmatched first surrogate or other non-character */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf32.h --------------------------------------------------------- */
+
+/*
+* Old documentation:
+*
+* This file defines macros to deal with UTF-32 code units and code points.
+* Signatures and semantics are the same as for the similarly named macros
+* in utf16.h.
+* utf32.h is included by utf.h after unicode/umachine.h</p>
+* and some common definitions.
+* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
+* Compound statements (curly braces {}) must be used for if-else-while...
+* bodies and all macro statements should be terminated with semicolon.</p>
+*/
+
+/* internal definitions ----------------------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SAFE(c, strict) \
+ (!(strict) ? \
+ (uint32_t)(c)<=0x10ffff : \
+ UTF_IS_UNICODE_CHAR(c))
+
+/*
+ * For the semantics of all of these macros, see utf16.h.
+ * The UTF-32 versions are trivial because any code point is
+ * encoded using exactly one code unit.
+ */
+
+/* single-code point definitions -------------------------------------------- */
+
+/* classes of code unit values */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SINGLE(uchar) 1
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_LEAD(uchar) 0
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_TRAIL(uchar) 0
+
+/* number of code units per code point */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_CHAR_LENGTH(c) 1
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_MAX_CHAR_LENGTH 1
+
+/* average number of code units compared to UTF-16 */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_ARRAY_SIZE(size) (size)
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[i]; \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+ (c)=(s)[i]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[(i)++]; \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
+ (s)[(i)++]=(c); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_UNSAFE(s, i) { \
+ ++(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_UNSAFE(s, i, n) { \
+ (i)+=(n); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+ (c)=(s)[(i)++]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
+ if((uint32_t)(c)<=0x10ffff) { \
+ (s)[(i)++]=(c); \
+ } else /* c>0x10ffff, write 0xfffd */ { \
+ (s)[(i)++]=0xfffd; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_SAFE(s, i, length) { \
+ ++(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_SAFE(s, i, length, n) { \
+ if(((i)+=(n))>(length)) { \
+ (i)=(length); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[--(i)]; \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_UNSAFE(s, i) { \
+ --(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_UNSAFE(s, i, n) { \
+ (i)-=(n); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+ (c)=(s)[--(i)]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_SAFE(s, start, i) { \
+ --(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_SAFE(s, start, i, n) { \
+ (i)-=(n); \
+ if((i)<(start)) { \
+ (i)=(start); \
+ } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
+}
+
+/* Formerly utf.h, part 2 --------------------------------------------------- */
+
+/**
+ * Estimate the number of code units for a string based on the number of UTF-16 code units.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
+
+/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */
+#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
+
+/* Define default macros (UTF-16 "safe") ------------------------------------ */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * Same as UTF16_IS_SINGLE.
+ * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h.
+ */
+#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
+
+/**
+ * Is this code unit the first one of several (a lead surrogate)?
+ * Same as UTF16_IS_LEAD.
+ * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.
+ */
+#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
+
+/**
+ * Is this code unit one of several but not the first one (a trail surrogate)?
+ * Same as UTF16_IS_TRAIL.
+ * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.
+ */
+#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
+
+/**
+ * Does this code point require multiple code units (is it a supplementary code point)?
+ * Same as UTF16_NEED_MULTIPLE_UCHAR.
+ * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead.
+ */
+#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
+
+/**
+ * How many code units are used to encode this code point (1 or 2)?
+ * Same as UTF16_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.
+ */
+#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
+
+/**
+ * How many code units are used at most for any Unicode code point (2)?
+ * Same as UTF16_MAX_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h.
+ */
+#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
+
+/**
+ * Set c to the code point that contains the code unit i.
+ * i could point to the lead or the trail surrogate for the code point.
+ * i is not modified.
+ * Same as UTF16_GET_CHAR.
+ * \pre 0<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h.
+ */
+#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
+
+/**
+ * Set c to the code point that starts at code unit i
+ * and advance i to beyond the code units of this code point (post-increment).
+ * i must point to the first code unit of a code point.
+ * Otherwise c is set to the trail unit (surrogate) itself.
+ * Same as UTF16_NEXT_CHAR.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h.
+ */
+#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
+
+/**
+ * Append the code units of code point c to the string at index i
+ * and advance i to beyond the new code units (post-increment).
+ * The code units beginning at index i will be overwritten.
+ * Same as UTF16_APPEND_CHAR.
+ * \pre 0<=c<=0x10ffff
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h.
+ */
+#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+/**
+ * Advance i to beyond the code units of the code point that begins at i.
+ * I.e., advance i by one code point.
+ * Same as UTF16_FWD_1.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.
+ */
+#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
+
+/**
+ * Advance i to beyond the code units of the n code points where the first one begins at i.
+ * I.e., advance i by n code points.
+ * Same as UT16_FWD_N.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.
+ */
+#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points to the beginning
+ * of a code point.
+ * The input index points to any code unit of a code point and is moved to point to
+ * the first code unit of the same code point. i is never incremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is decremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_START.
+ * \pre start<=i<length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.
+ */
+#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
+
+/**
+ * Set c to the code point that has code units before i
+ * and move i backward (towards the beginning of the string)
+ * to the first code unit of this code point (pre-increment).
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_PREV_CHAR.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h.
+ */
+#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the code point that has code units before i.
+ * I.e., move i backward by one code point.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_1.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.
+ */
+#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the n code points that have code units before i.
+ * I.e., move i backward by n code points.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_N.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.
+ */
+#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points beyond
+ * a code point. The input index points beyond any code unit
+ * of a code point and is moved to point beyond the last code unit of the same
+ * code point. i is never decremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is incremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_PREV_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_LIMIT.
+ * \pre start<i<=length
+ * \post start<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.
+ */
+#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif
+
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utrace.h b/src/core/basetypes/icu-ucsdet/include/unicode/utrace.h
new file mode 100644
index 00000000..2621cf9c
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utrace.h
@@ -0,0 +1,359 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utrace.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003aug06
+* created by: Markus W. Scherer
+*
+* Definitions for ICU tracing/logging.
+*
+*/
+
+#ifndef __UTRACE_H__
+#define __UTRACE_H__
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Definitions for ICU tracing/logging.
+ *
+ * This provides API for debugging the internals of ICU without the use of
+ * a traditional debugger.
+ *
+ * By default, tracing is disabled in ICU. If you need to debug ICU with
+ * tracing, please compile ICU with the --enable-tracing configure option.
+ */
+
+U_CDECL_BEGIN
+
+/**
+ * Trace severity levels. Higher levels increase the verbosity of the trace output.
+ * @see utrace_setLevel
+ * @stable ICU 2.8
+ */
+typedef enum UTraceLevel {
+ /** Disable all tracing @stable ICU 2.8*/
+ UTRACE_OFF=-1,
+ /** Trace error conditions only @stable ICU 2.8*/
+ UTRACE_ERROR=0,
+ /** Trace errors and warnings @stable ICU 2.8*/
+ UTRACE_WARNING=3,
+ /** Trace opens and closes of ICU services @stable ICU 2.8*/
+ UTRACE_OPEN_CLOSE=5,
+ /** Trace an intermediate number of ICU operations @stable ICU 2.8*/
+ UTRACE_INFO=7,
+ /** Trace the maximum number of ICU operations @stable ICU 2.8*/
+ UTRACE_VERBOSE=9
+} UTraceLevel;
+
+/**
+ * These are the ICU functions that will be traced when tracing is enabled.
+ * @stable ICU 2.8
+ */
+typedef enum UTraceFunctionNumber {
+ UTRACE_FUNCTION_START=0,
+ UTRACE_U_INIT=UTRACE_FUNCTION_START,
+ UTRACE_U_CLEANUP,
+ UTRACE_FUNCTION_LIMIT,
+
+ UTRACE_CONVERSION_START=0x1000,
+ UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START,
+ UTRACE_UCNV_OPEN_PACKAGE,
+ UTRACE_UCNV_OPEN_ALGORITHMIC,
+ UTRACE_UCNV_CLONE,
+ UTRACE_UCNV_CLOSE,
+ UTRACE_UCNV_FLUSH_CACHE,
+ UTRACE_UCNV_LOAD,
+ UTRACE_UCNV_UNLOAD,
+ UTRACE_CONVERSION_LIMIT,
+
+ UTRACE_COLLATION_START=0x2000,
+ UTRACE_UCOL_OPEN=UTRACE_COLLATION_START,
+ UTRACE_UCOL_CLOSE,
+ UTRACE_UCOL_STRCOLL,
+ UTRACE_UCOL_GET_SORTKEY,
+ UTRACE_UCOL_GETLOCALE,
+ UTRACE_UCOL_NEXTSORTKEYPART,
+ UTRACE_UCOL_STRCOLLITER,
+ UTRACE_UCOL_OPEN_FROM_SHORT_STRING,
+ UTRACE_UCOL_STRCOLLUTF8, /**< @stable ICU 50 */
+ UTRACE_COLLATION_LIMIT
+} UTraceFunctionNumber;
+
+/**
+ * Setter for the trace level.
+ * @param traceLevel A UTraceLevel value.
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+utrace_setLevel(int32_t traceLevel);
+
+/**
+ * Getter for the trace level.
+ * @return The UTraceLevel value being used by ICU.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+utrace_getLevel(void);
+
+/* Trace function pointers types ----------------------------- */
+
+/**
+ * Type signature for the trace function to be called when entering a function.
+ * @param context value supplied at the time the trace functions are set.
+ * @param fnNumber Enum value indicating the ICU function being entered.
+ * @stable ICU 2.8
+ */
+typedef void U_CALLCONV
+UTraceEntry(const void *context, int32_t fnNumber);
+
+/**
+ * Type signature for the trace function to be called when exiting from a function.
+ * @param context value supplied at the time the trace functions are set.
+ * @param fnNumber Enum value indicating the ICU function being exited.
+ * @param fmt A formatting string that describes the number and types
+ * of arguments included with the variable args. The fmt
+ * string has the same form as the utrace_vformat format
+ * string.
+ * @param args A variable arguments list. Contents are described by
+ * the fmt parameter.
+ * @see utrace_vformat
+ * @stable ICU 2.8
+ */
+typedef void U_CALLCONV
+UTraceExit(const void *context, int32_t fnNumber,
+ const char *fmt, va_list args);
+
+/**
+ * Type signature for the trace function to be called from within an ICU function
+ * to display data or messages.
+ * @param context value supplied at the time the trace functions are set.
+ * @param fnNumber Enum value indicating the ICU function being exited.
+ * @param level The current tracing level
+ * @param fmt A format string describing the tracing data that is supplied
+ * as variable args
+ * @param args The data being traced, passed as variable args.
+ * @stable ICU 2.8
+ */
+typedef void U_CALLCONV
+UTraceData(const void *context, int32_t fnNumber, int32_t level,
+ const char *fmt, va_list args);
+
+/**
+ * Set ICU Tracing functions. Installs application-provided tracing
+ * functions into ICU. After doing this, subsequent ICU operations
+ * will call back to the installed functions, providing a trace
+ * of the use of ICU. Passing a NULL pointer for a tracing function
+ * is allowed, and inhibits tracing action at points where that function
+ * would be called.
+ * <p>
+ * Tracing and Threads: Tracing functions are global to a process, and
+ * will be called in response to ICU operations performed by any
+ * thread. If tracing of an individual thread is desired, the
+ * tracing functions must themselves filter by checking that the
+ * current thread is the desired thread.
+ *
+ * @param context an uninterpretted pointer. Whatever is passed in
+ * here will in turn be passed to each of the tracing
+ * functions UTraceEntry, UTraceExit and UTraceData.
+ * ICU does not use or alter this pointer.
+ * @param e Callback function to be called on entry to a
+ * a traced ICU function.
+ * @param x Callback function to be called on exit from a
+ * traced ICU function.
+ * @param d Callback function to be called from within a
+ * traced ICU function, for the purpose of providing
+ * data to the trace.
+ *
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+utrace_setFunctions(const void *context,
+ UTraceEntry *e, UTraceExit *x, UTraceData *d);
+
+/**
+ * Get the currently installed ICU tracing functions. Note that a null function
+ * pointer will be returned if no trace function has been set.
+ *
+ * @param context The currently installed tracing context.
+ * @param e The currently installed UTraceEntry function.
+ * @param x The currently installed UTraceExit function.
+ * @param d The currently installed UTraceData function.
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+utrace_getFunctions(const void **context,
+ UTraceEntry **e, UTraceExit **x, UTraceData **d);
+
+
+
+/*
+ *
+ * ICU trace format string syntax
+ *
+ * Format Strings are passed to UTraceData functions, and define the
+ * number and types of the trace data being passed on each call.
+ *
+ * The UTraceData function, which is supplied by the application,
+ * not by ICU, can either forward the trace data (passed via
+ * varargs) and the format string back to ICU for formatting into
+ * a displayable string, or it can interpret the format itself,
+ * and do as it wishes with the trace data.
+ *
+ *
+ * Goals for the format string
+ * - basic data output
+ * - easy to use for trace programmer
+ * - sufficient provision for data types for trace output readability
+ * - well-defined types and binary portable APIs
+ *
+ * Non-goals
+ * - printf compatibility
+ * - fancy formatting
+ * - argument reordering and other internationalization features
+ *
+ * ICU trace format strings contain plain text with argument inserts,
+ * much like standard printf format strings.
+ * Each insert begins with a '%', then optionally contains a 'v',
+ * then exactly one type character.
+ * Two '%' in a row represent a '%' instead of an insert.
+ * The trace format strings need not have \n at the end.
+ *
+ *
+ * Types
+ * -----
+ *
+ * Type characters:
+ * - c A char character in the default codepage.
+ * - s A NUL-terminated char * string in the default codepage.
+ * - S A UChar * string. Requires two params, (ptr, length). Length=-1 for nul term.
+ * - b A byte (8-bit integer).
+ * - h A 16-bit integer. Also a 16 bit Unicode code unit.
+ * - d A 32-bit integer. Also a 20 bit Unicode code point value.
+ * - l A 64-bit integer.
+ * - p A data pointer.
+ *
+ * Vectors
+ * -------
+ *
+ * If the 'v' is not specified, then one item of the specified type
+ * is passed in.
+ * If the 'v' (for "vector") is specified, then a vector of items of the
+ * specified type is passed in, via a pointer to the first item
+ * and an int32_t value for the length of the vector.
+ * Length==-1 means zero or NUL termination. Works for vectors of all types.
+ *
+ * Note: %vS is a vector of (UChar *) strings. The strings must
+ * be nul terminated as there is no way to provide a
+ * separate length parameter for each string. The length
+ * parameter (required for all vectors) is the number of
+ * strings, not the length of the strings.
+ *
+ * Examples
+ * --------
+ *
+ * These examples show the parameters that will be passed to an application's
+ * UTraceData() function for various formats.
+ *
+ * - the precise formatting is up to the application!
+ * - the examples use type casts for arguments only to _show_ the types of
+ * arguments without needing variable declarations in the examples;
+ * the type casts will not be necessary in actual code
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "There is a character %c in the string %s.", // Format String
+ * (char)c, (const char *)s); // varargs parameters
+ * -> There is a character 0x42 'B' in the string "Bravo".
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "Vector of bytes %vb vector of chars %vc",
+ * (const uint8_t *)bytes, (int32_t)bytesLength,
+ * (const char *)chars, (int32_t)charsLength);
+ * -> Vector of bytes
+ * 42 63 64 3f [4]
+ * vector of chars
+ * "Bcd?"[4]
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "An int32_t %d and a whole bunch of them %vd",
+ * (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
+ * -> An int32_t 0xfffffffb and a whole bunch of them
+ * fffffffb 00000005 0000010a [3]
+ *
+ */
+
+
+
+/**
+ * Trace output Formatter. An application's UTraceData tracing functions may call
+ * back to this function to format the trace output in a
+ * human readable form. Note that a UTraceData function may choose
+ * to not format the data; it could, for example, save it in
+ * in the raw form it was received (more compact), leaving
+ * formatting for a later trace analyis tool.
+ * @param outBuf pointer to a buffer to receive the formatted output. Output
+ * will be nul terminated if there is space in the buffer -
+ * if the length of the requested output < the output buffer size.
+ * @param capacity Length of the output buffer.
+ * @param indent Number of spaces to indent the output. Intended to allow
+ * data displayed from nested functions to be indented for readability.
+ * @param fmt Format specification for the data to output
+ * @param args Data to be formatted.
+ * @return Length of formatted output, including the terminating NUL.
+ * If buffer capacity is insufficient, the required capacity is returned.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+utrace_vformat(char *outBuf, int32_t capacity,
+ int32_t indent, const char *fmt, va_list args);
+
+/**
+ * Trace output Formatter. An application's UTraceData tracing functions may call
+ * this function to format any additional trace data, beyond that
+ * provided by default, in human readable form with the same
+ * formatting conventions used by utrace_vformat().
+ * @param outBuf pointer to a buffer to receive the formatted output. Output
+ * will be nul terminated if there is space in the buffer -
+ * if the length of the requested output < the output buffer size.
+ * @param capacity Length of the output buffer.
+ * @param indent Number of spaces to indent the output. Intended to allow
+ * data displayed from nested functions to be indented for readability.
+ * @param fmt Format specification for the data to output
+ * @param ... Data to be formatted.
+ * @return Length of formatted output, including the terminating NUL.
+ * If buffer capacity is insufficient, the required capacity is returned.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+utrace_format(char *outBuf, int32_t capacity,
+ int32_t indent, const char *fmt, ...);
+
+
+
+/* Trace function numbers --------------------------------------------------- */
+
+/**
+ * Get the name of a function from its trace function number.
+ *
+ * @param fnNumber The trace number for an ICU function.
+ * @return The name string for the function.
+ *
+ * @see UTraceFunctionNumber
+ * @stable ICU 2.8
+ */
+U_STABLE const char * U_EXPORT2
+utrace_functionName(int32_t fnNumber);
+
+U_CDECL_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utypes.h b/src/core/basetypes/icu-ucsdet/include/unicode/utypes.h
new file mode 100644
index 00000000..704089c7
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utypes.h
@@ -0,0 +1,723 @@
+/*
+**********************************************************************
+* Copyright (C) 1996-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* FILE NAME : UTYPES.H (formerly ptypes.h)
+*
+* Date Name Description
+* 12/11/96 helena Creation.
+* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
+* uint8, uint16, and uint32.
+* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
+* well as C++.
+* Modified to use memcpy() for uprv_arrayCopy() fns.
+* 04/14/97 aliu Added TPlatformUtilities.
+* 05/07/97 aliu Added import/export specifiers (replacing the old
+* broken EXT_CLASS). Added version number for our
+* code. Cleaned up header.
+* 6/20/97 helena Java class name change.
+* 08/11/98 stephen UErrorCode changed from typedef to enum
+* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
+* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
+* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
+* 04/20/99 stephen Cleaned up & reworked for autoconf.
+* Renamed to utypes.h.
+* 05/05/99 stephen Changed to use <inttypes.h>
+* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
+*******************************************************************************
+*/
+
+#ifndef UTYPES_H
+#define UTYPES_H
+
+
+#include "unicode/umachine.h"
+#include "unicode/uversion.h"
+#include "unicode/uconfig.h"
+#include <float.h>
+
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+# include "unicode/utf.h"
+#endif
+
+/*!
+ * \file
+ * \brief Basic definitions for ICU, for both C and C++ APIs
+ *
+ * This file defines basic types, constants, and enumerations directly or
+ * indirectly by including other header files, especially utf.h for the
+ * basic character and string definitions and umachine.h for consistent
+ * integer and other types.
+ */
+
+
+/**
+ * \def U_SHOW_CPLUSPLUS_API
+ * @internal
+ */
+#ifdef __cplusplus
+# ifndef U_SHOW_CPLUSPLUS_API
+# define U_SHOW_CPLUSPLUS_API 1
+# endif
+#else
+# undef U_SHOW_CPLUSPLUS_API
+# define U_SHOW_CPLUSPLUS_API 0
+#endif
+
+/** @{ API visibility control */
+
+/**
+ * \def U_HIDE_DRAFT_API
+ * Define this to 1 to request that draft API be "hidden"
+ * @internal
+ */
+/**
+ * \def U_HIDE_INTERNAL_API
+ * Define this to 1 to request that internal API be "hidden"
+ * @internal
+ */
+#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
+#define U_HIDE_DRAFT_API 1
+#endif
+#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API)
+#define U_HIDE_INTERNAL_API 1
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/* ICUDATA naming scheme */
+/*===========================================================================*/
+
+/**
+ * \def U_ICUDATA_TYPE_LETTER
+ *
+ * This is a platform-dependent string containing one letter:
+ * - b for big-endian, ASCII-family platforms
+ * - l for little-endian, ASCII-family platforms
+ * - e for big-endian, EBCDIC-family platforms
+ * This letter is part of the common data file name.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_ICUDATA_TYPE_LITLETTER
+ * The non-string form of U_ICUDATA_TYPE_LETTER
+ * @stable ICU 2.0
+ */
+#if U_CHARSET_FAMILY
+# if U_IS_BIG_ENDIAN
+ /* EBCDIC - should always be BE */
+# define U_ICUDATA_TYPE_LETTER "e"
+# define U_ICUDATA_TYPE_LITLETTER e
+# else
+# error "Don't know what to do with little endian EBCDIC!"
+# define U_ICUDATA_TYPE_LETTER "x"
+# define U_ICUDATA_TYPE_LITLETTER x
+# endif
+#else
+# if U_IS_BIG_ENDIAN
+ /* Big-endian ASCII */
+# define U_ICUDATA_TYPE_LETTER "b"
+# define U_ICUDATA_TYPE_LITLETTER b
+# else
+ /* Little-endian ASCII */
+# define U_ICUDATA_TYPE_LETTER "l"
+# define U_ICUDATA_TYPE_LITLETTER l
+# endif
+#endif
+
+/**
+ * A single string literal containing the icudata stub name. i.e. 'icudt18e' for
+ * ICU 1.8.x on EBCDIC, etc..
+ * @stable ICU 2.0
+ */
+#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
+#ifndef U_HIDE_INTERNAL_API
+#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
+#define U_USE_USRDATA 1 /**< @internal */
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
+ * Defined as a literal, not a string.
+ * Tricky Preprocessor use - ## operator replaces macro paramters with the literal string
+ * from the corresponding macro invocation, _before_ other macro substitutions.
+ * Need a nested \#defines to get the actual version numbers rather than
+ * the literal text U_ICU_VERSION_MAJOR_NUM into the name.
+ * The net result will be something of the form
+ * \#define U_ICU_ENTRY_POINT icudt19_dat
+ * @stable ICU 2.4
+ */
+#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
+ * @internal
+ */
+#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
+
+/**
+ * Do not use.
+ * @internal
+ */
+#ifndef U_DEF_ICUDATA_ENTRY_POINT
+/* affected by symbol renaming. See platform.h */
+#ifndef U_LIB_SUFFIX_C_NAME
+#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
+#else
+#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
+#endif
+#endif
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * \def NULL
+ * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
+ * @stable ICU 2.0
+ */
+#ifndef NULL
+#ifdef __cplusplus
+#define NULL 0
+#else
+#define NULL ((void *)0)
+#endif
+#endif
+
+/*===========================================================================*/
+/* Calendar/TimeZone data types */
+/*===========================================================================*/
+
+/**
+ * Date and Time data type.
+ * This is a primitive data type that holds the date and time
+ * as the number of milliseconds since 1970-jan-01, 00:00 UTC.
+ * UTC leap seconds are ignored.
+ * @stable ICU 2.0
+ */
+typedef double UDate;
+
+/** The number of milliseconds per second @stable ICU 2.0 */
+#define U_MILLIS_PER_SECOND (1000)
+/** The number of milliseconds per minute @stable ICU 2.0 */
+#define U_MILLIS_PER_MINUTE (60000)
+/** The number of milliseconds per hour @stable ICU 2.0 */
+#define U_MILLIS_PER_HOUR (3600000)
+/** The number of milliseconds per day @stable ICU 2.0 */
+#define U_MILLIS_PER_DAY (86400000)
+
+/**
+ * Maximum UDate value
+ * @stable ICU 4.8
+ */
+#define U_DATE_MAX DBL_MAX
+
+/**
+ * Minimum UDate value
+ * @stable ICU 4.8
+ */
+#define U_DATE_MIN -U_DATE_MAX
+
+/*===========================================================================*/
+/* Shared library/DLL import-export API control */
+/*===========================================================================*/
+
+/*
+ * Control of symbol import/export.
+ * ICU is separated into three libraries.
+ */
+
+/**
+ * \def U_COMBINED_IMPLEMENTATION
+ * Set to export library symbols from inside the ICU library
+ * when all of ICU is in a single library.
+ * This can be set as a compiler option while building ICU, and it
+ * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_DATA_API
+ * Set to export library symbols from inside the stubdata library,
+ * and to import them from outside.
+ * @stable ICU 3.0
+ */
+
+/**
+ * \def U_COMMON_API
+ * Set to export library symbols from inside the common library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_I18N_API
+ * Set to export library symbols from inside the i18n library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUT_API
+ * Set to export library symbols from inside the layout engine library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUTEX_API
+ * Set to export library symbols from inside the layout extensions library,
+ * and to import them from outside.
+ * @stable ICU 2.6
+ */
+
+/**
+ * \def U_IO_API
+ * Set to export library symbols from inside the ustdio library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_TOOLUTIL_API
+ * Set to export library symbols from inside the toolutil library,
+ * and to import them from outside.
+ * @stable ICU 3.4
+ */
+
+#if defined(U_COMBINED_IMPLEMENTATION)
+#define U_DATA_API U_EXPORT
+#define U_COMMON_API U_EXPORT
+#define U_I18N_API U_EXPORT
+#define U_LAYOUT_API U_EXPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API U_EXPORT
+#define U_TOOLUTIL_API U_EXPORT
+#elif defined(U_STATIC_IMPLEMENTATION)
+#define U_DATA_API
+#define U_COMMON_API
+#define U_I18N_API
+#define U_LAYOUT_API
+#define U_LAYOUTEX_API
+#define U_IO_API
+#define U_TOOLUTIL_API
+#elif defined(U_COMMON_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_EXPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_I18N_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_EXPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_LAYOUT_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_EXPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_LAYOUTEX_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_IO_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_EXPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_TOOLUTIL_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_EXPORT
+#else
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#endif
+
+/**
+ * \def U_STANDARD_CPP_NAMESPACE
+ * Control of C++ Namespace
+ * @stable ICU 2.0
+ */
+#ifdef __cplusplus
+#define U_STANDARD_CPP_NAMESPACE ::
+#else
+#define U_STANDARD_CPP_NAMESPACE
+#endif
+
+
+/*===========================================================================*/
+/* Global delete operator */
+/*===========================================================================*/
+
+/*
+ * The ICU4C library must not use the global new and delete operators.
+ * These operators here are defined to enable testing for this.
+ * See Jitterbug 2581 for details of why this is necessary.
+ *
+ * Verification that ICU4C's memory usage is correct, i.e.,
+ * that global new/delete are not used:
+ *
+ * a) Check for imports of global new/delete (see uobject.cpp for details)
+ * b) Verify that new is never imported.
+ * c) Verify that delete is only imported from object code for interface/mixin classes.
+ * d) Add global delete and delete[] only for the ICU4C library itself
+ * and define them in a way that crashes or otherwise easily shows a problem.
+ *
+ * The following implements d).
+ * The operator implementations crash; this is intentional and used for library debugging.
+ *
+ * Note: This is currently only done on Windows because
+ * some Linux/Unix compilers have problems with defining global new/delete.
+ * On Windows, it is _MSC_VER>=1200 for MSVC 6.0 and higher.
+ */
+#if defined(__cplusplus) && U_DEBUG && U_OVERRIDE_CXX_ALLOCATION && (_MSC_VER>=1200) && !defined(U_STATIC_IMPLEMENTATION) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION))
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Global operator new, defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void *
+operator new(size_t /*size*/) {
+ char *q=NULL;
+ *q=5; /* break it */
+ return q;
+}
+
+#ifdef _Ret_bytecap_
+/* This is only needed to suppress a Visual C++ 2008 warning for operator new[]. */
+_Ret_bytecap_(_Size)
+#endif
+/**
+ * Global operator new[], defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void *
+operator new[](size_t /*size*/) {
+ char *q=NULL;
+ *q=5; /* break it */
+ return q;
+}
+
+/**
+ * Global operator delete, defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void
+operator delete(void * /*p*/) {
+ char *q=NULL;
+ *q=5; /* break it */
+}
+
+/**
+ * Global operator delete[], defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void
+operator delete[](void * /*p*/) {
+ char *q=NULL;
+ *q=5; /* break it */
+}
+
+#endif /* U_HIDE_INTERNAL_API */
+#endif
+
+/*===========================================================================*/
+/* UErrorCode */
+/*===========================================================================*/
+
+/**
+ * Error code to replace exception handling, so that the code is compatible with all C++ compilers,
+ * and to use the same mechanism for C and C++.
+ *
+ * \par
+ * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode
+ * first test if(U_FAILURE(errorCode)) { return immediately; }
+ * so that in a chain of such functions the first one that sets an error code
+ * causes the following ones to not perform any operations.
+ *
+ * \par
+ * Error codes should be tested using U_FAILURE() and U_SUCCESS().
+ * @stable ICU 2.0
+ */
+typedef enum UErrorCode {
+ /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
+ * and is that way because VC++ debugger displays first encountered constant,
+ * which is not the what the code is used for
+ */
+
+ U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
+
+ U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
+
+ U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
+
+ U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
+
+ U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
+
+ U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
+
+ U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
+
+ U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
+
+ U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
+
+ U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
+
+ U_ERROR_WARNING_LIMIT, /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */
+
+
+ U_ZERO_ERROR = 0, /**< No error, no warning. */
+
+ U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
+ U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
+ U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
+ U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
+ U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
+ U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
+ U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
+ U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
+ U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
+ U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
+ U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
+ U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
+ U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
+ U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
+ U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
+ U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
+ U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
+ U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */
+ U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
+ U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
+ U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
+ U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
+ U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
+ U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
+ It is very possible that a circular alias definition has occured */
+ U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
+ U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
+ U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
+ U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
+ U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
+ U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */
+
+ U_STANDARD_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for standard errors */
+ /*
+ * the error code range 0x10000 0x10100 are reserved for Transliterator
+ */
+ U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
+ U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
+ U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
+ U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
+ U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
+ U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
+ U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
+ U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
+ U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
+ U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
+ U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
+ U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
+ U_MISSING_OPERATOR, /**< A rule contains no operator */
+ U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
+ U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
+ U_MULTIPLE_CURSORS, /**< More than one cursor */
+ U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
+ U_TRAILING_BACKSLASH, /**< A dangling backslash */
+ U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
+ U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
+ U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
+ U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
+ U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
+ U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
+ U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
+ U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
+ U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
+ U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */
+ U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
+ U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
+ U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
+ U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
+ U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
+ U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
+ U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
+ U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
+ U_PARSE_ERROR_LIMIT, /**< The limit for Transliterator errors */
+
+ /*
+ * the error code range 0x10100 0x10200 are reserved for formatting API parsing error
+ */
+ U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
+ U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
+ U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
+ U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
+ U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
+ U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
+ U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
+ U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
+ U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
+ U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
+ U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
+ U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
+ U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
+ U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
+ U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */
+ U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */
+ U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
+ U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
+ U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
+ U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
+ U_FMT_PARSE_ERROR_LIMIT, /**< The limit for format library errors */
+
+ /*
+ * the error code range 0x10200 0x102ff are reserved for Break Iterator related error
+ */
+ U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */
+ U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
+ U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
+ U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
+ U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
+ U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */
+ U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
+ U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
+ U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
+ U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
+ U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
+ U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
+ U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
+ U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
+ U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */
+ U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */
+
+ /*
+ * The error codes in the range 0x10300-0x103ff are reserved for regular expression related errrs
+ */
+ U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */
+ U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
+ U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
+ U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
+ U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
+ U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
+ U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
+ U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
+ U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
+ U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
+ U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
+ U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
+ U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
+ U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
+ U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
+ U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
+ U_REGEX_MISSING_CLOSE_BRACKET, /**< Missing closing bracket on a bracket expression. */
+ U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
+ U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
+ U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
+ U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
+ U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */
+
+ /*
+ * The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
+ */
+ U_IDNA_PROHIBITED_ERROR=0x10400,
+ U_IDNA_ERROR_START=0x10400,
+ U_IDNA_UNASSIGNED_ERROR,
+ U_IDNA_CHECK_BIDI_ERROR,
+ U_IDNA_STD3_ASCII_RULES_ERROR,
+ U_IDNA_ACE_PREFIX_ERROR,
+ U_IDNA_VERIFICATION_ERROR,
+ U_IDNA_LABEL_TOO_LONG_ERROR,
+ U_IDNA_ZERO_LENGTH_LABEL_ERROR,
+ U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
+ U_IDNA_ERROR_LIMIT,
+ /*
+ * Aliases for StringPrep
+ */
+ U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
+ U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
+ U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
+
+ /*
+ * The error code in the range 0x10500-0x105ff are reserved for Plugin related error codes
+ */
+ U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */
+ U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */
+ U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
+ U_PLUGIN_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for plugin errors */
+
+ U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
+} UErrorCode;
+
+/* Use the following to determine if an UErrorCode represents */
+/* operational success or failure. */
+
+#ifdef __cplusplus
+ /**
+ * Does the error code indicate success?
+ * @stable ICU 2.0
+ */
+ static
+ inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
+ /**
+ * Does the error code indicate a failure?
+ * @stable ICU 2.0
+ */
+ static
+ inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
+#else
+ /**
+ * Does the error code indicate success?
+ * @stable ICU 2.0
+ */
+# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
+ /**
+ * Does the error code indicate a failure?
+ * @stable ICU 2.0
+ */
+# define U_FAILURE(x) ((x)>U_ZERO_ERROR)
+#endif
+
+/**
+ * Return a string for a UErrorCode value.
+ * The string will be the same as the name of the error code constant
+ * in the UErrorCode enum above.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+u_errorName(UErrorCode code);
+
+
+#endif /* _UTYPES */
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uvernum.h b/src/core/basetypes/icu-ucsdet/include/unicode/uvernum.h
new file mode 100644
index 00000000..5af40e33
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uvernum.h
@@ -0,0 +1,170 @@
+/*
+*******************************************************************************
+* Copyright (C) 2000-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: uvernum.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Vladimir Weinstein
+* Updated by: Steven R. Loomis
+*
+*/
+
+/**
+ * \file
+ * \brief C API: definitions of ICU version numbers
+ *
+ * This file is included by uversion.h and other files. This file contains only
+ * macros and definitions. The actual version numbers are defined here.
+ */
+
+ /*
+ * IMPORTANT: When updating version, the following things need to be done:
+ * source/common/unicode/uvernum.h - this file: update major, minor,
+ * patchlevel, suffix, version, short version constants, namespace,
+ * renaming macro, and copyright
+ *
+ * The following files need to be updated as well, which can be done
+ * by running the UNIX makefile target 'update-windows-makefiles' in icu/source.
+ *
+ *
+ * source/common/common.vcproj - update 'Output file name' on the link tab so
+ * that it contains the new major/minor combination
+ * source/i18n/i18n.vcproj - same as for the common.vcproj
+ * source/layout/layout.vcproj - same as for the common.vcproj
+ * source/layoutex/layoutex.vcproj - same
+ * source/stubdata/stubdata.vcproj - same as for the common.vcproj
+ * source/io/io.vcproj - same as for the common.vcproj
+ * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
+ * the new major/minor combination and the Unicode version.
+ */
+
+#ifndef UVERNUM_H
+#define UVERNUM_H
+
+/** The standard copyright notice that gets compiled into each library.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING \
+ " Copyright (C) 2014, International Business Machines Corporation and others. All Rights Reserved. "
+
+/** The current ICU major version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION_MAJOR_NUM 54
+
+/** The current ICU minor version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_MINOR_NUM 1
+
+/** The current ICU patchlevel version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION_PATCHLEVEL_NUM 0
+
+/** The current ICU build level version as an integer.
+ * This value is for use by ICU clients. It defaults to 0.
+ * @stable ICU 4.0
+ */
+#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
+#define U_ICU_VERSION_BUILDLEVEL_NUM 0
+#endif
+
+/** Glued version suffix for renamers
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SUFFIX _54_mc
+
+/**
+ * \def U_DEF2_ICU_ENTRY_POINT_RENAME
+ * @internal
+ */
+/**
+ * \def U_DEF_ICU_ENTRY_POINT_RENAME
+ * @internal
+ */
+/** Glued version suffix function for renamers
+ * This value will change in the subsequent releases of ICU.
+ * If a custom suffix (such as matching library suffixes) is desired, this can be modified.
+ * Note that if present, platform.h may contain an earlier definition of this macro.
+ * \def U_ICU_ENTRY_POINT_RENAME
+ * @stable ICU 4.2
+ */
+
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#ifdef U_HAVE_LIB_SUFFIX
+#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z
+#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z)
+#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME)
+#else
+#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
+#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
+#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX)
+#endif
+#endif
+
+/** The current ICU library version as a dotted-decimal string. The patchlevel
+ * only appears in this string if it non-zero.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION "54.1"
+
+/** The current ICU library major/minor version as a string without dots, for library name suffixes.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SHORT "54"
+
+#ifndef U_HIDE_INTERNAL_API
+/** Data version in ICU4C.
+ * @internal ICU 4.4 Internal Use Only
+ **/
+#define U_ICU_DATA_VERSION "54.1"
+#endif /* U_HIDE_INTERNAL_API */
+
+/*===========================================================================
+ * ICU collation framework version information
+ * Version info that can be obtained from a collator is affected by these
+ * numbers in a secret and magic way. Please use collator version as whole
+ *===========================================================================
+ */
+
+/**
+ * Collation runtime version (sort key generator, strcoll).
+ * If the version is different, sort keys for the same string could be different.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_RUNTIME_VERSION 8
+
+/**
+ * Collation builder code version.
+ * When this is different, the same tailoring might result
+ * in assigning different collation elements to code points.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_BUILDER_VERSION 9
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Constant 1.
+ * This was intended to be the version of collation tailorings,
+ * but instead the tailoring data carries a version number.
+ * @deprecated ICU 54
+ */
+#define UCOL_TAILORINGS_VERSION 1
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uversion.h b/src/core/basetypes/icu-ucsdet/include/unicode/uversion.h
new file mode 100644
index 00000000..74e30910
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uversion.h
@@ -0,0 +1,193 @@
+/*
+*******************************************************************************
+* Copyright (C) 2000-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: uversion.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Vladimir Weinstein
+*
+* Gets included by utypes.h and Windows .rc files
+*/
+
+/**
+ * \file
+ * \brief C API: API for accessing ICU version numbers.
+ */
+/*===========================================================================*/
+/* Main ICU version information */
+/*===========================================================================*/
+
+#ifndef UVERSION_H
+#define UVERSION_H
+
+#include "unicode/umachine.h"
+
+/* Actual version info lives in uvernum.h */
+#include "unicode/uvernum.h"
+
+/** Maximum length of the copyright string.
+ * @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING_LENGTH 128
+
+/** An ICU version consists of up to 4 numbers from 0..255.
+ * @stable ICU 2.4
+ */
+#define U_MAX_VERSION_LENGTH 4
+
+/** In a string, ICU version fields are delimited by dots.
+ * @stable ICU 2.4
+ */
+#define U_VERSION_DELIMITER '.'
+
+/** The maximum length of an ICU version string.
+ * @stable ICU 2.4
+ */
+#define U_MAX_VERSION_STRING_LENGTH 20
+
+/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
+ * To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).
+ * @stable ICU 2.4
+ */
+typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
+
+/*===========================================================================*/
+/* C++ namespace if supported. Versioned unless versioning is disabled. */
+/*===========================================================================*/
+
+/**
+ * \def U_NAMESPACE_BEGIN
+ * This is used to begin a declaration of a public ICU C++ API.
+ * When not compiling for C++, it does nothing.
+ * When compiling for C++, it begins an extern "C++" linkage block (to protect
+ * against cases in which an external client includes ICU header files inside
+ * an extern "C" linkage block).
+ *
+ * It also begins a versioned-ICU-namespace block.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_END
+ * This is used to end a declaration of a public ICU C++ API.
+ * When not compiling for C++, it does nothing.
+ * When compiling for C++, it ends the extern "C++" block begun by
+ * U_NAMESPACE_BEGIN.
+ *
+ * It also ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_USE
+ * This is used to specify that the rest of the code uses the
+ * public ICU C++ API namespace.
+ * This is invoked by default; we recommend that you turn it off:
+ * See the "Recommended Build Options" section of the ICU4C readme
+ * (http://source.icu-project.org/repos/icu/icu/trunk/readme.html#RecBuild)
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_QUALIFIER
+ * This is used to qualify that a function or class is part of
+ * the public ICU C++ API namespace.
+ *
+ * This macro is unnecessary since ICU 49 requires namespace support.
+ * You can just use "icu::" instead.
+ * @stable ICU 2.4
+ */
+
+/* Define namespace symbols if the compiler supports it. */
+#ifdef __cplusplus
+# if U_DISABLE_RENAMING
+# define U_ICU_NAMESPACE icu
+ namespace U_ICU_NAMESPACE { }
+# else
+# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu)
+ namespace U_ICU_NAMESPACE { }
+ namespace icu = U_ICU_NAMESPACE;
+# endif
+
+# define U_NAMESPACE_BEGIN extern "C++" { namespace U_ICU_NAMESPACE {
+# define U_NAMESPACE_END } }
+# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
+# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
+
+# ifndef U_USING_ICU_NAMESPACE
+# define U_USING_ICU_NAMESPACE 1
+# endif
+# if U_USING_ICU_NAMESPACE
+ U_NAMESPACE_USE
+# endif
+#else
+# define U_NAMESPACE_BEGIN
+# define U_NAMESPACE_END
+# define U_NAMESPACE_USE
+# define U_NAMESPACE_QUALIFIER
+#endif
+
+/*===========================================================================*/
+/* General version helper functions. Definitions in putil.c */
+/*===========================================================================*/
+
+/**
+ * Parse a string with dotted-decimal version information and
+ * fill in a UVersionInfo structure with the result.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The destination structure for the version information.
+ * @param versionString A string with dotted-decimal version information,
+ * with up to four non-negative number fields with
+ * values of up to 255 each.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+u_versionFromString(UVersionInfo versionArray, const char *versionString);
+
+/**
+ * Parse a Unicode string with dotted-decimal version information and
+ * fill in a UVersionInfo structure with the result.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The destination structure for the version information.
+ * @param versionString A Unicode string with dotted-decimal version
+ * information, with up to four non-negative number
+ * fields with values of up to 255 each.
+ * @stable ICU 4.2
+ */
+U_STABLE void U_EXPORT2
+u_versionFromUString(UVersionInfo versionArray, const UChar *versionString);
+
+
+/**
+ * Write a string with dotted-decimal version information according
+ * to the input UVersionInfo.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The version information to be written as a string.
+ * @param versionString A string buffer that will be filled in with
+ * a string corresponding to the numeric version
+ * information in versionArray.
+ * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+u_versionToString(const UVersionInfo versionArray, char *versionString);
+
+/**
+ * Gets the ICU release version. The version array stores the version information
+ * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray the version # information, the result will be filled in
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_getVersion(UVersionInfo versionArray);
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/uposixdefs.h b/src/core/basetypes/icu-ucsdet/include/uposixdefs.h
new file mode 100644
index 00000000..bd64d91a
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uposixdefs.h
@@ -0,0 +1,73 @@
+/*
+*******************************************************************************
+* Copyright (C) 2011-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: uposixdefs.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011jul25
+* created by: Markus W. Scherer
+*
+* Common definitions for implementation files working with POSIX functions.
+* *Important*: #include this file before any other header files!
+*/
+
+#ifndef __UPOSIXDEFS_H__
+#define __UPOSIXDEFS_H__
+
+/*
+ * Define _XOPEN_SOURCE for access to POSIX functions.
+ *
+ * We cannot use U_PLATFORM from platform.h/utypes.h because
+ * "The Open Group Base Specifications"
+ * chapter "2.2 The Compilation Environment" says:
+ * "In the compilation of an application that #defines a feature test macro
+ * specified by IEEE Std 1003.1-2001,
+ * no header defined by IEEE Std 1003.1-2001 shall be included prior to
+ * the definition of the feature test macro."
+ */
+#ifdef _XOPEN_SOURCE
+ /* Use the predefined value. */
+#else
+ /*
+ * Version 6.0:
+ * The Open Group Base Specifications Issue 6 (IEEE Std 1003.1, 2004 Edition)
+ * also known as
+ * SUSv3 = Open Group Single UNIX Specification, Version 3 (UNIX03)
+ *
+ * Note: This definition used to be in C source code (e.g., putil.c)
+ * and define _XOPEN_SOURCE to different values depending on __STDC_VERSION__.
+ * In C++ source code (e.g., putil.cpp), __STDC_VERSION__ is not defined at all.
+ */
+# define _XOPEN_SOURCE 600
+#endif
+
+/*
+ * Make sure things like readlink and such functions work.
+ * Poorly upgraded Solaris machines can't have this defined.
+ * Cleanly installed Solaris can use this #define.
+ *
+ * z/OS needs this definition for timeval and to get usleep.
+ */
+#if !defined(_XOPEN_SOURCE_EXTENDED)
+# define _XOPEN_SOURCE_EXTENDED 1
+#endif
+
+/*
+ * There is an issue with turning on _XOPEN_SOURCE_EXTENDED on certain platforms.
+ * A compatibility issue exists between turning on _XOPEN_SOURCE_EXTENDED and using
+ * standard C++ string class. As a result, standard C++ string class needs to be
+ * turned off for the follwing platforms:
+ * -AIX/VACPP
+ * -Solaris/GCC
+ */
+#if (U_PLATFORM == U_PF_AIX && !defined(__GNUC__)) || (U_PLATFORM == U_PF_SOLARIS && defined(__GNUC__))
+# if _XOPEN_SOURCE_EXTENDED && !defined(U_HAVE_STD_STRING)
+# define U_HAVE_STD_STRING 0
+# endif
+#endif
+
+#endif /* __UPOSIXDEFS_H__ */
diff --git a/src/core/basetypes/icu-ucsdet/include/uset_imp.h b/src/core/basetypes/icu-ucsdet/include/uset_imp.h
new file mode 100644
index 00000000..07a7381e
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uset_imp.h
@@ -0,0 +1,60 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2004-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uset_imp.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004sep07
+* created by: Markus W. Scherer
+*
+* Internal USet definitions.
+*/
+
+#ifndef __USET_IMP_H__
+#define __USET_IMP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+
+U_CDECL_BEGIN
+
+typedef void U_CALLCONV
+USetAdd(USet *set, UChar32 c);
+
+typedef void U_CALLCONV
+USetAddRange(USet *set, UChar32 start, UChar32 end);
+
+typedef void U_CALLCONV
+USetAddString(USet *set, const UChar *str, int32_t length);
+
+typedef void U_CALLCONV
+USetRemove(USet *set, UChar32 c);
+
+typedef void U_CALLCONV
+USetRemoveRange(USet *set, UChar32 start, UChar32 end);
+
+/**
+ * Interface for adding items to a USet, to keep low-level code from
+ * statically depending on the USet implementation.
+ * Calls will look like sa->add(sa->set, c);
+ */
+struct USetAdder {
+ USet *set;
+ USetAdd *add;
+ USetAddRange *addRange;
+ USetAddString *addString;
+ USetRemove *remove;
+ USetRemoveRange *removeRange;
+};
+typedef struct USetAdder USetAdder;
+
+U_CDECL_END
+
+#endif
+
diff --git a/src/core/basetypes/icu-ucsdet/include/ustr_imp.h b/src/core/basetypes/icu-ucsdet/include/ustr_imp.h
new file mode 100644
index 00000000..ee54d332
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ustr_imp.h
@@ -0,0 +1,247 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ustr_imp.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001jan30
+* created by: Markus W. Scherer
+*/
+
+#ifndef __USTR_IMP_H__
+#define __USTR_IMP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uiter.h"
+#include "ucase.h"
+
+/** Simple declaration to avoid including unicode/ubrk.h. */
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+ typedef struct UBreakIterator UBreakIterator;
+#endif
+
+#ifndef U_COMPARE_IGNORE_CASE
+/* see also unorm.h */
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ */
+#define U_COMPARE_IGNORE_CASE 0x10000
+#endif
+
+/**
+ * Internal option for unorm_cmpEquivFold() for strncmp style.
+ * If set, checks for both string length and terminating NUL.
+ */
+#define _STRNCMP_STYLE 0x1000
+
+/**
+ * Compare two strings in code point order or code unit order.
+ * Works in strcmp style (both lengths -1),
+ * strncmp style (lengths equal and >=0, flag TRUE),
+ * and memcmp/UnicodeString style (at least one length >=0).
+ */
+U_CFUNC int32_t U_EXPORT2
+uprv_strCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ UBool strncmpStyle, UBool codePointOrder);
+
+/**
+ * Internal API, used by u_strcasecmp() etc.
+ * Compare strings case-insensitively,
+ * in code point order or code unit order.
+ */
+U_CFUNC int32_t
+u_strcmpFold(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Are the Unicode properties loaded?
+ * This must be used before internal functions are called that do
+ * not perform this check.
+ * Generate a debug assertion failure if data is not loaded.
+ */
+U_CFUNC UBool
+uprv_haveProperties(UErrorCode *pErrorCode);
+
+/**
+ * Load the Unicode property data.
+ * Intended primarily for use from u_init().
+ * Has no effect if property data is already loaded.
+ * NOT thread safe.
+ */
+/*U_CFUNC int8_t
+uprv_loadPropsData(UErrorCode *errorCode);*/
+
+/*
+ * Internal string casing functions implementing
+ * ustring.h/ustrcase.c and UnicodeString case mapping functions.
+ */
+
+struct UCaseMap {
+ const UCaseProps *csp;
+#if !UCONFIG_NO_BREAK_ITERATION
+ UBreakIterator *iter; /* We adopt the iterator, so we own it. */
+#endif
+ char locale[32];
+ int32_t locCache;
+ uint32_t options;
+};
+
+#ifndef __UCASEMAP_H__
+typedef struct UCaseMap UCaseMap;
+#endif
+
+#if UCONFIG_NO_BREAK_ITERATION
+# define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 }
+#else
+# define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
+#endif
+
+U_CFUNC void
+ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
+
+#ifndef U_STRING_CASE_MAPPER_DEFINED
+#define U_STRING_CASE_MAPPER_DEFINED
+
+/**
+ * String case mapping function type, used by ustrcase_map().
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * src and dest must not overlap.
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToLower(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToUpper(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToTitle(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalFold(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Implements argument checking and buffer handling
+ * for string case mapping as a common function.
+ */
+U_CFUNC int32_t
+ustrcase_map(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UStringCaseMapper *stringCaseMapper,
+ UErrorCode *pErrorCode);
+
+/**
+ * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
+ * UTF-8 version of UStringCaseMapper.
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * src and dest must not overlap.
+ */
+typedef int32_t U_CALLCONV
+UTF8CaseMapper(const UCaseMap *csm,
+ uint8_t *dest, int32_t destCapacity,
+ const uint8_t *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/** Implements UTF8CaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
+ uint8_t *dest, int32_t destCapacity,
+ const uint8_t *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Implements argument checking and buffer handling
+ * for UTF-8 string case mapping as a common function.
+ */
+U_CFUNC int32_t
+ucasemap_mapUTF8(const UCaseMap *csm,
+ uint8_t *dest, int32_t destCapacity,
+ const uint8_t *src, int32_t srcLength,
+ UTF8CaseMapper *stringCaseMapper,
+ UErrorCode *pErrorCode);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashUCharsN(const UChar *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashCharsN(const char *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashICharsN(const char *str, int32_t length);
+
+/**
+ * NUL-terminate a UChar * string if possible.
+ * If length < destCapacity then NUL-terminate.
+ * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING.
+ * If length > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR.
+ *
+ * @param dest Destination buffer, can be NULL if destCapacity==0.
+ * @param destCapacity Number of UChars available at dest.
+ * @param length Number of UChars that were (to be) written to dest.
+ * @param pErrorCode ICU error code.
+ * @return length
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * NUL-terminate a char * string if possible.
+ * Same as u_terminateUChars() but for a different string type.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * NUL-terminate a UChar32 * string if possible.
+ * Same as u_terminateUChars() but for a different string type.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * NUL-terminate a wchar_t * string if possible.
+ * Same as u_terminateUChars() but for a different string type.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/utracimp.h b/src/core/basetypes/icu-ucsdet/include/utracimp.h
new file mode 100644
index 00000000..317fbe35
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/utracimp.h
@@ -0,0 +1,384 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2009, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utracimp.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003aug06
+* created by: Markus W. Scherer
+*
+* Internal header for ICU tracing/logging.
+*
+*
+* Various notes:
+* - using a trace level variable to only call trace functions
+* when the level is sufficient
+* - using the same variable for tracing on/off to never make a function
+* call when off
+* - the function number is put into a local variable by the entry macro
+* and used implicitly to avoid copy&paste/typing mistakes by the developer
+* - the application must call utrace_setFunctions() and pass in
+* implementations for the trace functions
+* - ICU trace macros call ICU functions that route through the function
+* pointers if they have been set;
+* this avoids an indirection at the call site
+* (which would cost more code for another check and for the indirection)
+*
+* ### TODO Issues:
+* - Verify that va_list is portable among compilers for the same platform.
+* va_list should be portable because printf() would fail otherwise!
+* - Should enum values like UTraceLevel be passed into int32_t-type arguments,
+* or should enum types be used?
+*/
+
+#ifndef __UTRACIMP_H__
+#define __UTRACIMP_H__
+
+#include "unicode/utrace.h"
+#include <stdarg.h>
+
+U_CDECL_BEGIN
+
+/**
+ * \var utrace_level
+ * Trace level variable. Negative for "off".
+ * Use only via UTRACE_ macros.
+ * @internal
+ */
+#ifdef UTRACE_IMPL
+U_EXPORT int32_t
+#else
+U_CFUNC U_COMMON_API int32_t
+#endif
+utrace_level;
+
+
+/**
+ * Traced Function Exit return types.
+ * Flags indicating the number and types of varargs included in a call
+ * to a UTraceExit function.
+ * Bits 0-3: The function return type. First variable param.
+ * Bit 4: Flag for presence of U_ErrorCode status param.
+ * @internal
+ */
+typedef enum UTraceExitVal {
+ /** The traced function returns no value @internal */
+ UTRACE_EXITV_NONE = 0,
+ /** The traced function returns an int32_t, or compatible, type. @internal */
+ UTRACE_EXITV_I32 = 1,
+ /** The traced function returns a pointer @internal */
+ UTRACE_EXITV_PTR = 2,
+ /** The traced function returns a UBool @internal */
+ UTRACE_EXITV_BOOL = 3,
+ /** Mask to extract the return type values from a UTraceExitVal @internal */
+ UTRACE_EXITV_MASK = 0xf,
+ /** Bit indicating that the traced function includes a UErrorCode parameter @internal */
+ UTRACE_EXITV_STATUS = 0x10
+} UTraceExitVal;
+
+/**
+ * Trace function for the entry point of a function.
+ * Do not use directly, use UTRACE_ENTRY instead.
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+utrace_entry(int32_t fnNumber);
+
+/**
+ * Trace function for each exit point of a function.
+ * Do not use directly, use UTRACE_EXIT* instead.
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @param returnType The type of the value returned by the function.
+ * @param errorCode The UErrorCode value at function exit. See UTRACE_EXIT.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+utrace_exit(int32_t fnNumber, int32_t returnType, ...);
+
+
+/**
+ * Trace function used inside functions that have a UTRACE_ENTRY() statement.
+ * Do not use directly, use UTRACE_DATAX() macros instead.
+ *
+ * @param utraceFnNumber The number of the current function, from the local
+ * variable of the same name.
+ * @param level The trace level for this message.
+ * @param fmt The trace format string.
+ *
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+utrace_data(int32_t utraceFnNumber, int32_t level, const char *fmt, ...);
+
+U_CDECL_END
+
+#if U_ENABLE_TRACING
+
+/**
+ * Boolean expression to see if ICU tracing is turned on
+ * to at least the specified level.
+ * @internal
+ */
+#define UTRACE_LEVEL(level) (utrace_getLevel()>=(level))
+
+/**
+ * Flag bit in utraceFnNumber, the local variable added to each function
+ * with tracing code to contains the function number.
+ *
+ * Set the flag if the function's entry is traced, which will cause the
+ * function's exit to also be traced. utraceFnNumber is uncoditionally
+ * set at entry, whether or not the entry is traced, so that it will
+ * always be available for error trace output.
+ * @internal
+ */
+#define UTRACE_TRACED_ENTRY 0x80000000
+
+/**
+ * Trace statement for the entry point of a function.
+ * Stores the function number in a local variable.
+ * In C code, must be placed immediately after the last variable declaration.
+ * Must be matched with UTRACE_EXIT() at all function exit points.
+ *
+ * Tracing should start with UTRACE_ENTRY after checking for
+ * U_FAILURE at function entry, so that if a function returns immediately
+ * because of a pre-existing error condition, it does not show up in the trace,
+ * consistent with ICU's error handling model.
+ *
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @internal
+ */
+#define UTRACE_ENTRY(fnNumber) \
+ int32_t utraceFnNumber=(fnNumber); \
+ if(utrace_getLevel()>=UTRACE_INFO) { \
+ utrace_entry(fnNumber); \
+ utraceFnNumber |= UTRACE_TRACED_ENTRY; \
+ }
+
+
+/**
+ * Trace statement for the entry point of open and close functions.
+ * Produces trace output at a less verbose setting than plain UTRACE_ENTRY
+ * Stores the function number in a local variable.
+ * In C code, must be placed immediately after the last variable declaration.
+ * Must be matched with UTRACE_EXIT() at all function exit points.
+ *
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @internal
+ */
+#define UTRACE_ENTRY_OC(fnNumber) \
+ int32_t utraceFnNumber=(fnNumber); \
+ if(utrace_getLevel()>=UTRACE_OPEN_CLOSE) { \
+ utrace_entry(fnNumber); \
+ utraceFnNumber |= UTRACE_TRACED_ENTRY; \
+ }
+
+/**
+ * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
+ * statement.
+ *
+ * @param errorCode The function's ICU UErrorCode value at function exit,
+ * or U_ZERO_ERROR if the function does not use a UErrorCode.
+ * 0==U_ZERO_ERROR indicates success,
+ * positive values an error (see u_errorName()),
+ * negative values an informational status.
+ *
+ * @internal
+ */
+#define UTRACE_EXIT() \
+ {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_NONE); \
+ }}
+
+/**
+ * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
+ * statement, and that returns a value.
+ *
+ * @param val The function's return value, int32_t or comatible type.
+ *
+ * @internal
+ */
+#define UTRACE_EXIT_VALUE(val) \
+ {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_I32, val); \
+ }}
+
+#define UTRACE_EXIT_STATUS(status) \
+ {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_STATUS, status); \
+ }}
+
+#define UTRACE_EXIT_VALUE_STATUS(val, status) \
+ {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS), val, status); \
+ }}
+
+#define UTRACE_EXIT_PTR_STATUS(ptr, status) \
+ {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS), ptr, status); \
+ }}
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes no data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA0(level, fmt) \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt)); \
+ }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes one data argument.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA1(level, fmt, a) \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a)); \
+ }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes two data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA2(level, fmt, a, b) \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a), (b)); \
+ }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes three data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA3(level, fmt, a, b, c) \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c)); \
+ }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes four data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA4(level, fmt, a, b, c, d) \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d)); \
+ }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes five data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA5(level, fmt, a, b, c, d, e) \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e)); \
+ }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes six data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f) \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f)); \
+ }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes seven data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g) \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g)); \
+ }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes eight data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h) \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h)); \
+ }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes nine data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i) \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h), (i)); \
+ }
+
+#else
+
+/*
+ * When tracing is disabled, the following macros become empty
+ */
+
+#define UTRACE_LEVEL(level) 0
+#define UTRACE_ENTRY(fnNumber)
+#define UTRACE_ENTRY_OC(fnNumber)
+#define UTRACE_EXIT()
+#define UTRACE_EXIT_VALUE(val)
+#define UTRACE_EXIT_STATUS(status)
+#define UTRACE_EXIT_VALUE_STATUS(val, status)
+#define UTRACE_EXIT_PTR_STATUS(ptr, status)
+#define UTRACE_DATA0(level, fmt)
+#define UTRACE_DATA1(level, fmt, a)
+#define UTRACE_DATA2(level, fmt, a, b)
+#define UTRACE_DATA3(level, fmt, a, b, c)
+#define UTRACE_DATA4(level, fmt, a, b, c, d)
+#define UTRACE_DATA5(level, fmt, a, b, c, d, e)
+#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f)
+#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g)
+#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h)
+#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i)
+
+#endif
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/inputext.cpp b/src/core/basetypes/icu-ucsdet/inputext.cpp
new file mode 100644
index 00000000..e5a8ee18
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/inputext.cpp
@@ -0,0 +1,164 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2009, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "inputext.h"
+
+#include "cmemory.h"
+#include "cstring.h"
+
+#include <string.h>
+
+U_NAMESPACE_BEGIN
+
+#define BUFFER_SIZE 8192
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+
+InputText::InputText(UErrorCode &status)
+ : fInputBytes(NEW_ARRAY(uint8_t, BUFFER_SIZE)), // The text to be checked. Markup will have been
+ // removed if appropriate.
+ fByteStats(NEW_ARRAY(int16_t, 256)), // byte frequency statistics for the input text.
+ // Value is percent, not absolute.
+ fDeclaredEncoding(0),
+ fRawInput(0),
+ fRawLength(0)
+{
+ if (fInputBytes == NULL || fByteStats == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+}
+
+InputText::~InputText()
+{
+ DELETE_ARRAY(fDeclaredEncoding);
+ DELETE_ARRAY(fByteStats);
+ DELETE_ARRAY(fInputBytes);
+}
+
+void InputText::setText(const char *in, int32_t len)
+{
+ fInputLen = 0;
+ fC1Bytes = FALSE;
+ fRawInput = (const uint8_t *) in;
+ fRawLength = len == -1? (int32_t)uprv_strlen(in) : len;
+}
+
+void InputText::setDeclaredEncoding(const char* encoding, int32_t len)
+{
+ if(encoding) {
+ if (len == -1) {
+ len = (int32_t)uprv_strlen(encoding);
+ }
+
+ len += 1; // to make place for the \0 at the end.
+ uprv_free(fDeclaredEncoding);
+ fDeclaredEncoding = NEW_ARRAY(char, len);
+ uprv_strncpy(fDeclaredEncoding, encoding, len);
+ }
+}
+
+UBool InputText::isSet() const
+{
+ return fRawInput != NULL;
+}
+
+/**
+* MungeInput - after getting a set of raw input data to be analyzed, preprocess
+* it by removing what appears to be html markup.
+*
+* @internal
+*/
+void InputText::MungeInput(UBool fStripTags) {
+ int srci = 0;
+ int dsti = 0;
+ uint8_t b;
+ bool inMarkup = FALSE;
+ int32_t openTags = 0;
+ int32_t badTags = 0;
+
+ //
+ // html / xml markup stripping.
+ // quick and dirty, not 100% accurate, but hopefully good enough, statistically.
+ // discard everything within < brackets >
+ // Count how many total '<' and illegal (nested) '<' occur, so we can make some
+ // guess as to whether the input was actually marked up at all.
+ // TODO: Think about how this interacts with EBCDIC charsets that are detected.
+ if (fStripTags) {
+ for (srci = 0; srci < fRawLength && dsti < BUFFER_SIZE; srci += 1) {
+ b = fRawInput[srci];
+
+ if (b == (uint8_t)0x3C) { /* Check for the ASCII '<' */
+ if (inMarkup) {
+ badTags += 1;
+ }
+
+ inMarkup = TRUE;
+ openTags += 1;
+ }
+
+ if (! inMarkup) {
+ fInputBytes[dsti++] = b;
+ }
+
+ if (b == (uint8_t)0x3E) { /* Check for the ASCII '>' */
+ inMarkup = FALSE;
+ }
+ }
+
+ fInputLen = dsti;
+ }
+
+ //
+ // If it looks like this input wasn't marked up, or if it looks like it's
+ // essentially nothing but markup abandon the markup stripping.
+ // Detection will have to work on the unstripped input.
+ //
+ if (openTags<5 || openTags/5 < badTags ||
+ (fInputLen < 100 && fRawLength>600))
+ {
+ int32_t limit = fRawLength;
+
+ if (limit > BUFFER_SIZE) {
+ limit = BUFFER_SIZE;
+ }
+
+ for (srci=0; srci<limit; srci++) {
+ fInputBytes[srci] = fRawInput[srci];
+ }
+
+ fInputLen = srci;
+ }
+
+ //
+ // Tally up the byte occurence statistics.
+ // These are available for use by the various detectors.
+ //
+
+ uprv_memset(fByteStats, 0, (sizeof fByteStats[0]) * 256);
+
+ for (srci = 0; srci < fInputLen; srci += 1) {
+ fByteStats[fInputBytes[srci]] += 1;
+ }
+
+ for (int32_t i = 0x80; i <= 0x9F; i += 1) {
+ if (fByteStats[i] != 0) {
+ fC1Bytes = TRUE;
+ break;
+ }
+ }
+}
+
+U_NAMESPACE_END
+#endif
+
diff --git a/src/core/basetypes/icu-ucsdet/uarrsort.c b/src/core/basetypes/icu-ucsdet/uarrsort.c
new file mode 100644
index 00000000..22c76972
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/uarrsort.c
@@ -0,0 +1,283 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uarrsort.c
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003aug04
+* created by: Markus W. Scherer
+*
+* Internal function for sorting arrays.
+*/
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "uarrsort.h"
+
+enum {
+ /**
+ * "from Knuth"
+ *
+ * A binary search over 8 items performs 4 comparisons:
+ * log2(8)=3 to subdivide, +1 to check for equality.
+ * A linear search over 8 items on average also performs 4 comparisons.
+ */
+ MIN_QSORT=9,
+ STACK_ITEM_SIZE=200
+};
+
+/* UComparator convenience implementations ---------------------------------- */
+
+U_CAPI int32_t U_EXPORT2
+uprv_uint16Comparator(const void *context, const void *left, const void *right) {
+ return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right;
+}
+
+U_CAPI int32_t U_EXPORT2
+uprv_int32Comparator(const void *context, const void *left, const void *right) {
+ return *(const int32_t *)left - *(const int32_t *)right;
+}
+
+U_CAPI int32_t U_EXPORT2
+uprv_uint32Comparator(const void *context, const void *left, const void *right) {
+ uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right;
+
+ /* compare directly because (l-r) would overflow the int32_t result */
+ if(l<r) {
+ return -1;
+ } else if(l==r) {
+ return 0;
+ } else /* l>r */ {
+ return 1;
+ }
+}
+
+/* Insertion sort using binary search --------------------------------------- */
+
+U_CAPI int32_t U_EXPORT2
+uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize,
+ UComparator *cmp, const void *context) {
+ int32_t start=0;
+ UBool found=FALSE;
+
+ /* Binary search until we get down to a tiny sub-array. */
+ while((limit-start)>=MIN_QSORT) {
+ int32_t i=(start+limit)/2;
+ int32_t diff=cmp(context, item, array+i*itemSize);
+ if(diff==0) {
+ /*
+ * Found the item. We look for the *last* occurrence of such
+ * an item, for stable sorting.
+ * If we knew that there will be only few equal items,
+ * we could break now and enter the linear search.
+ * However, if there are many equal items, then it should be
+ * faster to continue with the binary search.
+ * It seems likely that we either have all unique items
+ * (where found will never become TRUE in the insertion sort)
+ * or potentially many duplicates.
+ */
+ found=TRUE;
+ start=i+1;
+ } else if(diff<0) {
+ limit=i;
+ } else {
+ start=i;
+ }
+ }
+
+ /* Linear search over the remaining tiny sub-array. */
+ while(start<limit) {
+ int32_t diff=cmp(context, item, array+start*itemSize);
+ if(diff==0) {
+ found=TRUE;
+ } else if(diff<0) {
+ break;
+ }
+ ++start;
+ }
+ return found ? (start-1) : ~start;
+}
+
+static void
+doInsertionSort(char *array, int32_t length, int32_t itemSize,
+ UComparator *cmp, const void *context, void *pv) {
+ int32_t j;
+
+ for(j=1; j<length; ++j) {
+ char *item=array+j*itemSize;
+ int32_t insertionPoint=uprv_stableBinarySearch(array, j, item, itemSize, cmp, context);
+ if(insertionPoint<0) {
+ insertionPoint=~insertionPoint;
+ } else {
+ ++insertionPoint; /* one past the last equal item */
+ }
+ if(insertionPoint<j) {
+ char *dest=array+insertionPoint*itemSize;
+ uprv_memcpy(pv, item, itemSize); /* v=array[j] */
+ uprv_memmove(dest+itemSize, dest, (j-insertionPoint)*itemSize);
+ uprv_memcpy(dest, pv, itemSize); /* array[insertionPoint]=v */
+ }
+ }
+}
+
+static void
+insertionSort(char *array, int32_t length, int32_t itemSize,
+ UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
+ UAlignedMemory v[STACK_ITEM_SIZE/sizeof(UAlignedMemory)+1];
+ void *pv;
+
+ /* allocate an intermediate item variable (v) */
+ if(itemSize<=STACK_ITEM_SIZE) {
+ pv=v;
+ } else {
+ pv=uprv_malloc(itemSize);
+ if(pv==NULL) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ }
+
+ doInsertionSort(array, length, itemSize, cmp, context, pv);
+
+ if(pv!=v) {
+ uprv_free(pv);
+ }
+}
+
+/* QuickSort ---------------------------------------------------------------- */
+
+/*
+ * This implementation is semi-recursive:
+ * It recurses for the smaller sub-array to shorten the recursion depth,
+ * and loops for the larger sub-array.
+ *
+ * Loosely after QuickSort algorithms in
+ * Niklaus Wirth
+ * Algorithmen und Datenstrukturen mit Modula-2
+ * B.G. Teubner Stuttgart
+ * 4. Auflage 1986
+ * ISBN 3-519-02260-5
+ */
+static void
+subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize,
+ UComparator *cmp, const void *context,
+ void *px, void *pw) {
+ int32_t left, right;
+
+ /* start and left are inclusive, limit and right are exclusive */
+ do {
+ if((start+MIN_QSORT)>=limit) {
+ doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px);
+ break;
+ }
+
+ left=start;
+ right=limit;
+
+ /* x=array[middle] */
+ uprv_memcpy(px, array+((start+limit)/2)*itemSize, itemSize);
+
+ do {
+ while(/* array[left]<x */
+ cmp(context, array+left*itemSize, px)<0
+ ) {
+ ++left;
+ }
+ while(/* x<array[right-1] */
+ cmp(context, px, array+(right-1)*itemSize)<0
+ ) {
+ --right;
+ }
+
+ /* swap array[left] and array[right-1] via w; ++left; --right */
+ if(left<right) {
+ --right;
+
+ if(left<right) {
+ uprv_memcpy(pw, array+left*itemSize, itemSize);
+ uprv_memcpy(array+left*itemSize, array+right*itemSize, itemSize);
+ uprv_memcpy(array+right*itemSize, pw, itemSize);
+ }
+
+ ++left;
+ }
+ } while(left<right);
+
+ /* sort sub-arrays */
+ if((right-start)<(limit-left)) {
+ /* sort [start..right[ */
+ if(start<(right-1)) {
+ subQuickSort(array, start, right, itemSize, cmp, context, px, pw);
+ }
+
+ /* sort [left..limit[ */
+ start=left;
+ } else {
+ /* sort [left..limit[ */
+ if(left<(limit-1)) {
+ subQuickSort(array, left, limit, itemSize, cmp, context, px, pw);
+ }
+
+ /* sort [start..right[ */
+ limit=right;
+ }
+ } while(start<(limit-1));
+}
+
+static void
+quickSort(char *array, int32_t length, int32_t itemSize,
+ UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
+ UAlignedMemory xw[(2*STACK_ITEM_SIZE)/sizeof(UAlignedMemory)+1];
+ void *p;
+
+ /* allocate two intermediate item variables (x and w) */
+ if(itemSize<=STACK_ITEM_SIZE) {
+ p=xw;
+ } else {
+ p=uprv_malloc(2*itemSize);
+ if(p==NULL) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ }
+
+ subQuickSort(array, 0, length, itemSize,
+ cmp, context, p, (char *)p+itemSize);
+
+ if(p!=xw) {
+ uprv_free(p);
+ }
+}
+
+/* uprv_sortArray() API ----------------------------------------------------- */
+
+/*
+ * Check arguments, select an appropriate implementation,
+ * cast the array to char * so that array+i*itemSize works.
+ */
+U_CAPI void U_EXPORT2
+uprv_sortArray(void *array, int32_t length, int32_t itemSize,
+ UComparator *cmp, const void *context,
+ UBool sortStable, UErrorCode *pErrorCode) {
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ if(length<=1) {
+ return;
+ } else if(length<MIN_QSORT || sortStable) {
+ insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode);
+ } else {
+ quickSort((char *)array, length, itemSize, cmp, context, pErrorCode);
+ }
+}
diff --git a/src/core/basetypes/icu-ucsdet/ucln_cmn.cpp b/src/core/basetypes/icu-ucsdet/ucln_cmn.cpp
new file mode 100644
index 00000000..2480c9d3
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/ucln_cmn.cpp
@@ -0,0 +1,111 @@
+/*
+******************************************************************************
+* Copyright (C) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* file name: ucln_cmn.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uclean.h"
+#include "cmemory.h"
+#include "mutex.h"
+#include "uassert.h"
+#include "ucln.h"
+#include "ucln_cmn.h"
+#include "utracimp.h"
+#include "umutex.h"
+
+/** Auto-client for UCLN_COMMON **/
+#define UCLN_TYPE_IS_COMMON
+#include "ucln_imp.h"
+
+static cleanupFunc *gCommonCleanupFunctions[UCLN_COMMON_COUNT];
+static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON];
+
+
+/************************************************
+ The cleanup order is important in this function.
+ Please be sure that you have read ucln.h
+ ************************************************/
+U_CAPI void U_EXPORT2
+u_cleanup(void)
+{
+ UTRACE_ENTRY_OC(UTRACE_U_CLEANUP);
+ umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */
+ umtx_unlock(NULL); /* all state left around by any other threads. */
+
+ ucln_lib_cleanup();
+
+ cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */
+ UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. */
+/*#if U_ENABLE_TRACING*/
+ utrace_cleanup();
+/*#endif*/
+}
+
+U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType)
+{
+ if (gLibCleanupFunctions[libType])
+ {
+ gLibCleanupFunctions[libType]();
+ gLibCleanupFunctions[libType] = NULL;
+ }
+}
+
+U_CFUNC void
+ucln_common_registerCleanup(ECleanupCommonType type,
+ cleanupFunc *func)
+{
+ U_ASSERT(UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT);
+ if (UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT)
+ {
+ icu::Mutex m; // See ticket 10295 for discussion.
+ gCommonCleanupFunctions[type] = func;
+ }
+#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
+ ucln_registerAutomaticCleanup();
+#endif
+}
+
+// Note: ucln_registerCleanup() is called with the ICU global mutex locked.
+// Be aware if adding anything to the function.
+// See ticket 10295 for discussion.
+
+U_CAPI void U_EXPORT2
+ucln_registerCleanup(ECleanupLibraryType type,
+ cleanupFunc *func)
+{
+ U_ASSERT(UCLN_START < type && type < UCLN_COMMON);
+ if (UCLN_START < type && type < UCLN_COMMON)
+ {
+ gLibCleanupFunctions[type] = func;
+ }
+}
+
+U_CFUNC UBool ucln_lib_cleanup(void) {
+ int32_t libType = UCLN_START;
+ int32_t commonFunc = UCLN_COMMON_START;
+
+ for (libType++; libType<UCLN_COMMON; libType++) {
+ ucln_cleanupOne(static_cast<ECleanupLibraryType>(libType));
+ }
+
+ for (commonFunc++; commonFunc<UCLN_COMMON_COUNT; commonFunc++) {
+ if (gCommonCleanupFunctions[commonFunc])
+ {
+ gCommonCleanupFunctions[commonFunc]();
+ gCommonCleanupFunctions[commonFunc] = NULL;
+ }
+ }
+#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
+ ucln_unRegisterAutomaticCleanup();
+#endif
+ return TRUE;
+}
diff --git a/src/core/basetypes/icu-ucsdet/ucln_in.cpp b/src/core/basetypes/icu-ucsdet/ucln_in.cpp
new file mode 100644
index 00000000..431d4316
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/ucln_in.cpp
@@ -0,0 +1,63 @@
+/*
+******************************************************************************
+* *
+* Copyright (C) 2001-2014, International Business Machines *
+* Corporation and others. All Rights Reserved. *
+* *
+******************************************************************************
+* file name: ucln_in.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#include "ucln.h"
+#include "ucln_in.h"
+#include "mutex.h"
+#include "uassert.h"
+
+/** Auto-client for UCLN_I18N **/
+#define UCLN_TYPE UCLN_I18N
+#include "ucln_imp.h"
+
+/* Leave this copyright notice here! It needs to go somewhere in this library. */
+static const char copyright[] = U_COPYRIGHT_STRING;
+
+static cleanupFunc *gCleanupFunctions[UCLN_I18N_COUNT];
+
+static UBool i18n_cleanup(void)
+{
+ int32_t libType = UCLN_I18N_START;
+ (void)copyright; /* Suppress unused variable warning with clang. */
+
+ while (++libType<UCLN_I18N_COUNT) {
+ if (gCleanupFunctions[libType])
+ {
+ gCleanupFunctions[libType]();
+ gCleanupFunctions[libType] = NULL;
+ }
+ }
+#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
+ ucln_unRegisterAutomaticCleanup();
+#endif
+ return TRUE;
+}
+
+void ucln_i18n_registerCleanup(ECleanupI18NType type,
+ cleanupFunc *func) {
+ U_ASSERT(UCLN_I18N_START < type && type < UCLN_I18N_COUNT);
+ {
+ icu::Mutex m; // See ticket 10295 for discussion.
+ ucln_registerCleanup(UCLN_I18N, i18n_cleanup);
+ if (UCLN_I18N_START < type && type < UCLN_I18N_COUNT) {
+ gCleanupFunctions[type] = func;
+ }
+ }
+#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
+ ucln_registerAutomaticCleanup();
+#endif
+}
+
diff --git a/src/core/basetypes/icu-ucsdet/ucsdet.cpp b/src/core/basetypes/icu-ucsdet/ucsdet.cpp
new file mode 100644
index 00000000..b42758d5
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/ucsdet.cpp
@@ -0,0 +1,207 @@
+/*
+ ********************************************************************************
+ * Copyright (C) 2005-2013, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ ********************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+#include "unicode/ucsdet.h"
+#include "csdetect.h"
+#include "csmatch.h"
+#include "csrsbcs.h"
+#include "csrmbcs.h"
+#include "csrutf8.h"
+#include "csrucode.h"
+#include "csr2022.h"
+
+#include "cmemory.h"
+
+U_NAMESPACE_USE
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+
+U_CDECL_BEGIN
+
+U_CAPI UCharsetDetector * U_EXPORT2
+ucsdet_open(UErrorCode *status)
+{
+ if(U_FAILURE(*status)) {
+ return 0;
+ }
+
+ CharsetDetector* csd = new CharsetDetector(*status);
+
+ if (U_FAILURE(*status)) {
+ delete csd;
+ csd = NULL;
+ }
+
+ return (UCharsetDetector *) csd;
+}
+
+U_CAPI void U_EXPORT2
+ucsdet_close(UCharsetDetector *ucsd)
+{
+ CharsetDetector *csd = (CharsetDetector *) ucsd;
+ delete csd;
+}
+
+U_CAPI void U_EXPORT2
+ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
+{
+ if(U_FAILURE(*status)) {
+ return;
+ }
+
+ ((CharsetDetector *) ucsd)->setText(textIn, len);
+}
+
+U_CAPI const char * U_EXPORT2
+ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
+{
+ if(U_FAILURE(*status)) {
+ return NULL;
+ }
+
+ return ((CharsetMatch *) ucsm)->getName();
+}
+
+U_CAPI int32_t U_EXPORT2
+ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
+{
+ if(U_FAILURE(*status)) {
+ return 0;
+ }
+
+ return ((CharsetMatch *) ucsm)->getConfidence();
+}
+
+U_CAPI const char * U_EXPORT2
+ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
+{
+ if(U_FAILURE(*status)) {
+ return NULL;
+ }
+
+ return ((CharsetMatch *) ucsm)->getLanguage();
+}
+
+U_CAPI const UCharsetMatch * U_EXPORT2
+ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
+{
+ if(U_FAILURE(*status)) {
+ return NULL;
+ }
+
+ return (const UCharsetMatch *) ((CharsetDetector *) ucsd)->detect(*status);
+}
+
+U_CAPI void U_EXPORT2
+ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
+{
+ if(U_FAILURE(*status)) {
+ return;
+ }
+
+ ((CharsetDetector *) ucsd)->setDeclaredEncoding(encoding,length);
+}
+
+U_CAPI const UCharsetMatch**
+ucsdet_detectAll(UCharsetDetector *ucsd,
+ int32_t *maxMatchesFound, UErrorCode *status)
+{
+ if(U_FAILURE(*status)) {
+ return NULL;
+ }
+
+ CharsetDetector *csd = (CharsetDetector *) ucsd;
+
+ return (const UCharsetMatch**)csd->detectAll(*maxMatchesFound,*status);
+}
+
+// U_CAPI const char * U_EXPORT2
+// ucsdet_getDetectableCharsetName(const UCharsetDetector *csd, int32_t index, UErrorCode *status)
+// {
+// if(U_FAILURE(*status)) {
+// return 0;
+// }
+// return csd->getCharsetName(index,*status);
+// }
+
+// U_CAPI int32_t U_EXPORT2
+// ucsdet_getDetectableCharsetsCount(const UCharsetDetector *csd, UErrorCode *status)
+// {
+// if(U_FAILURE(*status)) {
+// return -1;
+// }
+// return UCharsetDetector::getDetectableCount();
+// }
+
+U_CAPI UBool U_EXPORT2
+ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
+{
+ // todo: could use an error return...
+ if (ucsd == NULL) {
+ return FALSE;
+ }
+
+ return ((CharsetDetector *) ucsd)->getStripTagsFlag();
+}
+
+U_CAPI UBool U_EXPORT2
+ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
+{
+ // todo: could use an error return...
+ if (ucsd == NULL) {
+ return FALSE;
+ }
+
+ CharsetDetector *csd = (CharsetDetector *) ucsd;
+ UBool prev = csd->getStripTagsFlag();
+
+ csd->setStripTagsFlag(filter);
+
+ return prev;
+}
+
+/*
+U_CAPI int32_t U_EXPORT2
+ucsdet_getUChars(const UCharsetMatch *ucsm,
+ UChar *buf, int32_t cap, UErrorCode *status)
+{
+ if(U_FAILURE(*status)) {
+ return 0;
+ }
+
+ return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status);
+}
+*/
+
+U_CAPI void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
+{
+ ((CharsetDetector *)ucsd)->setDetectableCharset(encoding, enabled, *status);
+}
+
+U_CAPI UEnumeration * U_EXPORT2
+ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
+{
+ return CharsetDetector::getAllDetectableCharsets(*status);
+}
+
+U_DRAFT UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
+{
+ return ((CharsetDetector *)ucsd)->getDetectableCharsets(*status);
+}
+
+U_CDECL_END
+
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/udataswp.c b/src/core/basetypes/icu-ucsdet/udataswp.c
new file mode 100644
index 00000000..06fe85bc
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/udataswp.c
@@ -0,0 +1,471 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: udataswp.c
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003jun05
+* created by: Markus W. Scherer
+*
+* Definitions for ICU data transformations for different platforms,
+* changing between big- and little-endian data and/or between
+* charset families (ASCII<->EBCDIC).
+*/
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+#include "unicode/udata.h" /* UDataInfo */
+#include "ucmndata.h" /* DataHeader */
+#include "cmemory.h"
+#include "udataswp.h"
+
+/* swapping primitives ------------------------------------------------------ */
+
+static int32_t U_CALLCONV
+uprv_swapArray16(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint16_t *p;
+ uint16_t *q;
+ int32_t count;
+ uint16_t x;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and swapping */
+ p=(const uint16_t *)inData;
+ q=(uint16_t *)outData;
+ count=length/2;
+ while(count>0) {
+ x=*p++;
+ *q++=(uint16_t)((x<<8)|(x>>8));
+ --count;
+ }
+
+ return length;
+}
+
+static int32_t U_CALLCONV
+uprv_copyArray16(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ if(length>0 && inData!=outData) {
+ uprv_memcpy(outData, inData, length);
+ }
+ return length;
+}
+
+static int32_t U_CALLCONV
+uprv_swapArray32(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint32_t *p;
+ uint32_t *q;
+ int32_t count;
+ uint32_t x;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and swapping */
+ p=(const uint32_t *)inData;
+ q=(uint32_t *)outData;
+ count=length/4;
+ while(count>0) {
+ x=*p++;
+ *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
+ --count;
+ }
+
+ return length;
+}
+
+static int32_t U_CALLCONV
+uprv_copyArray32(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ if(length>0 && inData!=outData) {
+ uprv_memcpy(outData, inData, length);
+ }
+ return length;
+}
+
+static int32_t U_CALLCONV
+uprv_swapArray64(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint64_t *p;
+ uint64_t *q;
+ int32_t count;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and swapping */
+ p=(const uint64_t *)inData;
+ q=(uint64_t *)outData;
+ count=length/8;
+ while(count>0) {
+ uint64_t x=*p++;
+ x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)|
+ ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56);
+ *q++=x;
+ --count;
+ }
+
+ return length;
+}
+
+static int32_t U_CALLCONV
+uprv_copyArray64(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ if(length>0 && inData!=outData) {
+ uprv_memcpy(outData, inData, length);
+ }
+ return length;
+}
+
+static uint16_t U_CALLCONV
+uprv_readSwapUInt16(uint16_t x) {
+ return (uint16_t)((x<<8)|(x>>8));
+}
+
+static uint16_t U_CALLCONV
+uprv_readDirectUInt16(uint16_t x) {
+ return x;
+}
+
+static uint32_t U_CALLCONV
+uprv_readSwapUInt32(uint32_t x) {
+ return (uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
+}
+
+static uint32_t U_CALLCONV
+uprv_readDirectUInt32(uint32_t x) {
+ return x;
+}
+
+static void U_CALLCONV
+uprv_writeSwapUInt16(uint16_t *p, uint16_t x) {
+ *p=(uint16_t)((x<<8)|(x>>8));
+}
+
+static void U_CALLCONV
+uprv_writeDirectUInt16(uint16_t *p, uint16_t x) {
+ *p=x;
+}
+
+static void U_CALLCONV
+uprv_writeSwapUInt32(uint32_t *p, uint32_t x) {
+ *p=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
+}
+
+static void U_CALLCONV
+uprv_writeDirectUInt32(uint32_t *p, uint32_t x) {
+ *p=x;
+}
+
+U_CAPI int16_t U_EXPORT2
+udata_readInt16(const UDataSwapper *ds, int16_t x) {
+ return (int16_t)ds->readUInt16((uint16_t)x);
+}
+
+U_CAPI int32_t U_EXPORT2
+udata_readInt32(const UDataSwapper *ds, int32_t x) {
+ return (int32_t)ds->readUInt32((uint32_t)x);
+}
+
+/**
+ * Swap a block of invariant, NUL-terminated strings, but not padding
+ * bytes after the last string.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swapInvStringBlock(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const char *inChars;
+ int32_t stringsLength;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* reduce the strings length to not include bytes after the last NUL */
+ inChars=(const char *)inData;
+ stringsLength=length;
+ while(stringsLength>0 && inChars[stringsLength-1]!=0) {
+ --stringsLength;
+ }
+
+ /* swap up to the last NUL */
+ ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode);
+
+ /* copy the bytes after the last NUL */
+ if(inData!=outData && length>stringsLength) {
+ uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength);
+ }
+
+ /* return the length including padding bytes */
+ if(U_SUCCESS(*pErrorCode)) {
+ return length;
+ } else {
+ return 0;
+ }
+}
+
+U_CAPI void U_EXPORT2
+udata_printError(const UDataSwapper *ds,
+ const char *fmt,
+ ...) {
+ va_list args;
+
+ if(ds->printError!=NULL) {
+ va_start(args, fmt);
+ ds->printError(ds->printErrorContext, fmt, args);
+ va_end(args);
+ }
+}
+
+/* swap a data header ------------------------------------------------------- */
+
+U_CAPI int32_t U_EXPORT2
+udata_swapDataHeader(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const DataHeader *pHeader;
+ uint16_t headerSize, infoSize;
+
+ /* argument checking */
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* check minimum length and magic bytes */
+ pHeader=(const DataHeader *)inData;
+ if( (length>=0 && length<sizeof(DataHeader)) ||
+ pHeader->dataHeader.magic1!=0xda ||
+ pHeader->dataHeader.magic2!=0x27 ||
+ pHeader->info.sizeofUChar!=2
+ ) {
+ udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n");
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ headerSize=ds->readUInt16(pHeader->dataHeader.headerSize);
+ infoSize=ds->readUInt16(pHeader->info.size);
+
+ if( headerSize<sizeof(DataHeader) ||
+ infoSize<sizeof(UDataInfo) ||
+ headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
+ (length>=0 && length<headerSize)
+ ) {
+ udata_printError(ds, "udata_swapDataHeader(): header size mismatch - headerSize %d infoSize %d length %d\n",
+ headerSize, infoSize, length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ if(length>0) {
+ DataHeader *outHeader;
+ const char *s;
+ int32_t maxLength;
+
+ /* Most of the fields are just bytes and need no swapping. */
+ if(inData!=outData) {
+ uprv_memcpy(outData, inData, headerSize);
+ }
+ outHeader=(DataHeader *)outData;
+
+ outHeader->info.isBigEndian = ds->outIsBigEndian;
+ outHeader->info.charsetFamily = ds->outCharset;
+
+ /* swap headerSize */
+ ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode);
+
+ /* swap UDataInfo size and reservedWord */
+ ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode);
+
+ /* swap copyright statement after the UDataInfo */
+ infoSize+=sizeof(pHeader->dataHeader);
+ s=(const char *)inData+infoSize;
+ maxLength=headerSize-infoSize;
+ /* get the length of the string */
+ for(length=0; length<maxLength && s[length]!=0; ++length) {}
+ /* swap the string contents */
+ ds->swapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode);
+ }
+
+ return headerSize;
+}
+
+/* API functions ------------------------------------------------------------ */
+
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
+ UBool outIsBigEndian, uint8_t outCharset,
+ UErrorCode *pErrorCode) {
+ UDataSwapper *swapper;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return NULL;
+ }
+ if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ /* allocate the swapper */
+ swapper=uprv_malloc(sizeof(UDataSwapper));
+ if(swapper==NULL) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ uprv_memset(swapper, 0, sizeof(UDataSwapper));
+
+ /* set values and functions pointers according to in/out parameters */
+ swapper->inIsBigEndian=inIsBigEndian;
+ swapper->inCharset=inCharset;
+ swapper->outIsBigEndian=outIsBigEndian;
+ swapper->outCharset=outCharset;
+
+ swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16;
+ swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32;
+
+ swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt16 : uprv_writeSwapUInt16;
+ swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32;
+
+ swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic;
+
+ if(inIsBigEndian==outIsBigEndian) {
+ swapper->swapArray16=uprv_copyArray16;
+ swapper->swapArray32=uprv_copyArray32;
+ swapper->swapArray64=uprv_copyArray64;
+ } else {
+ swapper->swapArray16=uprv_swapArray16;
+ swapper->swapArray32=uprv_swapArray32;
+ swapper->swapArray64=uprv_swapArray64;
+ }
+
+ if(inCharset==U_ASCII_FAMILY) {
+ swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii;
+ } else /* U_EBCDIC_FAMILY */ {
+ swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic;
+ }
+
+ return swapper;
+}
+
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapperForInputData(const void *data, int32_t length,
+ UBool outIsBigEndian, uint8_t outCharset,
+ UErrorCode *pErrorCode) {
+ const DataHeader *pHeader;
+ uint16_t headerSize, infoSize;
+ UBool inIsBigEndian;
+ int8_t inCharset;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return NULL;
+ }
+ if( data==NULL ||
+ (length>=0 && length<sizeof(DataHeader)) ||
+ outCharset>U_EBCDIC_FAMILY
+ ) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ pHeader=(const DataHeader *)data;
+ if( (length>=0 && length<sizeof(DataHeader)) ||
+ pHeader->dataHeader.magic1!=0xda ||
+ pHeader->dataHeader.magic2!=0x27 ||
+ pHeader->info.sizeofUChar!=2
+ ) {
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ inIsBigEndian=(UBool)pHeader->info.isBigEndian;
+ inCharset=pHeader->info.charsetFamily;
+
+ if(inIsBigEndian==U_IS_BIG_ENDIAN) {
+ headerSize=pHeader->dataHeader.headerSize;
+ infoSize=pHeader->info.size;
+ } else {
+ headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize);
+ infoSize=uprv_readSwapUInt16(pHeader->info.size);
+ }
+
+ if( headerSize<sizeof(DataHeader) ||
+ infoSize<sizeof(UDataInfo) ||
+ headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
+ (length>=0 && length<headerSize)
+ ) {
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ return udata_openSwapper(inIsBigEndian, inCharset, outIsBigEndian, outCharset, pErrorCode);
+}
+
+U_CAPI void U_EXPORT2
+udata_closeSwapper(UDataSwapper *ds) {
+ uprv_free(ds);
+}
diff --git a/src/core/basetypes/icu-ucsdet/uenum.c b/src/core/basetypes/icu-ucsdet/uenum.c
new file mode 100644
index 00000000..9a3d9e14
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/uenum.c
@@ -0,0 +1,187 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uenum.c
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2002jul08
+* created by: Vladimir Weinstein
+*/
+
+#include "unicode/putil.h"
+#include "uenumimp.h"
+#include "cmemory.h"
+
+/* Layout of the baseContext buffer. */
+typedef struct {
+ int32_t len; /* number of bytes available starting at 'data' */
+ char data; /* actual data starts here */
+} _UEnumBuffer;
+
+/* Extra bytes to allocate in the baseContext buffer. */
+static const int32_t PAD = 8;
+
+/* Return a pointer to the baseContext buffer, possibly allocating
+ or reallocating it if at least 'capacity' bytes are not available. */
+static void* _getBuffer(UEnumeration* en, int32_t capacity) {
+
+ if (en->baseContext != NULL) {
+ if (((_UEnumBuffer*) en->baseContext)->len < capacity) {
+ capacity += PAD;
+ en->baseContext = uprv_realloc(en->baseContext,
+ sizeof(int32_t) + capacity);
+ if (en->baseContext == NULL) {
+ return NULL;
+ }
+ ((_UEnumBuffer*) en->baseContext)->len = capacity;
+ }
+ } else {
+ capacity += PAD;
+ en->baseContext = uprv_malloc(sizeof(int32_t) + capacity);
+ if (en->baseContext == NULL) {
+ return NULL;
+ }
+ ((_UEnumBuffer*) en->baseContext)->len = capacity;
+ }
+
+ return (void*) & ((_UEnumBuffer*) en->baseContext)->data;
+}
+
+U_CAPI void U_EXPORT2
+uenum_close(UEnumeration* en)
+{
+ if (en) {
+ if (en->close != NULL) {
+ if (en->baseContext) {
+ uprv_free(en->baseContext);
+ }
+ en->close(en);
+ } else { /* this seems dangerous, but we better kill the object */
+ uprv_free(en);
+ }
+ }
+}
+
+U_CAPI int32_t U_EXPORT2
+uenum_count(UEnumeration* en, UErrorCode* status)
+{
+ if (!en || U_FAILURE(*status)) {
+ return -1;
+ }
+ if (en->count != NULL) {
+ return en->count(en, status);
+ } else {
+ *status = U_UNSUPPORTED_ERROR;
+ return -1;
+ }
+}
+
+/* Don't call this directly. Only uenum_unext should be calling this. */
+U_CAPI const UChar* U_EXPORT2
+uenum_unextDefault(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status)
+{
+ UChar *ustr = NULL;
+ int32_t len = 0;
+ if (en->next != NULL) {
+ const char *cstr = en->next(en, &len, status);
+ if (cstr != NULL) {
+ ustr = (UChar*) _getBuffer(en, (len+1) * sizeof(UChar));
+ if (ustr == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ u_charsToUChars(cstr, ustr, len+1);
+ }
+ }
+ } else {
+ *status = U_UNSUPPORTED_ERROR;
+ }
+ if (resultLength) {
+ *resultLength = len;
+ }
+ return ustr;
+}
+
+/* Don't call this directly. Only uenum_next should be calling this. */
+U_CAPI const char* U_EXPORT2
+uenum_nextDefault(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status)
+{
+ if (en->uNext != NULL) {
+ char *tempCharVal;
+ const UChar *tempUCharVal = en->uNext(en, resultLength, status);
+ if (tempUCharVal == NULL) {
+ return NULL;
+ }
+ tempCharVal = (char*)
+ _getBuffer(en, (*resultLength+1) * sizeof(char));
+ if (!tempCharVal) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ u_UCharsToChars(tempUCharVal, tempCharVal, *resultLength + 1);
+ return tempCharVal;
+ } else {
+ *status = U_UNSUPPORTED_ERROR;
+ return NULL;
+ }
+}
+
+U_CAPI const UChar* U_EXPORT2
+uenum_unext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status)
+{
+ if (!en || U_FAILURE(*status)) {
+ return NULL;
+ }
+ if (en->uNext != NULL) {
+ return en->uNext(en, resultLength, status);
+ } else {
+ *status = U_UNSUPPORTED_ERROR;
+ return NULL;
+ }
+}
+
+U_CAPI const char* U_EXPORT2
+uenum_next(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status)
+{
+ if (!en || U_FAILURE(*status)) {
+ return NULL;
+ }
+ if (en->next != NULL) {
+ if (resultLength != NULL) {
+ return en->next(en, resultLength, status);
+ }
+ else {
+ int32_t dummyLength=0;
+ return en->next(en, &dummyLength, status);
+ }
+ } else {
+ *status = U_UNSUPPORTED_ERROR;
+ return NULL;
+ }
+}
+
+U_CAPI void U_EXPORT2
+uenum_reset(UEnumeration* en, UErrorCode* status)
+{
+ if (!en || U_FAILURE(*status)) {
+ return;
+ }
+ if (en->reset != NULL) {
+ en->reset(en, status);
+ } else {
+ *status = U_UNSUPPORTED_ERROR;
+ }
+}
diff --git a/src/core/basetypes/icu-ucsdet/uinvchar.c b/src/core/basetypes/icu-ucsdet/uinvchar.c
new file mode 100644
index 00000000..f874edd9
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/uinvchar.c
@@ -0,0 +1,611 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uinvchar.c
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2004sep14
+* created by: Markus W. Scherer
+*
+* Functions for handling invariant characters, moved here from putil.c
+* for better modularization.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "udataswp.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "uinvchar.h"
+
+/* invariant-character handling --------------------------------------------- */
+
+/*
+ * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
+ * appropriately for most EBCDIC codepages.
+ *
+ * They currently also map most other ASCII graphic characters,
+ * appropriately for codepages 37 and 1047.
+ * Exceptions: The characters for []^ have different codes in 37 & 1047.
+ * Both versions are mapped to ASCII.
+ *
+ * ASCII 37 1047
+ * [ 5B BA AD
+ * ] 5D BB BD
+ * ^ 5E B0 5F
+ *
+ * There are no mappings for variant characters from Unicode to EBCDIC.
+ *
+ * Currently, C0 control codes are also included in these maps.
+ * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
+ * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
+ * but there is no mapping for ASCII LF back to EBCDIC.
+ *
+ * ASCII EBCDIC S/390-OE
+ * LF 0A 25 15
+ * NEL 85 15 25
+ *
+ * The maps below explicitly exclude the variant
+ * control and graphical characters that are in ASCII-based
+ * codepages at 0x80 and above.
+ * "No mapping" is expressed by mapping to a 00 byte.
+ *
+ * These tables do not establish a converter or a codepage.
+ */
+
+static const uint8_t asciiFromEbcdic[256]={
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
+ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
+
+ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
+ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
+ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
+
+ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
+ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
+
+ 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const uint8_t ebcdicFromAscii[256]={
+ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
+
+ 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
+ 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
+ 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Same as asciiFromEbcdic[] except maps all letters to lowercase. */
+static const uint8_t lowercaseAsciiFromEbcdic[256]={
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
+ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
+
+ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
+ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
+ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
+
+ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
+ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
+
+ 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/*
+ * Bit sets indicating which characters of the ASCII repertoire
+ * (by ASCII/Unicode code) are "invariant".
+ * See utypes.h for more details.
+ *
+ * As invariant are considered the characters of the ASCII repertoire except
+ * for the following:
+ * 21 '!' <exclamation mark>
+ * 23 '#' <number sign>
+ * 24 '$' <dollar sign>
+ *
+ * 40 '@' <commercial at>
+ *
+ * 5b '[' <left bracket>
+ * 5c '\' <backslash>
+ * 5d ']' <right bracket>
+ * 5e '^' <circumflex>
+ *
+ * 60 '`' <grave accent>
+ *
+ * 7b '{' <left brace>
+ * 7c '|' <vertical line>
+ * 7d '}' <right brace>
+ * 7e '~' <tilde>
+ */
+static const uint32_t invariantChars[4]={
+ 0xfffffbff, /* 00..1f but not 0a */
+ 0xffffffe5, /* 20..3f but not 21 23 24 */
+ 0x87fffffe, /* 40..5f but not 40 5b..5e */
+ 0x87fffffe /* 60..7f but not 60 7b..7e */
+};
+
+/*
+ * test unsigned types (or values known to be non-negative) for invariant characters,
+ * tests ASCII-family character values
+ */
+#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)
+
+/* test signed types for invariant characters, adds test for positive values */
+#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+#define CHAR_TO_UCHAR(c) c
+#define UCHAR_TO_CHAR(c) c
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u]
+#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u]
+#else
+# error U_CHARSET_FAMILY is not valid
+#endif
+
+
+U_CAPI void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, int32_t length) {
+ UChar u;
+ uint8_t c;
+
+ /*
+ * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
+ * For EBCDIC systems, this works for characters with codes from
+ * codepages 37 and 1047 or compatible.
+ */
+ while(length>0) {
+ c=(uint8_t)(*cs++);
+ u=(UChar)CHAR_TO_UCHAR(c);
+ U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */
+ *us++=u;
+ --length;
+ }
+}
+
+U_CAPI void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
+ UChar u;
+
+ while(length>0) {
+ u=*us++;
+ if(!UCHAR_IS_INVARIANT(u)) {
+ U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
+ u=0;
+ }
+ *cs++=(char)UCHAR_TO_CHAR(u);
+ --length;
+ }
+}
+
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantString(const char *s, int32_t length) {
+ uint8_t c;
+
+ for(;;) {
+ if(length<0) {
+ /* NUL-terminated */
+ c=(uint8_t)*s++;
+ if(c==0) {
+ break;
+ }
+ } else {
+ /* count length */
+ if(length==0) {
+ break;
+ }
+ --length;
+ c=(uint8_t)*s++;
+ if(c==0) {
+ continue; /* NUL is invariant */
+ }
+ }
+ /* c!=0 now, one branch below checks c==0 for variant characters */
+
+ /*
+ * no assertions here because these functions are legitimately called
+ * for strings with variant characters
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+ if(!UCHAR_IS_INVARIANT(c)) {
+ return FALSE; /* found a variant char */
+ }
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ c=CHAR_TO_UCHAR(c);
+ if(c==0 || !UCHAR_IS_INVARIANT(c)) {
+ return FALSE; /* found a variant char */
+ }
+#else
+# error U_CHARSET_FAMILY is not valid
+#endif
+ }
+ return TRUE;
+}
+
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantUString(const UChar *s, int32_t length) {
+ UChar c;
+
+ for(;;) {
+ if(length<0) {
+ /* NUL-terminated */
+ c=*s++;
+ if(c==0) {
+ break;
+ }
+ } else {
+ /* count length */
+ if(length==0) {
+ break;
+ }
+ --length;
+ c=*s++;
+ }
+
+ /*
+ * no assertions here because these functions are legitimately called
+ * for strings with variant characters
+ */
+ if(!UCHAR_IS_INVARIANT(c)) {
+ return FALSE; /* found a variant char */
+ }
+ }
+ return TRUE;
+}
+
+/* UDataSwapFn implementations used in udataswp.c ------- */
+
+/* convert ASCII to EBCDIC and verify that all characters are invariant */
+U_CAPI int32_t U_EXPORT2
+uprv_ebcdicFromAscii(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint8_t *s;
+ uint8_t *t;
+ uint8_t c;
+
+ int32_t count;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and swapping */
+ s=(const uint8_t *)inData;
+ t=(uint8_t *)outData;
+ count=length;
+ while(count>0) {
+ c=*s++;
+ if(!UCHAR_IS_INVARIANT(c)) {
+ udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
+ length, length-count);
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ *t++=ebcdicFromAscii[c];
+ --count;
+ }
+
+ return length;
+}
+
+/* this function only checks and copies ASCII strings without conversion */
+U_CFUNC int32_t
+uprv_copyAscii(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint8_t *s;
+ uint8_t c;
+
+ int32_t count;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and checking */
+ s=(const uint8_t *)inData;
+ count=length;
+ while(count>0) {
+ c=*s++;
+ if(!UCHAR_IS_INVARIANT(c)) {
+ udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
+ length, length-count);
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ --count;
+ }
+
+ if(length>0 && inData!=outData) {
+ uprv_memcpy(outData, inData, length);
+ }
+
+ return length;
+}
+
+/* convert EBCDIC to ASCII and verify that all characters are invariant */
+U_CFUNC int32_t
+uprv_asciiFromEbcdic(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint8_t *s;
+ uint8_t *t;
+ uint8_t c;
+
+ int32_t count;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and swapping */
+ s=(const uint8_t *)inData;
+ t=(uint8_t *)outData;
+ count=length;
+ while(count>0) {
+ c=*s++;
+ if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
+ udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
+ length, length-count);
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ *t++=c;
+ --count;
+ }
+
+ return length;
+}
+
+/* this function only checks and copies EBCDIC strings without conversion */
+U_CFUNC int32_t
+uprv_copyEbcdic(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint8_t *s;
+ uint8_t c;
+
+ int32_t count;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and checking */
+ s=(const uint8_t *)inData;
+ count=length;
+ while(count>0) {
+ c=*s++;
+ if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
+ udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
+ length, length-count);
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ --count;
+ }
+
+ if(length>0 && inData!=outData) {
+ uprv_memcpy(outData, inData, length);
+ }
+
+ return length;
+}
+
+/* compare invariant strings; variant characters compare less than others and unlike each other */
+U_CFUNC int32_t
+uprv_compareInvAscii(const UDataSwapper *ds,
+ const char *outString, int32_t outLength,
+ const UChar *localString, int32_t localLength) {
+ int32_t minLength;
+ UChar32 c1, c2;
+ uint8_t c;
+
+ if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
+ return 0;
+ }
+
+ if(outLength<0) {
+ outLength=(int32_t)uprv_strlen(outString);
+ }
+ if(localLength<0) {
+ localLength=u_strlen(localString);
+ }
+
+ minLength= outLength<localLength ? outLength : localLength;
+
+ while(minLength>0) {
+ c=(uint8_t)*outString++;
+ if(UCHAR_IS_INVARIANT(c)) {
+ c1=c;
+ } else {
+ c1=-1;
+ }
+
+ c2=*localString++;
+ if(!UCHAR_IS_INVARIANT(c2)) {
+ c2=-2;
+ }
+
+ if((c1-=c2)!=0) {
+ return c1;
+ }
+
+ --minLength;
+ }
+
+ /* strings start with same prefix, compare lengths */
+ return outLength-localLength;
+}
+
+U_CFUNC int32_t
+uprv_compareInvEbcdic(const UDataSwapper *ds,
+ const char *outString, int32_t outLength,
+ const UChar *localString, int32_t localLength) {
+ int32_t minLength;
+ UChar32 c1, c2;
+ uint8_t c;
+
+ if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
+ return 0;
+ }
+
+ if(outLength<0) {
+ outLength=(int32_t)uprv_strlen(outString);
+ }
+ if(localLength<0) {
+ localLength=u_strlen(localString);
+ }
+
+ minLength= outLength<localLength ? outLength : localLength;
+
+ while(minLength>0) {
+ c=(uint8_t)*outString++;
+ if(c==0) {
+ c1=0;
+ } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
+ /* c1 is set */
+ } else {
+ c1=-1;
+ }
+
+ c2=*localString++;
+ if(!UCHAR_IS_INVARIANT(c2)) {
+ c2=-2;
+ }
+
+ if((c1-=c2)!=0) {
+ return c1;
+ }
+
+ --minLength;
+ }
+
+ /* strings start with same prefix, compare lengths */
+ return outLength-localLength;
+}
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
+ int32_t c1, c2;
+
+ for(;; ++s1, ++s2) {
+ c1=(uint8_t)*s1;
+ c2=(uint8_t)*s2;
+ if(c1!=c2) {
+ if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
+ c1=-(int32_t)(uint8_t)*s1;
+ }
+ if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
+ c2=-(int32_t)(uint8_t)*s2;
+ }
+ return c1-c2;
+ } else if(c1==0) {
+ return 0;
+ }
+ }
+}
+
+U_CAPI char U_EXPORT2
+uprv_ebcdicToLowercaseAscii(char c) {
+ return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
+}
+
+U_INTERNAL uint8_t* U_EXPORT2
+uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
+{
+ uint8_t *orig_dst = dst;
+
+ if(n==-1) {
+ n = uprv_strlen((const char*)src)+1; /* copy NUL */
+ }
+ /* copy non-null */
+ while(*src && n>0) {
+ *(dst++) = asciiFromEbcdic[*(src++)];
+ n--;
+ }
+ /* pad */
+ while(n>0) {
+ *(dst++) = 0;
+ n--;
+ }
+ return orig_dst;
+}
+
+U_INTERNAL uint8_t* U_EXPORT2
+uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
+{
+ uint8_t *orig_dst = dst;
+
+ if(n==-1) {
+ n = uprv_strlen((const char*)src)+1; /* copy NUL */
+ }
+ /* copy non-null */
+ while(*src && n>0) {
+ char ch = ebcdicFromAscii[*(src++)];
+ if(ch == 0) {
+ ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
+ }
+ *(dst++) = ch;
+ n--;
+ }
+ /* pad */
+ while(n>0) {
+ *(dst++) = 0;
+ n--;
+ }
+ return orig_dst;
+}
+
diff --git a/src/core/basetypes/icu-ucsdet/umutex.cpp b/src/core/basetypes/icu-ucsdet/umutex.cpp
new file mode 100644
index 00000000..0c1fdc80
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/umutex.cpp
@@ -0,0 +1,392 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File umutex.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 04/07/99 srl updated
+* 05/13/99 stephen Changed to umutex (from cmutex).
+* 11/22/99 aliu Make non-global mutex autoinitialize [j151]
+******************************************************************************
+*/
+
+#include "umutex.h"
+
+#include "unicode/utypes.h"
+#include "uassert.h"
+#include "cmemory.h"
+
+
+// The ICU global mutex. Used when ICU implementation code passes NULL for the mutex pointer.
+static UMutex globalMutex = U_MUTEX_INITIALIZER;
+
+/*
+ * ICU Mutex wrappers. Wrap operating system mutexes, giving the rest of ICU a
+ * platform independent set of mutex operations. For internal ICU use only.
+ */
+
+#if defined(U_USER_MUTEX_CPP)
+// Build time user mutex hook: #include "U_USER_MUTEX_CPP"
+#include U_MUTEX_XSTR(U_USER_MUTEX_CPP)
+
+#elif U_PLATFORM_HAS_WIN32_API
+
+//-------------------------------------------------------------------------------------------
+//
+// Windows Specific Definitions
+//
+// Note: Cygwin (and possibly others) have both WIN32 and POSIX.
+// Prefer Win32 in these cases. (Win32 comes ahead in the #if chain)
+//
+//-------------------------------------------------------------------------------------------
+
+#if defined U_NO_PLATFORM_ATOMICS
+#error ICU on Win32 requires support for low level atomic operations.
+// Visual Studio, gcc, clang are OK. Shouldn't get here.
+#endif
+
+
+// This function is called when a test of a UInitOnce::fState reveals that
+// initialization has not completed, that we either need to call the
+// function on this thread, or wait for some other thread to complete.
+//
+// The actual call to the init function is made inline by template code
+// that knows the C++ types involved. This function returns TRUE if
+// the caller needs to call the Init function.
+//
+
+U_NAMESPACE_BEGIN
+
+U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &uio) {
+ for (;;) {
+ int32_t previousState = InterlockedCompareExchange(
+#if (U_PLATFORM == U_PF_MINGW) || (U_PLATFORM == U_PF_CYGWIN) || defined(__clang__)
+ (LONG volatile *) // this is the type given in the API doc for this function.
+#endif
+ &uio.fState, // Destination
+ 1, // Exchange Value
+ 0); // Compare value
+
+ if (previousState == 0) {
+ return true; // Caller will next call the init function.
+ // Current state == 1.
+ } else if (previousState == 2) {
+ // Another thread already completed the initialization.
+ // We can simply return FALSE, indicating no
+ // further action is needed by the caller.
+ return FALSE;
+ } else {
+ // Another thread is currently running the initialization.
+ // Wait until it completes.
+ do {
+ Sleep(1);
+ previousState = umtx_loadAcquire(uio.fState);
+ } while (previousState == 1);
+ }
+ }
+}
+
+// This function is called by the thread that ran an initialization function,
+// just after completing the function.
+//
+// success: True: the inialization succeeded. No further calls to the init
+// function will be made.
+// False: the initializtion failed. The next call to umtx_initOnce()
+// will retry the initialization.
+
+U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce &uio) {
+ umtx_storeRelease(uio.fState, 2);
+}
+
+U_NAMESPACE_END
+
+static void winMutexInit(CRITICAL_SECTION *cs) {
+ InitializeCriticalSection(cs);
+ return;
+}
+
+U_CAPI void U_EXPORT2
+umtx_lock(UMutex *mutex) {
+ if (mutex == NULL) {
+ mutex = &globalMutex;
+ }
+ CRITICAL_SECTION *cs = &mutex->fCS;
+ umtx_initOnce(mutex->fInitOnce, winMutexInit, cs);
+ EnterCriticalSection(cs);
+}
+
+U_CAPI void U_EXPORT2
+umtx_unlock(UMutex* mutex)
+{
+ if (mutex == NULL) {
+ mutex = &globalMutex;
+ }
+ LeaveCriticalSection(&mutex->fCS);
+}
+
+
+U_CAPI void U_EXPORT2
+umtx_condBroadcast(UConditionVar *condition) {
+ // We require that the associated mutex be held by the caller,
+ // so access to fWaitCount is protected and safe. No other thread can
+ // call condWait() while we are here.
+ if (condition->fWaitCount == 0) {
+ return;
+ }
+ ResetEvent(condition->fExitGate);
+ SetEvent(condition->fEntryGate);
+}
+
+U_CAPI void U_EXPORT2
+umtx_condSignal(UConditionVar *condition) {
+ // Function not implemented. There is no immediate requirement from ICU to have it.
+ // Once ICU drops support for Windows XP and Server 2003, ICU Condition Variables will be
+ // changed to be thin wrappers on native Windows CONDITION_VARIABLEs, and this function
+ // becomes trivial to provide.
+ U_ASSERT(FALSE);
+}
+
+U_CAPI void U_EXPORT2
+umtx_condWait(UConditionVar *condition, UMutex *mutex) {
+ if (condition->fEntryGate == NULL) {
+ // Note: because the associated mutex must be locked when calling
+ // wait, we know that there can not be multiple threads
+ // running here with the same condition variable.
+ // Meaning that lazy initialization is safe.
+ U_ASSERT(condition->fExitGate == NULL);
+ condition->fEntryGate = CreateEvent(NULL, // Security Attributes
+ TRUE, // Manual Reset
+ FALSE, // Initially reset
+ NULL); // Name.
+ U_ASSERT(condition->fEntryGate != NULL);
+ condition->fExitGate = CreateEvent(NULL, TRUE, TRUE, NULL);
+ U_ASSERT(condition->fExitGate != NULL);
+ }
+
+ condition->fWaitCount++;
+ umtx_unlock(mutex);
+ WaitForSingleObject(condition->fEntryGate, INFINITE);
+ umtx_lock(mutex);
+ condition->fWaitCount--;
+ if (condition->fWaitCount == 0) {
+ // All threads that were waiting at the entry gate have woken up
+ // and moved through. Shut the entry gate and open the exit gate.
+ ResetEvent(condition->fEntryGate);
+ SetEvent(condition->fExitGate);
+ } else {
+ umtx_unlock(mutex);
+ WaitForSingleObject(condition->fExitGate, INFINITE);
+ umtx_lock(mutex);
+ }
+}
+
+
+#elif U_PLATFORM_IMPLEMENTS_POSIX
+
+//-------------------------------------------------------------------------------------------
+//
+// POSIX specific definitions
+//
+//-------------------------------------------------------------------------------------------
+
+# include <pthread.h>
+
+// Each UMutex consists of a pthread_mutex_t.
+// All are statically initialized and ready for use.
+// There is no runtime mutex initialization code needed.
+
+U_CAPI void U_EXPORT2
+umtx_lock(UMutex *mutex) {
+ if (mutex == NULL) {
+ mutex = &globalMutex;
+ }
+ int sysErr = pthread_mutex_lock(&mutex->fMutex);
+ (void)sysErr; // Suppress unused variable warnings.
+ U_ASSERT(sysErr == 0);
+}
+
+
+U_CAPI void U_EXPORT2
+umtx_unlock(UMutex* mutex)
+{
+ if (mutex == NULL) {
+ mutex = &globalMutex;
+ }
+ int sysErr = pthread_mutex_unlock(&mutex->fMutex);
+ (void)sysErr; // Suppress unused variable warnings.
+ U_ASSERT(sysErr == 0);
+}
+
+
+U_CAPI void U_EXPORT2
+umtx_condWait(UConditionVar *cond, UMutex *mutex) {
+ if (mutex == NULL) {
+ mutex = &globalMutex;
+ }
+ int sysErr = pthread_cond_wait(&cond->fCondition, &mutex->fMutex);
+ (void)sysErr;
+ U_ASSERT(sysErr == 0);
+}
+
+U_CAPI void U_EXPORT2
+umtx_condBroadcast(UConditionVar *cond) {
+ int sysErr = pthread_cond_broadcast(&cond->fCondition);
+ (void)sysErr;
+ U_ASSERT(sysErr == 0);
+}
+
+U_CAPI void U_EXPORT2
+umtx_condSignal(UConditionVar *cond) {
+ int sysErr = pthread_cond_signal(&cond->fCondition);
+ (void)sysErr;
+ U_ASSERT(sysErr == 0);
+}
+
+
+
+U_NAMESPACE_BEGIN
+
+static pthread_mutex_t initMutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t initCondition = PTHREAD_COND_INITIALIZER;
+
+
+// This function is called when a test of a UInitOnce::fState reveals that
+// initialization has not completed, that we either need to call the
+// function on this thread, or wait for some other thread to complete.
+//
+// The actual call to the init function is made inline by template code
+// that knows the C++ types involved. This function returns TRUE if
+// the caller needs to call the Init function.
+//
+U_COMMON_API UBool U_EXPORT2
+umtx_initImplPreInit(UInitOnce &uio) {
+ pthread_mutex_lock(&initMutex);
+ int32_t state = uio.fState;
+ if (state == 0) {
+ umtx_storeRelease(uio.fState, 1);
+ pthread_mutex_unlock(&initMutex);
+ return TRUE; // Caller will next call the init function.
+ } else {
+ while (uio.fState == 1) {
+ // Another thread is currently running the initialization.
+ // Wait until it completes.
+ pthread_cond_wait(&initCondition, &initMutex);
+ }
+ pthread_mutex_unlock(&initMutex);
+ U_ASSERT(uio.fState == 2);
+ return FALSE;
+ }
+}
+
+
+
+// This function is called by the thread that ran an initialization function,
+// just after completing the function.
+// Some threads may be waiting on the condition, requiring the broadcast wakeup.
+// Some threads may be racing to test the fState variable outside of the mutex,
+// requiring the use of store/release when changing its value.
+
+U_COMMON_API void U_EXPORT2
+umtx_initImplPostInit(UInitOnce &uio) {
+ pthread_mutex_lock(&initMutex);
+ umtx_storeRelease(uio.fState, 2);
+ pthread_cond_broadcast(&initCondition);
+ pthread_mutex_unlock(&initMutex);
+}
+
+U_NAMESPACE_END
+
+// End of POSIX specific umutex implementation.
+
+#else // Platform #define chain.
+
+#error Unknown Platform
+
+#endif // Platform #define chain.
+
+
+//-------------------------------------------------------------------------------
+//
+// Atomic Operations, out-of-line versions.
+// These are conditional, only defined if better versions
+// were not available for the platform.
+//
+// These versions are platform neutral.
+//
+//--------------------------------------------------------------------------------
+
+#if defined U_NO_PLATFORM_ATOMICS
+static UMutex gIncDecMutex = U_MUTEX_INITIALIZER;
+
+U_NAMESPACE_BEGIN
+
+U_COMMON_API int32_t U_EXPORT2
+umtx_atomic_inc(u_atomic_int32_t *p) {
+ int32_t retVal;
+ umtx_lock(&gIncDecMutex);
+ retVal = ++(*p);
+ umtx_unlock(&gIncDecMutex);
+ return retVal;
+}
+
+
+U_COMMON_API int32_t U_EXPORT2
+umtx_atomic_dec(u_atomic_int32_t *p) {
+ int32_t retVal;
+ umtx_lock(&gIncDecMutex);
+ retVal = --(*p);
+ umtx_unlock(&gIncDecMutex);
+ return retVal;
+}
+
+U_COMMON_API int32_t U_EXPORT2
+umtx_loadAcquire(u_atomic_int32_t &var) {
+ int32_t val = var;
+ umtx_lock(&gIncDecMutex);
+ umtx_unlock(&gIncDecMutex);
+ return val;
+}
+
+U_COMMON_API void U_EXPORT2
+umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
+ umtx_lock(&gIncDecMutex);
+ umtx_unlock(&gIncDecMutex);
+ var = val;
+}
+
+U_NAMESPACE_END
+#endif
+
+//--------------------------------------------------------------------------
+//
+// Deprecated functions for setting user mutexes.
+//
+//--------------------------------------------------------------------------
+
+U_DEPRECATED void U_EXPORT2
+u_setMutexFunctions(const void * /*context */, UMtxInitFn *, UMtxFn *,
+ UMtxFn *, UMtxFn *, UErrorCode *status) {
+ if (U_SUCCESS(*status)) {
+ *status = U_UNSUPPORTED_ERROR;
+ }
+ return;
+}
+
+
+
+U_DEPRECATED void U_EXPORT2
+u_setAtomicIncDecFunctions(const void * /*context */, UMtxAtomicFn *, UMtxAtomicFn *,
+ UErrorCode *status) {
+ if (U_SUCCESS(*status)) {
+ *status = U_UNSUPPORTED_ERROR;
+ }
+ return;
+}
diff --git a/src/core/basetypes/icu-ucsdet/uobject.cpp b/src/core/basetypes/icu-ucsdet/uobject.cpp
new file mode 100644
index 00000000..900e0345
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/uobject.cpp
@@ -0,0 +1,103 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: uobject.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jun26
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/uobject.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+#if U_OVERRIDE_CXX_ALLOCATION
+
+/*
+ * Default implementation of UMemory::new/delete
+ * using uprv_malloc() and uprv_free().
+ *
+ * For testing, this is used together with a list of imported symbols to verify
+ * that ICU is not using the global ::new and ::delete operators.
+ *
+ * These operators can be implemented like this or any other appropriate way
+ * when customizing ICU for certain environments.
+ * Whenever ICU is customized in binary incompatible ways please be sure
+ * to use library name suffixes to distinguish such libraries from
+ * the standard build.
+ *
+ * Instead of just modifying these C++ new/delete operators, it is usually best
+ * to modify the uprv_malloc()/uprv_free()/uprv_realloc() functions in cmemory.c.
+ *
+ * Memory test on Windows/MSVC 6:
+ * The global operators new and delete look as follows:
+ * 04F 00000000 UNDEF notype () External | ??2@YAPAXI@Z (void * __cdecl operator new(unsigned int))
+ * 03F 00000000 UNDEF notype () External | ??3@YAXPAX@Z (void __cdecl operator delete(void *))
+ *
+ * These lines are from output generated by the MSVC 6 tool dumpbin with
+ * dumpbin /symbols *.obj
+ *
+ * ??2@YAPAXI@Z and ??3@YAXPAX@Z are the linker symbols in the .obj
+ * files and are imported from msvcrtd.dll (in a debug build).
+ *
+ * Make sure that with the UMemory operators new and delete defined these two symbols
+ * do not appear in the dumpbin /symbols output for the ICU libraries!
+ *
+ * If such a symbol appears in the output then look in the preceding lines in the output
+ * for which file and function calls the global new or delete operator,
+ * and replace with uprv_malloc/uprv_free.
+ */
+
+void * U_EXPORT2 UMemory::operator new(size_t size) U_NO_THROW {
+ return uprv_malloc(size);
+}
+
+void U_EXPORT2 UMemory::operator delete(void *p) U_NO_THROW {
+ if(p!=NULL) {
+ uprv_free(p);
+ }
+}
+
+void * U_EXPORT2 UMemory::operator new[](size_t size) U_NO_THROW {
+ return uprv_malloc(size);
+}
+
+void U_EXPORT2 UMemory::operator delete[](void *p) U_NO_THROW {
+ if(p!=NULL) {
+ uprv_free(p);
+ }
+}
+
+#if U_HAVE_DEBUG_LOCATION_NEW
+void * U_EXPORT2 UMemory::operator new(size_t size, const char* /*file*/, int /*line*/) U_NO_THROW {
+ return UMemory::operator new(size);
+}
+
+void U_EXPORT2 UMemory::operator delete(void* p, const char* /*file*/, int /*line*/) U_NO_THROW {
+ UMemory::operator delete(p);
+}
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+
+
+#endif
+
+UObject::~UObject() {}
+
+UClassID UObject::getDynamicClassID() const { return NULL; }
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj) {
+ delete static_cast<UObject *>(obj);
+}
diff --git a/src/core/basetypes/icu-ucsdet/ustring.cpp b/src/core/basetypes/icu-ucsdet/ustring.cpp
new file mode 100644
index 00000000..40d23c06
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/ustring.cpp
@@ -0,0 +1,1516 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1998-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File ustring.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 12/07/98 bertrand Creation.
+******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "cstring.h"
+#include "cwchar.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+
+/* ANSI string.h - style functions ------------------------------------------ */
+
+/* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
+#define U_BMP_MAX 0xffff
+
+/* Forward binary string search functions ----------------------------------- */
+
+/*
+ * Test if a substring match inside a string is at code point boundaries.
+ * All pointers refer to the same buffer.
+ * The limit pointer may be NULL, all others must be real pointers.
+ */
+static inline UBool
+isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
+ if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
+ /* the leading edge of the match is in the middle of a surrogate pair */
+ return FALSE;
+ }
+ if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) {
+ /* the trailing edge of the match is in the middle of a surrogate pair */
+ return FALSE;
+ }
+ return TRUE;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length,
+ const UChar *sub, int32_t subLength) {
+ const UChar *start, *p, *q, *subLimit;
+ UChar c, cs, cq;
+
+ if(sub==NULL || subLength<-1) {
+ return (UChar *)s;
+ }
+ if(s==NULL || length<-1) {
+ return NULL;
+ }
+
+ start=s;
+
+ if(length<0 && subLength<0) {
+ /* both strings are NUL-terminated */
+ if((cs=*sub++)==0) {
+ return (UChar *)s;
+ }
+ if(*sub==0 && !U16_IS_SURROGATE(cs)) {
+ /* the substring consists of a single, non-surrogate BMP code point */
+ return u_strchr(s, cs);
+ }
+
+ while((c=*s++)!=0) {
+ if(c==cs) {
+ /* found first substring UChar, compare rest */
+ p=s;
+ q=sub;
+ for(;;) {
+ if((cq=*q)==0) {
+ if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
+ return (UChar *)(s-1); /* well-formed match */
+ } else {
+ break; /* no match because surrogate pair is split */
+ }
+ }
+ if((c=*p)==0) {
+ return NULL; /* no match, and none possible after s */
+ }
+ if(c!=cq) {
+ break; /* no match */
+ }
+ ++p;
+ ++q;
+ }
+ }
+ }
+
+ /* not found */
+ return NULL;
+ }
+
+ if(subLength<0) {
+ subLength=u_strlen(sub);
+ }
+ if(subLength==0) {
+ return (UChar *)s;
+ }
+
+ /* get sub[0] to search for it fast */
+ cs=*sub++;
+ --subLength;
+ subLimit=sub+subLength;
+
+ if(subLength==0 && !U16_IS_SURROGATE(cs)) {
+ /* the substring consists of a single, non-surrogate BMP code point */
+ return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
+ }
+
+ if(length<0) {
+ /* s is NUL-terminated */
+ while((c=*s++)!=0) {
+ if(c==cs) {
+ /* found first substring UChar, compare rest */
+ p=s;
+ q=sub;
+ for(;;) {
+ if(q==subLimit) {
+ if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
+ return (UChar *)(s-1); /* well-formed match */
+ } else {
+ break; /* no match because surrogate pair is split */
+ }
+ }
+ if((c=*p)==0) {
+ return NULL; /* no match, and none possible after s */
+ }
+ if(c!=*q) {
+ break; /* no match */
+ }
+ ++p;
+ ++q;
+ }
+ }
+ }
+ } else {
+ const UChar *limit, *preLimit;
+
+ /* subLength was decremented above */
+ if(length<=subLength) {
+ return NULL; /* s is shorter than sub */
+ }
+
+ limit=s+length;
+
+ /* the substring must start before preLimit */
+ preLimit=limit-subLength;
+
+ while(s!=preLimit) {
+ c=*s++;
+ if(c==cs) {
+ /* found first substring UChar, compare rest */
+ p=s;
+ q=sub;
+ for(;;) {
+ if(q==subLimit) {
+ if(isMatchAtCPBoundary(start, s-1, p, limit)) {
+ return (UChar *)(s-1); /* well-formed match */
+ } else {
+ break; /* no match because surrogate pair is split */
+ }
+ }
+ if(*p!=*q) {
+ break; /* no match */
+ }
+ ++p;
+ ++q;
+ }
+ }
+ }
+ }
+
+ /* not found */
+ return NULL;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring) {
+ return u_strFindFirst(s, -1, substring, -1);
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c) {
+ if(U16_IS_SURROGATE(c)) {
+ /* make sure to not find half of a surrogate pair */
+ return u_strFindFirst(s, -1, &c, 1);
+ } else {
+ UChar cs;
+
+ /* trivial search for a BMP code point */
+ for(;;) {
+ if((cs=*s)==c) {
+ return (UChar *)s;
+ }
+ if(cs==0) {
+ return NULL;
+ }
+ ++s;
+ }
+ }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strchr32(const UChar *s, UChar32 c) {
+ if((uint32_t)c<=U_BMP_MAX) {
+ /* find BMP code point */
+ return u_strchr(s, (UChar)c);
+ } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
+ /* find supplementary code point as surrogate pair */
+ UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+ while((cs=*s++)!=0) {
+ if(cs==lead && *s==trail) {
+ return (UChar *)(s-1);
+ }
+ }
+ return NULL;
+ } else {
+ /* not a Unicode code point, not findable */
+ return NULL;
+ }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count) {
+ if(count<=0) {
+ return NULL; /* no string */
+ } else if(U16_IS_SURROGATE(c)) {
+ /* make sure to not find half of a surrogate pair */
+ return u_strFindFirst(s, count, &c, 1);
+ } else {
+ /* trivial search for a BMP code point */
+ const UChar *limit=s+count;
+ do {
+ if(*s==c) {
+ return (UChar *)s;
+ }
+ } while(++s!=limit);
+ return NULL;
+ }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count) {
+ if((uint32_t)c<=U_BMP_MAX) {
+ /* find BMP code point */
+ return u_memchr(s, (UChar)c, count);
+ } else if(count<2) {
+ /* too short for a surrogate pair */
+ return NULL;
+ } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
+ /* find supplementary code point as surrogate pair */
+ const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
+ UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+ do {
+ if(*s==lead && *(s+1)==trail) {
+ return (UChar *)s;
+ }
+ } while(++s!=limit);
+ return NULL;
+ } else {
+ /* not a Unicode code point, not findable */
+ return NULL;
+ }
+}
+
+/* Backward binary string search functions ---------------------------------- */
+
+U_CAPI UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length,
+ const UChar *sub, int32_t subLength) {
+ const UChar *start, *limit, *p, *q, *subLimit;
+ UChar c, cs;
+
+ if(sub==NULL || subLength<-1) {
+ return (UChar *)s;
+ }
+ if(s==NULL || length<-1) {
+ return NULL;
+ }
+
+ /*
+ * This implementation is more lazy than the one for u_strFindFirst():
+ * There is no special search code for NUL-terminated strings.
+ * It does not seem to be worth it for searching substrings to
+ * search forward and find all matches like in u_strrchr() and similar.
+ * Therefore, we simply get both string lengths and search backward.
+ *
+ * markus 2002oct23
+ */
+
+ if(subLength<0) {
+ subLength=u_strlen(sub);
+ }
+ if(subLength==0) {
+ return (UChar *)s;
+ }
+
+ /* get sub[subLength-1] to search for it fast */
+ subLimit=sub+subLength;
+ cs=*(--subLimit);
+ --subLength;
+
+ if(subLength==0 && !U16_IS_SURROGATE(cs)) {
+ /* the substring consists of a single, non-surrogate BMP code point */
+ return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
+ }
+
+ if(length<0) {
+ length=u_strlen(s);
+ }
+
+ /* subLength was decremented above */
+ if(length<=subLength) {
+ return NULL; /* s is shorter than sub */
+ }
+
+ start=s;
+ limit=s+length;
+
+ /* the substring must start no later than s+subLength */
+ s+=subLength;
+
+ while(s!=limit) {
+ c=*(--limit);
+ if(c==cs) {
+ /* found last substring UChar, compare rest */
+ p=limit;
+ q=subLimit;
+ for(;;) {
+ if(q==sub) {
+ if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
+ return (UChar *)p; /* well-formed match */
+ } else {
+ break; /* no match because surrogate pair is split */
+ }
+ }
+ if(*(--p)!=*(--q)) {
+ break; /* no match */
+ }
+ }
+ }
+ }
+
+ /* not found */
+ return NULL;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring) {
+ return u_strFindLast(s, -1, substring, -1);
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c) {
+ if(U16_IS_SURROGATE(c)) {
+ /* make sure to not find half of a surrogate pair */
+ return u_strFindLast(s, -1, &c, 1);
+ } else {
+ const UChar *result=NULL;
+ UChar cs;
+
+ /* trivial search for a BMP code point */
+ for(;;) {
+ if((cs=*s)==c) {
+ result=s;
+ }
+ if(cs==0) {
+ return (UChar *)result;
+ }
+ ++s;
+ }
+ }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c) {
+ if((uint32_t)c<=U_BMP_MAX) {
+ /* find BMP code point */
+ return u_strrchr(s, (UChar)c);
+ } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
+ /* find supplementary code point as surrogate pair */
+ const UChar *result=NULL;
+ UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+ while((cs=*s++)!=0) {
+ if(cs==lead && *s==trail) {
+ result=s-1;
+ }
+ }
+ return (UChar *)result;
+ } else {
+ /* not a Unicode code point, not findable */
+ return NULL;
+ }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count) {
+ if(count<=0) {
+ return NULL; /* no string */
+ } else if(U16_IS_SURROGATE(c)) {
+ /* make sure to not find half of a surrogate pair */
+ return u_strFindLast(s, count, &c, 1);
+ } else {
+ /* trivial search for a BMP code point */
+ const UChar *limit=s+count;
+ do {
+ if(*(--limit)==c) {
+ return (UChar *)limit;
+ }
+ } while(s!=limit);
+ return NULL;
+ }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
+ if((uint32_t)c<=U_BMP_MAX) {
+ /* find BMP code point */
+ return u_memrchr(s, (UChar)c, count);
+ } else if(count<2) {
+ /* too short for a surrogate pair */
+ return NULL;
+ } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
+ /* find supplementary code point as surrogate pair */
+ const UChar *limit=s+count-1;
+ UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+ do {
+ if(*limit==trail && *(limit-1)==lead) {
+ return (UChar *)(limit-1);
+ }
+ } while(s!=--limit);
+ return NULL;
+ } else {
+ /* not a Unicode code point, not findable */
+ return NULL;
+ }
+}
+
+/* Tokenization functions --------------------------------------------------- */
+
+/*
+ * Match each code point in a string against each code point in the matchSet.
+ * Return the index of the first string code point that
+ * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
+ * Return -(string length)-1 if there is no such code point.
+ */
+static int32_t
+_matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
+ int32_t matchLen, matchBMPLen, strItr, matchItr;
+ UChar32 stringCh, matchCh;
+ UChar c, c2;
+
+ /* first part of matchSet contains only BMP code points */
+ matchBMPLen = 0;
+ while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
+ ++matchBMPLen;
+ }
+
+ /* second part of matchSet contains BMP and supplementary code points */
+ matchLen = matchBMPLen;
+ while(matchSet[matchLen] != 0) {
+ ++matchLen;
+ }
+
+ for(strItr = 0; (c = string[strItr]) != 0;) {
+ ++strItr;
+ if(U16_IS_SINGLE(c)) {
+ if(polarity) {
+ for(matchItr = 0; matchItr < matchLen; ++matchItr) {
+ if(c == matchSet[matchItr]) {
+ return strItr - 1; /* one matches */
+ }
+ }
+ } else {
+ for(matchItr = 0; matchItr < matchLen; ++matchItr) {
+ if(c == matchSet[matchItr]) {
+ goto endloop;
+ }
+ }
+ return strItr - 1; /* none matches */
+ }
+ } else {
+ /*
+ * No need to check for string length before U16_IS_TRAIL
+ * because c2 could at worst be the terminating NUL.
+ */
+ if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
+ ++strItr;
+ stringCh = U16_GET_SUPPLEMENTARY(c, c2);
+ } else {
+ stringCh = c; /* unpaired trail surrogate */
+ }
+
+ if(polarity) {
+ for(matchItr = matchBMPLen; matchItr < matchLen;) {
+ U16_NEXT(matchSet, matchItr, matchLen, matchCh);
+ if(stringCh == matchCh) {
+ return strItr - U16_LENGTH(stringCh); /* one matches */
+ }
+ }
+ } else {
+ for(matchItr = matchBMPLen; matchItr < matchLen;) {
+ U16_NEXT(matchSet, matchItr, matchLen, matchCh);
+ if(stringCh == matchCh) {
+ goto endloop;
+ }
+ }
+ return strItr - U16_LENGTH(stringCh); /* none matches */
+ }
+ }
+endloop:
+ /* wish C had continue with labels like Java... */;
+ }
+
+ /* Didn't find it. */
+ return -strItr-1;
+}
+
+/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
+U_CAPI UChar * U_EXPORT2
+u_strpbrk(const UChar *string, const UChar *matchSet)
+{
+ int32_t idx = _matchFromSet(string, matchSet, TRUE);
+ if(idx >= 0) {
+ return (UChar *)string + idx;
+ } else {
+ return NULL;
+ }
+}
+
+/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
+U_CAPI int32_t U_EXPORT2
+u_strcspn(const UChar *string, const UChar *matchSet)
+{
+ int32_t idx = _matchFromSet(string, matchSet, TRUE);
+ if(idx >= 0) {
+ return idx;
+ } else {
+ return -idx - 1; /* == u_strlen(string) */
+ }
+}
+
+/* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
+U_CAPI int32_t U_EXPORT2
+u_strspn(const UChar *string, const UChar *matchSet)
+{
+ int32_t idx = _matchFromSet(string, matchSet, FALSE);
+ if(idx >= 0) {
+ return idx;
+ } else {
+ return -idx - 1; /* == u_strlen(string) */
+ }
+}
+
+/* ----- Text manipulation functions --- */
+
+U_CAPI UChar* U_EXPORT2
+u_strtok_r(UChar *src,
+ const UChar *delim,
+ UChar **saveState)
+{
+ UChar *tokSource;
+ UChar *nextToken;
+ uint32_t nonDelimIdx;
+
+ /* If saveState is NULL, the user messed up. */
+ if (src != NULL) {
+ tokSource = src;
+ *saveState = src; /* Set to "src" in case there are no delimiters */
+ }
+ else if (*saveState) {
+ tokSource = *saveState;
+ }
+ else {
+ /* src == NULL && *saveState == NULL */
+ /* This shouldn't happen. We already finished tokenizing. */
+ return NULL;
+ }
+
+ /* Skip initial delimiters */
+ nonDelimIdx = u_strspn(tokSource, delim);
+ tokSource = &tokSource[nonDelimIdx];
+
+ if (*tokSource) {
+ nextToken = u_strpbrk(tokSource, delim);
+ if (nextToken != NULL) {
+ /* Create a token */
+ *(nextToken++) = 0;
+ *saveState = nextToken;
+ return tokSource;
+ }
+ else if (*saveState) {
+ /* Return the last token */
+ *saveState = NULL;
+ return tokSource;
+ }
+ }
+ else {
+ /* No tokens were found. Only delimiters were left. */
+ *saveState = NULL;
+ }
+ return NULL;
+}
+
+/* Miscellaneous functions -------------------------------------------------- */
+
+U_CAPI UChar* U_EXPORT2
+u_strcat(UChar *dst,
+ const UChar *src)
+{
+ UChar *anchor = dst; /* save a pointer to start of dst */
+
+ while(*dst != 0) { /* To end of first string */
+ ++dst;
+ }
+ while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */
+ }
+
+ return anchor;
+}
+
+U_CAPI UChar* U_EXPORT2
+u_strncat(UChar *dst,
+ const UChar *src,
+ int32_t n )
+{
+ if(n > 0) {
+ UChar *anchor = dst; /* save a pointer to start of dst */
+
+ while(*dst != 0) { /* To end of first string */
+ ++dst;
+ }
+ while((*dst = *src) != 0) { /* copy string 2 over */
+ ++dst;
+ if(--n == 0) {
+ *dst = 0;
+ break;
+ }
+ ++src;
+ }
+
+ return anchor;
+ } else {
+ return dst;
+ }
+}
+
+/* ----- Text property functions --- */
+
+U_CAPI int32_t U_EXPORT2
+u_strcmp(const UChar *s1,
+ const UChar *s2)
+{
+ UChar c1, c2;
+
+ for(;;) {
+ c1=*s1++;
+ c2=*s2++;
+ if (c1 != c2 || c1 == 0) {
+ break;
+ }
+ }
+ return (int32_t)c1 - (int32_t)c2;
+}
+
+U_CFUNC int32_t U_EXPORT2
+uprv_strCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ UBool strncmpStyle, UBool codePointOrder) {
+ const UChar *start1, *start2, *limit1, *limit2;
+ UChar c1, c2;
+
+ /* setup for fix-up */
+ start1=s1;
+ start2=s2;
+
+ /* compare identical prefixes - they do not need to be fixed up */
+ if(length1<0 && length2<0) {
+ /* strcmp style, both NUL-terminated */
+ if(s1==s2) {
+ return 0;
+ }
+
+ for(;;) {
+ c1=*s1;
+ c2=*s2;
+ if(c1!=c2) {
+ break;
+ }
+ if(c1==0) {
+ return 0;
+ }
+ ++s1;
+ ++s2;
+ }
+
+ /* setup for fix-up */
+ limit1=limit2=NULL;
+ } else if(strncmpStyle) {
+ /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
+ if(s1==s2) {
+ return 0;
+ }
+
+ limit1=start1+length1;
+
+ for(;;) {
+ /* both lengths are same, check only one limit */
+ if(s1==limit1) {
+ return 0;
+ }
+
+ c1=*s1;
+ c2=*s2;
+ if(c1!=c2) {
+ break;
+ }
+ if(c1==0) {
+ return 0;
+ }
+ ++s1;
+ ++s2;
+ }
+
+ /* setup for fix-up */
+ limit2=start2+length1; /* use length1 here, too, to enforce assumption */
+ } else {
+ /* memcmp/UnicodeString style, both length-specified */
+ int32_t lengthResult;
+
+ if(length1<0) {
+ length1=u_strlen(s1);
+ }
+ if(length2<0) {
+ length2=u_strlen(s2);
+ }
+
+ /* limit1=start1+min(lenght1, length2) */
+ if(length1<length2) {
+ lengthResult=-1;
+ limit1=start1+length1;
+ } else if(length1==length2) {
+ lengthResult=0;
+ limit1=start1+length1;
+ } else /* length1>length2 */ {
+ lengthResult=1;
+ limit1=start1+length2;
+ }
+
+ if(s1==s2) {
+ return lengthResult;
+ }
+
+ for(;;) {
+ /* check pseudo-limit */
+ if(s1==limit1) {
+ return lengthResult;
+ }
+
+ c1=*s1;
+ c2=*s2;
+ if(c1!=c2) {
+ break;
+ }
+ ++s1;
+ ++s2;
+ }
+
+ /* setup for fix-up */
+ limit1=start1+length1;
+ limit2=start2+length2;
+ }
+
+ /* if both values are in or above the surrogate range, fix them up */
+ if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
+ /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
+ if(
+ (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
+ (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c1-=0x2800;
+ }
+
+ if(
+ (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
+ (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c2-=0x2800;
+ }
+ }
+
+ /* now c1 and c2 are in the requested (code unit or code point) order */
+ return (int32_t)c1-(int32_t)c2;
+}
+
+/*
+ * Compare two strings as presented by UCharIterators.
+ * Use code unit or code point order.
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
+ UChar32 c1, c2;
+
+ /* argument checking */
+ if(iter1==NULL || iter2==NULL) {
+ return 0; /* bad arguments */
+ }
+ if(iter1==iter2) {
+ return 0; /* identical iterators */
+ }
+
+ /* reset iterators to start? */
+ iter1->move(iter1, 0, UITER_START);
+ iter2->move(iter2, 0, UITER_START);
+
+ /* compare identical prefixes - they do not need to be fixed up */
+ for(;;) {
+ c1=iter1->next(iter1);
+ c2=iter2->next(iter2);
+ if(c1!=c2) {
+ break;
+ }
+ if(c1==-1) {
+ return 0;
+ }
+ }
+
+ /* if both values are in or above the surrogate range, fix them up */
+ if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
+ /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
+ if(
+ (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
+ (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c1-=0x2800;
+ }
+
+ if(
+ (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
+ (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c2-=0x2800;
+ }
+ }
+
+ /* now c1 and c2 are in the requested (code unit or code point) order */
+ return (int32_t)c1-(int32_t)c2;
+}
+
+#if 0
+/*
+ * u_strCompareIter() does not leave the iterators _on_ the different units.
+ * This is possible but would cost a few extra indirect function calls to back
+ * up if the last unit (c1 or c2 respectively) was >=0.
+ *
+ * Consistently leaving them _behind_ the different units is not an option
+ * because the current "unit" is the end of the string if that is reached,
+ * and in such a case the iterator does not move.
+ * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
+ * of their strings. Calling previous() on each does not move them to where
+ * the comparison fails.
+ *
+ * So the simplest semantics is to not define where the iterators end up.
+ *
+ * The following fragment is part of what would need to be done for backing up.
+ */
+void fragment {
+ /* iff a surrogate is part of a surrogate pair, leave >=d800 */
+ if(c1<=0xdbff) {
+ if(!U16_IS_TRAIL(iter1->current(iter1))) {
+ /* lead surrogate code point - make <d800 */
+ c1-=0x2800;
+ }
+ } else if(c1<=0xdfff) {
+ int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
+ iter1->previous(iter1); /* ==c1 */
+ if(!U16_IS_LEAD(iter1->previous(iter1))) {
+ /* trail surrogate code point - make <d800 */
+ c1-=0x2800;
+ }
+ /* go back to behind where the difference is */
+ iter1->move(iter1, idx, UITER_ZERO);
+ } else /* 0xe000<=c1<=0xffff */ {
+ /* BMP code point - make <d800 */
+ c1-=0x2800;
+ }
+}
+#endif
+
+U_CAPI int32_t U_EXPORT2
+u_strCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ UBool codePointOrder) {
+ /* argument checking */
+ if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
+ return 0;
+ }
+ return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
+}
+
+/* String compare in code point order - u_strcmp() compares in code unit order. */
+U_CAPI int32_t U_EXPORT2
+u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
+ return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
+}
+
+U_CAPI int32_t U_EXPORT2
+u_strncmp(const UChar *s1,
+ const UChar *s2,
+ int32_t n)
+{
+ if(n > 0) {
+ int32_t rc;
+ for(;;) {
+ rc = (int32_t)*s1 - (int32_t)*s2;
+ if(rc != 0 || *s1 == 0 || --n == 0) {
+ return rc;
+ }
+ ++s1;
+ ++s2;
+ }
+ } else {
+ return 0;
+ }
+}
+
+U_CAPI int32_t U_EXPORT2
+u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
+ return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
+}
+
+U_CAPI UChar* U_EXPORT2
+u_strcpy(UChar *dst,
+ const UChar *src)
+{
+ UChar *anchor = dst; /* save a pointer to start of dst */
+
+ while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */
+ }
+
+ return anchor;
+}
+
+U_CAPI UChar* U_EXPORT2
+u_strncpy(UChar *dst,
+ const UChar *src,
+ int32_t n)
+{
+ UChar *anchor = dst; /* save a pointer to start of dst */
+
+ /* copy string 2 over */
+ while(n > 0 && (*(dst++) = *(src++)) != 0) {
+ --n;
+ }
+
+ return anchor;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_strlen(const UChar *s)
+{
+#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
+ return (int32_t)uprv_wcslen(s);
+#else
+ const UChar *t = s;
+ while(*t != 0) {
+ ++t;
+ }
+ return t - s;
+#endif
+}
+
+U_CAPI int32_t U_EXPORT2
+u_countChar32(const UChar *s, int32_t length) {
+ int32_t count;
+
+ if(s==NULL || length<-1) {
+ return 0;
+ }
+
+ count=0;
+ if(length>=0) {
+ while(length>0) {
+ ++count;
+ if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) {
+ s+=2;
+ length-=2;
+ } else {
+ ++s;
+ --length;
+ }
+ }
+ } else /* length==-1 */ {
+ UChar c;
+
+ for(;;) {
+ if((c=*s++)==0) {
+ break;
+ }
+ ++count;
+
+ /*
+ * sufficient to look ahead one because of UTF-16;
+ * safe to look ahead one because at worst that would be the terminating NUL
+ */
+ if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
+ ++s;
+ }
+ }
+ }
+ return count;
+}
+
+U_CAPI UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
+
+ if(number<0) {
+ return TRUE;
+ }
+ if(s==NULL || length<-1) {
+ return FALSE;
+ }
+
+ if(length==-1) {
+ /* s is NUL-terminated */
+ UChar c;
+
+ /* count code points until they exceed */
+ for(;;) {
+ if((c=*s++)==0) {
+ return FALSE;
+ }
+ if(number==0) {
+ return TRUE;
+ }
+ if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
+ ++s;
+ }
+ --number;
+ }
+ } else {
+ /* length>=0 known */
+ const UChar *limit;
+ int32_t maxSupplementary;
+
+ /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
+ if(((length+1)/2)>number) {
+ return TRUE;
+ }
+
+ /* check if s does not even contain enough UChars */
+ maxSupplementary=length-number;
+ if(maxSupplementary<=0) {
+ return FALSE;
+ }
+ /* there are maxSupplementary=length-number more UChars than asked-for code points */
+
+ /*
+ * count code points until they exceed and also check that there are
+ * no more than maxSupplementary supplementary code points (UChar pairs)
+ */
+ limit=s+length;
+ for(;;) {
+ if(s==limit) {
+ return FALSE;
+ }
+ if(number==0) {
+ return TRUE;
+ }
+ if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
+ ++s;
+ if(--maxSupplementary<=0) {
+ /* too many pairs - too few code points */
+ return FALSE;
+ }
+ }
+ --number;
+ }
+ }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memcpy(UChar *dest, const UChar *src, int32_t count) {
+ if(count > 0) {
+ uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR);
+ }
+ return dest;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memmove(UChar *dest, const UChar *src, int32_t count) {
+ if(count > 0) {
+ uprv_memmove(dest, src, count*U_SIZEOF_UCHAR);
+ }
+ return dest;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memset(UChar *dest, UChar c, int32_t count) {
+ if(count > 0) {
+ UChar *ptr = dest;
+ UChar *limit = dest + count;
+
+ while (ptr < limit) {
+ *(ptr++) = c;
+ }
+ }
+ return dest;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
+ if(count > 0) {
+ const UChar *limit = buf1 + count;
+ int32_t result;
+
+ while (buf1 < limit) {
+ result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
+ if (result != 0) {
+ return result;
+ }
+ buf1++;
+ buf2++;
+ }
+ }
+ return 0;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
+ return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
+}
+
+/* u_unescape & support fns ------------------------------------------------- */
+
+/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
+static const UChar UNESCAPE_MAP[] = {
+ /*" 0x22, 0x22 */
+ /*' 0x27, 0x27 */
+ /*? 0x3F, 0x3F */
+ /*\ 0x5C, 0x5C */
+ /*a*/ 0x61, 0x07,
+ /*b*/ 0x62, 0x08,
+ /*e*/ 0x65, 0x1b,
+ /*f*/ 0x66, 0x0c,
+ /*n*/ 0x6E, 0x0a,
+ /*r*/ 0x72, 0x0d,
+ /*t*/ 0x74, 0x09,
+ /*v*/ 0x76, 0x0b
+};
+enum { UNESCAPE_MAP_LENGTH = sizeof(UNESCAPE_MAP) / sizeof(UNESCAPE_MAP[0]) };
+
+/* Convert one octal digit to a numeric value 0..7, or -1 on failure */
+static int8_t _digit8(UChar c) {
+ if (c >= 0x0030 && c <= 0x0037) {
+ return (int8_t)(c - 0x0030);
+ }
+ return -1;
+}
+
+/* Convert one hex digit to a numeric value 0..F, or -1 on failure */
+static int8_t _digit16(UChar c) {
+ if (c >= 0x0030 && c <= 0x0039) {
+ return (int8_t)(c - 0x0030);
+ }
+ if (c >= 0x0041 && c <= 0x0046) {
+ return (int8_t)(c - (0x0041 - 10));
+ }
+ if (c >= 0x0061 && c <= 0x0066) {
+ return (int8_t)(c - (0x0061 - 10));
+ }
+ return -1;
+}
+
+/* Parse a single escape sequence. Although this method deals in
+ * UChars, it does not use C++ or UnicodeString. This allows it to
+ * be used from C contexts. */
+U_CAPI UChar32 U_EXPORT2
+u_unescapeAt(UNESCAPE_CHAR_AT charAt,
+ int32_t *offset,
+ int32_t length,
+ void *context) {
+
+ int32_t start = *offset;
+ UChar c;
+ UChar32 result = 0;
+ int8_t n = 0;
+ int8_t minDig = 0;
+ int8_t maxDig = 0;
+ int8_t bitsPerDigit = 4;
+ int8_t dig;
+ int32_t i;
+ UBool braces = FALSE;
+
+ /* Check that offset is in range */
+ if (*offset < 0 || *offset >= length) {
+ goto err;
+ }
+
+ /* Fetch first UChar after '\\' */
+ c = charAt((*offset)++, context);
+
+ /* Convert hexadecimal and octal escapes */
+ switch (c) {
+ case 0x0075 /*'u'*/:
+ minDig = maxDig = 4;
+ break;
+ case 0x0055 /*'U'*/:
+ minDig = maxDig = 8;
+ break;
+ case 0x0078 /*'x'*/:
+ minDig = 1;
+ if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
+ ++(*offset);
+ braces = TRUE;
+ maxDig = 8;
+ } else {
+ maxDig = 2;
+ }
+ break;
+ default:
+ dig = _digit8(c);
+ if (dig >= 0) {
+ minDig = 1;
+ maxDig = 3;
+ n = 1; /* Already have first octal digit */
+ bitsPerDigit = 3;
+ result = dig;
+ }
+ break;
+ }
+ if (minDig != 0) {
+ while (*offset < length && n < maxDig) {
+ c = charAt(*offset, context);
+ dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
+ if (dig < 0) {
+ break;
+ }
+ result = (result << bitsPerDigit) | dig;
+ ++(*offset);
+ ++n;
+ }
+ if (n < minDig) {
+ goto err;
+ }
+ if (braces) {
+ if (c != 0x7D /*}*/) {
+ goto err;
+ }
+ ++(*offset);
+ }
+ if (result < 0 || result >= 0x110000) {
+ goto err;
+ }
+ /* If an escape sequence specifies a lead surrogate, see if
+ * there is a trail surrogate after it, either as an escape or
+ * as a literal. If so, join them up into a supplementary.
+ */
+ if (*offset < length && U16_IS_LEAD(result)) {
+ int32_t ahead = *offset + 1;
+ c = charAt(*offset, context);
+ if (c == 0x5C /*'\\'*/ && ahead < length) {
+ c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
+ }
+ if (U16_IS_TRAIL(c)) {
+ *offset = ahead;
+ result = U16_GET_SUPPLEMENTARY(result, c);
+ }
+ }
+ return result;
+ }
+
+ /* Convert C-style escapes in table */
+ for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
+ if (c == UNESCAPE_MAP[i]) {
+ return UNESCAPE_MAP[i+1];
+ } else if (c < UNESCAPE_MAP[i]) {
+ break;
+ }
+ }
+
+ /* Map \cX to control-X: X & 0x1F */
+ if (c == 0x0063 /*'c'*/ && *offset < length) {
+ c = charAt((*offset)++, context);
+ if (U16_IS_LEAD(c) && *offset < length) {
+ UChar c2 = charAt(*offset, context);
+ if (U16_IS_TRAIL(c2)) {
+ ++(*offset);
+ c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
+ }
+ }
+ return 0x1F & c;
+ }
+
+ /* If no special forms are recognized, then consider
+ * the backslash to generically escape the next character.
+ * Deal with surrogate pairs. */
+ if (U16_IS_LEAD(c) && *offset < length) {
+ UChar c2 = charAt(*offset, context);
+ if (U16_IS_TRAIL(c2)) {
+ ++(*offset);
+ return U16_GET_SUPPLEMENTARY(c, c2);
+ }
+ }
+ return c;
+
+ err:
+ /* Invalid escape sequence */
+ *offset = start; /* Reset to initial value */
+ return (UChar32)0xFFFFFFFF;
+}
+
+/* u_unescapeAt() callback to return a UChar from a char* */
+static UChar U_CALLCONV
+_charPtr_charAt(int32_t offset, void *context) {
+ UChar c16;
+ /* It would be more efficient to access the invariant tables
+ * directly but there is no API for that. */
+ u_charsToUChars(((char*) context) + offset, &c16, 1);
+ return c16;
+}
+
+/* Append an escape-free segment of the text; used by u_unescape() */
+static void _appendUChars(UChar *dest, int32_t destCapacity,
+ const char *src, int32_t srcLen) {
+ if (destCapacity < 0) {
+ destCapacity = 0;
+ }
+ if (srcLen > destCapacity) {
+ srcLen = destCapacity;
+ }
+ u_charsToUChars(src, dest, srcLen);
+}
+
+/* Do an invariant conversion of char* -> UChar*, with escape parsing */
+U_CAPI int32_t U_EXPORT2
+u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
+ const char *segment = src;
+ int32_t i = 0;
+ char c;
+
+ while ((c=*src) != 0) {
+ /* '\\' intentionally written as compiler-specific
+ * character constant to correspond to compiler-specific
+ * char* constants. */
+ if (c == '\\') {
+ int32_t lenParsed = 0;
+ UChar32 c32;
+ if (src != segment) {
+ if (dest != NULL) {
+ _appendUChars(dest + i, destCapacity - i,
+ segment, (int32_t)(src - segment));
+ }
+ i += (int32_t)(src - segment);
+ }
+ ++src; /* advance past '\\' */
+ c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
+ if (lenParsed == 0) {
+ goto err;
+ }
+ src += lenParsed; /* advance past escape seq. */
+ if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
+ U16_APPEND_UNSAFE(dest, i, c32);
+ } else {
+ i += U16_LENGTH(c32);
+ }
+ segment = src;
+ } else {
+ ++src;
+ }
+ }
+ if (src != segment) {
+ if (dest != NULL) {
+ _appendUChars(dest + i, destCapacity - i,
+ segment, (int32_t)(src - segment));
+ }
+ i += (int32_t)(src - segment);
+ }
+ if (dest != NULL && i < destCapacity) {
+ dest[i] = 0;
+ }
+ return i;
+
+ err:
+ if (dest != NULL && destCapacity > 0) {
+ *dest = 0;
+ }
+ return 0;
+}
+
+/* NUL-termination of strings ----------------------------------------------- */
+
+/**
+ * NUL-terminate a string no matter what its type.
+ * Set warning and error codes accordingly.
+ */
+#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) \
+ if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \
+ /* not a public function, so no complete argument checking */ \
+ \
+ if(length<0) { \
+ /* assume that the caller handles this */ \
+ } else if(length<destCapacity) { \
+ /* NUL-terminate the string, the NUL fits */ \
+ dest[length]=0; \
+ /* unset the not-terminated warning but leave all others */ \
+ if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { \
+ *pErrorCode=U_ZERO_ERROR; \
+ } \
+ } else if(length==destCapacity) { \
+ /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
+ *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; \
+ } else /* length>destCapacity */ { \
+ /* even the string itself did not fit - set an error code */ \
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; \
+ } \
+ }
+
+U_CAPI int32_t U_EXPORT2
+u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+ __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+ return length;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+ __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+ return length;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+ __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+ return length;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+ __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+ return length;
+}
+
+// Compute the hash code for a string -------------------------------------- ***
+
+// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
+// on UHashtable code.
+
+/*
+ Compute the hash by iterating sparsely over about 32 (up to 63)
+ characters spaced evenly through the string. For each character,
+ multiply the previous hash value by a prime number and add the new
+ character in, like a linear congruential random number generator,
+ producing a pseudorandom deterministic value well distributed over
+ the output range. [LIU]
+*/
+
+#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
+ uint32_t hash = 0; \
+ const TYPE *p = (const TYPE*) STR; \
+ if (p != NULL) { \
+ int32_t len = (int32_t)(STRLEN); \
+ int32_t inc = ((len - 32) / 32) + 1; \
+ const TYPE *limit = p + len; \
+ while (p<limit) { \
+ hash = (hash * 37) + DEREF; \
+ p += inc; \
+ } \
+ } \
+ return static_cast<int32_t>(hash)
+
+/* Used by UnicodeString to compute its hashcode - Not public API. */
+U_CAPI int32_t U_EXPORT2
+ustr_hashUCharsN(const UChar *str, int32_t length) {
+ STRING_HASH(UChar, str, length, *p);
+}
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashCharsN(const char *str, int32_t length) {
+ STRING_HASH(uint8_t, str, length, *p);
+}
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashICharsN(const char *str, int32_t length) {
+ STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
+}
diff --git a/src/core/basetypes/icu-ucsdet/utrace.c b/src/core/basetypes/icu-ucsdet/utrace.c
new file mode 100644
index 00000000..c2f36c48
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/utrace.c
@@ -0,0 +1,488 @@
+/*
+*******************************************************************************
+* Copyright (C) 2003-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: utrace.c
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*/
+
+#define UTRACE_IMPL
+#include "unicode/utrace.h"
+#include "utracimp.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+
+
+static UTraceEntry *pTraceEntryFunc = NULL;
+static UTraceExit *pTraceExitFunc = NULL;
+static UTraceData *pTraceDataFunc = NULL;
+static const void *gTraceContext = NULL;
+
+U_EXPORT int32_t
+utrace_level = UTRACE_ERROR;
+
+U_CAPI void U_EXPORT2
+utrace_entry(int32_t fnNumber) {
+ if (pTraceEntryFunc != NULL) {
+ (*pTraceEntryFunc)(gTraceContext, fnNumber);
+ }
+}
+
+
+static const char gExitFmt[] = "Returns.";
+static const char gExitFmtValue[] = "Returns %d.";
+static const char gExitFmtStatus[] = "Returns. Status = %d.";
+static const char gExitFmtValueStatus[] = "Returns %d. Status = %d.";
+static const char gExitFmtPtrStatus[] = "Returns %d. Status = %p.";
+
+U_CAPI void U_EXPORT2
+utrace_exit(int32_t fnNumber, int32_t returnType, ...) {
+ if (pTraceExitFunc != NULL) {
+ va_list args;
+ const char *fmt;
+
+ switch (returnType) {
+ case 0:
+ fmt = gExitFmt;
+ break;
+ case UTRACE_EXITV_I32:
+ fmt = gExitFmtValue;
+ break;
+ case UTRACE_EXITV_STATUS:
+ fmt = gExitFmtStatus;
+ break;
+ case UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS:
+ fmt = gExitFmtValueStatus;
+ break;
+ case UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS:
+ fmt = gExitFmtPtrStatus;
+ break;
+ default:
+ U_ASSERT(FALSE);
+ fmt = gExitFmt;
+ }
+
+ va_start(args, returnType);
+ (*pTraceExitFunc)(gTraceContext, fnNumber, fmt, args);
+ va_end(args);
+ }
+}
+
+
+
+U_CAPI void U_EXPORT2
+utrace_data(int32_t fnNumber, int32_t level, const char *fmt, ...) {
+ if (pTraceDataFunc != NULL) {
+ va_list args;
+ va_start(args, fmt );
+ (*pTraceDataFunc)(gTraceContext, fnNumber, level, fmt, args);
+ va_end(args);
+ }
+}
+
+
+static void outputChar(char c, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
+ int32_t i;
+ /* Check whether a start of line indenting is needed. Three cases:
+ * 1. At the start of the first line (output index == 0).
+ * 2. At the start of subsequent lines (preceeding char in buffer == '\n')
+ * 3. When preflighting buffer len (buffer capacity is exceeded), when
+ * a \n is output. Ideally we wouldn't do the indent until the following char
+ * is received, but that won't work because there's no place to remember that
+ * the preceding char was \n. Meaning that we may overstimate the
+ * buffer size needed. No harm done.
+ */
+ if (*outIx==0 || /* case 1. */
+ (c!='\n' && c!=0 && *outIx < capacity && outBuf[(*outIx)-1]=='\n') || /* case 2. */
+ (c=='\n' && *outIx>=capacity)) /* case 3 */
+ {
+ /* At the start of a line. Indent. */
+ for(i=0; i<indent; i++) {
+ if (*outIx < capacity) {
+ outBuf[*outIx] = ' ';
+ }
+ (*outIx)++;
+ }
+ }
+
+ if (*outIx < capacity) {
+ outBuf[*outIx] = c;
+ }
+ if (c != 0) {
+ /* Nulls only appear as end-of-string terminators. Move them to the output
+ * buffer, but do not update the length of the buffer, so that any
+ * following output will overwrite the null. */
+ (*outIx)++;
+ }
+}
+
+static void outputHexBytes(int64_t val, int32_t charsToOutput,
+ char *outBuf, int32_t *outIx, int32_t capacity) {
+ static const char gHexChars[] = "0123456789abcdef";
+ int32_t shiftCount;
+ for (shiftCount=(charsToOutput-1)*4; shiftCount >= 0; shiftCount-=4) {
+ char c = gHexChars[(val >> shiftCount) & 0xf];
+ outputChar(c, outBuf, outIx, capacity, 0);
+ }
+}
+
+/* Output a pointer value in hex. Work with any size of pointer */
+static void outputPtrBytes(void *val, char *outBuf, int32_t *outIx, int32_t capacity) {
+ int32_t i;
+ int32_t incVal = 1; /* +1 for big endian, -1 for little endian */
+ char *p = (char *)&val; /* point to current byte to output in the ptr val */
+
+#if !U_IS_BIG_ENDIAN
+ /* Little Endian. Move p to most significant end of the value */
+ incVal = -1;
+ p += sizeof(void *) - 1;
+#endif
+
+ /* Loop through the bytes of the ptr as it sits in memory, from
+ * most significant to least significant end */
+ for (i=0; i<sizeof(void *); i++) {
+ outputHexBytes(*p, 2, outBuf, outIx, capacity);
+ p += incVal;
+ }
+}
+
+static void outputString(const char *s, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
+ int32_t i = 0;
+ char c;
+ if (s==NULL) {
+ s = "*NULL*";
+ }
+ do {
+ c = s[i++];
+ outputChar(c, outBuf, outIx, capacity, indent);
+ } while (c != 0);
+}
+
+
+
+static void outputUString(const UChar *s, int32_t len,
+ char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
+ int32_t i = 0;
+ UChar c;
+ if (s==NULL) {
+ outputString(NULL, outBuf, outIx, capacity, indent);
+ return;
+ }
+
+ for (i=0; i<len || len==-1; i++) {
+ c = s[i];
+ outputHexBytes(c, 4, outBuf, outIx, capacity);
+ outputChar(' ', outBuf, outIx, capacity, indent);
+ if (len == -1 && c==0) {
+ break;
+ }
+ }
+}
+
+U_CAPI int32_t U_EXPORT2
+utrace_vformat(char *outBuf, int32_t capacity, int32_t indent, const char *fmt, va_list args) {
+ int32_t outIx = 0;
+ int32_t fmtIx = 0;
+ char fmtC;
+ char c;
+ int32_t intArg;
+ int64_t longArg = 0;
+ char *ptrArg;
+
+ /* Loop runs once for each character in the format string.
+ */
+ for (;;) {
+ fmtC = fmt[fmtIx++];
+ if (fmtC != '%') {
+ /* Literal character, not part of a %sequence. Just copy it to the output. */
+ outputChar(fmtC, outBuf, &outIx, capacity, indent);
+ if (fmtC == 0) {
+ /* We hit the null that terminates the format string.
+ * This is the normal (and only) exit from the loop that
+ * interprets the format
+ */
+ break;
+ }
+ continue;
+ }
+
+ /* We encountered a '%'. Pick up the following format char */
+ fmtC = fmt[fmtIx++];
+
+ switch (fmtC) {
+ case 'c':
+ /* single 8 bit char */
+ c = (char)va_arg(args, int32_t);
+ outputChar(c, outBuf, &outIx, capacity, indent);
+ break;
+
+ case 's':
+ /* char * string, null terminated. */
+ ptrArg = va_arg(args, char *);
+ outputString((const char *)ptrArg, outBuf, &outIx, capacity, indent);
+ break;
+
+ case 'S':
+ /* UChar * string, with length, len==-1 for null terminated. */
+ ptrArg = va_arg(args, void *); /* Ptr */
+ intArg =(int32_t)va_arg(args, int32_t); /* Length */
+ outputUString((const UChar *)ptrArg, intArg, outBuf, &outIx, capacity, indent);
+ break;
+
+ case 'b':
+ /* 8 bit int */
+ intArg = va_arg(args, int);
+ outputHexBytes(intArg, 2, outBuf, &outIx, capacity);
+ break;
+
+ case 'h':
+ /* 16 bit int */
+ intArg = va_arg(args, int);
+ outputHexBytes(intArg, 4, outBuf, &outIx, capacity);
+ break;
+
+ case 'd':
+ /* 32 bit int */
+ intArg = va_arg(args, int);
+ outputHexBytes(intArg, 8, outBuf, &outIx, capacity);
+ break;
+
+ case 'l':
+ /* 64 bit long */
+ longArg = va_arg(args, int64_t);
+ outputHexBytes(longArg, 16, outBuf, &outIx, capacity);
+ break;
+
+ case 'p':
+ /* Pointers. */
+ ptrArg = va_arg(args, void *);
+ outputPtrBytes(ptrArg, outBuf, &outIx, capacity);
+ break;
+
+ case 0:
+ /* Single '%' at end of fmt string. Output as literal '%'.
+ * Back up index into format string so that the terminating null will be
+ * re-fetched in the outer loop, causing it to terminate.
+ */
+ outputChar('%', outBuf, &outIx, capacity, indent);
+ fmtIx--;
+ break;
+
+ case 'v':
+ {
+ /* Vector of values, e.g. %vh */
+ char vectorType;
+ int32_t vectorLen;
+ const char *i8Ptr;
+ int16_t *i16Ptr;
+ int32_t *i32Ptr;
+ int64_t *i64Ptr;
+ void **ptrPtr;
+ int32_t charsToOutput = 0;
+ int32_t i;
+
+ vectorType = fmt[fmtIx]; /* b, h, d, l, p, etc. */
+ if (vectorType != 0) {
+ fmtIx++;
+ }
+ i8Ptr = (const char *)va_arg(args, void*);
+ i16Ptr = (int16_t *)i8Ptr;
+ i32Ptr = (int32_t *)i8Ptr;
+ i64Ptr = (int64_t *)i8Ptr;
+ ptrPtr = (void **)i8Ptr;
+ vectorLen =(int32_t)va_arg(args, int32_t);
+ if (ptrPtr == NULL) {
+ outputString("*NULL* ", outBuf, &outIx, capacity, indent);
+ } else {
+ for (i=0; i<vectorLen || vectorLen==-1; i++) {
+ switch (vectorType) {
+ case 'b':
+ charsToOutput = 2;
+ longArg = *i8Ptr++;
+ break;
+ case 'h':
+ charsToOutput = 4;
+ longArg = *i16Ptr++;
+ break;
+ case 'd':
+ charsToOutput = 8;
+ longArg = *i32Ptr++;
+ break;
+ case 'l':
+ charsToOutput = 16;
+ longArg = *i64Ptr++;
+ break;
+ case 'p':
+ charsToOutput = 0;
+ outputPtrBytes(*ptrPtr, outBuf, &outIx, capacity);
+ longArg = *ptrPtr==NULL? 0: 1; /* test for null terminated array. */
+ ptrPtr++;
+ break;
+ case 'c':
+ charsToOutput = 0;
+ outputChar(*i8Ptr, outBuf, &outIx, capacity, indent);
+ longArg = *i8Ptr; /* for test for null terminated array. */
+ i8Ptr++;
+ break;
+ case 's':
+ charsToOutput = 0;
+ outputString(*ptrPtr, outBuf, &outIx, capacity, indent);
+ outputChar('\n', outBuf, &outIx, capacity, indent);
+ longArg = *ptrPtr==NULL? 0: 1; /* for test for null term. array. */
+ ptrPtr++;
+ break;
+
+ case 'S':
+ charsToOutput = 0;
+ outputUString((const UChar *)*ptrPtr, -1, outBuf, &outIx, capacity, indent);
+ outputChar('\n', outBuf, &outIx, capacity, indent);
+ longArg = *ptrPtr==NULL? 0: 1; /* for test for null term. array. */
+ ptrPtr++;
+ break;
+
+
+ }
+ if (charsToOutput > 0) {
+ outputHexBytes(longArg, charsToOutput, outBuf, &outIx, capacity);
+ outputChar(' ', outBuf, &outIx, capacity, indent);
+ }
+ if (vectorLen == -1 && longArg == 0) {
+ break;
+ }
+ }
+ }
+ outputChar('[', outBuf, &outIx, capacity, indent);
+ outputHexBytes(vectorLen, 8, outBuf, &outIx, capacity);
+ outputChar(']', outBuf, &outIx, capacity, indent);
+ }
+ break;
+
+
+ default:
+ /* %. in format string, where . is some character not in the set
+ * of recognized format chars. Just output it as if % wasn't there.
+ * (Covers "%%" outputing a single '%')
+ */
+ outputChar(fmtC, outBuf, &outIx, capacity, indent);
+ }
+ }
+ outputChar(0, outBuf, &outIx, capacity, indent); /* Make sure that output is null terminated */
+ return outIx + 1; /* outIx + 1 because outIx does not increment when outputing final null. */
+}
+
+
+
+
+U_CAPI int32_t U_EXPORT2
+utrace_format(char *outBuf, int32_t capacity,
+ int32_t indent, const char *fmt, ...) {
+ int32_t retVal;
+ va_list args;
+ va_start(args, fmt );
+ retVal = utrace_vformat(outBuf, capacity, indent, fmt, args);
+ va_end(args);
+ return retVal;
+}
+
+
+U_CAPI void U_EXPORT2
+utrace_setFunctions(const void *context,
+ UTraceEntry *e, UTraceExit *x, UTraceData *d) {
+ pTraceEntryFunc = e;
+ pTraceExitFunc = x;
+ pTraceDataFunc = d;
+ gTraceContext = context;
+}
+
+
+U_CAPI void U_EXPORT2
+utrace_getFunctions(const void **context,
+ UTraceEntry **e, UTraceExit **x, UTraceData **d) {
+ *e = pTraceEntryFunc;
+ *x = pTraceExitFunc;
+ *d = pTraceDataFunc;
+ *context = gTraceContext;
+}
+
+U_CAPI void U_EXPORT2
+utrace_setLevel(int32_t level) {
+ if (level < UTRACE_OFF) {
+ level = UTRACE_OFF;
+ }
+ if (level > UTRACE_VERBOSE) {
+ level = UTRACE_VERBOSE;
+ }
+ utrace_level = level;
+}
+
+U_CAPI int32_t U_EXPORT2
+utrace_getLevel() {
+ return utrace_level;
+}
+
+
+U_CFUNC UBool
+utrace_cleanup() {
+ pTraceEntryFunc = NULL;
+ pTraceExitFunc = NULL;
+ pTraceDataFunc = NULL;
+ utrace_level = UTRACE_OFF;
+ gTraceContext = NULL;
+ return TRUE;
+}
+
+
+static const char * const
+trFnName[] = {
+ "u_init",
+ "u_cleanup",
+ NULL
+};
+
+
+static const char * const
+trConvNames[] = {
+ "ucnv_open",
+ "ucnv_openPackage",
+ "ucnv_openAlgorithmic",
+ "ucnv_clone",
+ "ucnv_close",
+ "ucnv_flushCache",
+ "ucnv_load",
+ "ucnv_unload",
+ NULL
+};
+
+
+static const char * const
+trCollNames[] = {
+ "ucol_open",
+ "ucol_close",
+ "ucol_strcoll",
+ "ucol_getSortKey",
+ "ucol_getLocale",
+ "ucol_nextSortKeyPart",
+ "ucol_strcollIter",
+ "ucol_openFromShortString",
+ "ucol_strcollUTF8",
+ NULL
+};
+
+
+U_CAPI const char * U_EXPORT2
+utrace_functionName(int32_t fnNumber) {
+ if(UTRACE_FUNCTION_START <= fnNumber && fnNumber < UTRACE_FUNCTION_LIMIT) {
+ return trFnName[fnNumber];
+ } else if(UTRACE_CONVERSION_START <= fnNumber && fnNumber < UTRACE_CONVERSION_LIMIT) {
+ return trConvNames[fnNumber - UTRACE_CONVERSION_START];
+ } else if(UTRACE_COLLATION_START <= fnNumber && fnNumber < UTRACE_COLLATION_LIMIT){
+ return trCollNames[fnNumber - UTRACE_COLLATION_START];
+ } else {
+ return "[BOGUS Trace Function Number]";
+ }
+}
+
diff --git a/unittest/data/summary/input/1015-windows-1252.eml b/unittest/data/summary/input/1015-windows-1252.eml
new file mode 100644
index 00000000..78e102ae
--- /dev/null
+++ b/unittest/data/summary/input/1015-windows-1252.eml
@@ -0,0 +1,19 @@
+From 0@xxx 2014-12-12 22:59:42 +0000
+From: "Test test" <test@test.net>
+Content-Type: text/plain;charset=windows-1252
+Content-Transfer-Encoding: quoted-printable
+Mime-Version: 1.0 (1.0)
+Subject: Re: Test test
+To: "Test test" <test@test.net>
+Message-Id: <E5D5C2EB-305B-4577-8D3F-F61D1167D52E@test.net>
+Date: Fri, 01 Dec 2014 23:59:42 +0100
+
+Thanks, Lena. So I guess I have an issue with Siri on one of my iPads, whic=
+h is why the microphone wasn=92t showing up.
+
+...
+
+[Description: Description: cid:image001.png@01CD286C.D0001580]
+=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=
+=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=
+=96=96=96=96
diff --git a/unittest/data/summary/input/1021-chinese.eml b/unittest/data/summary/input/1021-chinese.eml
new file mode 100644
index 00000000..93c36b4a
--- /dev/null
+++ b/unittest/data/summary/input/1021-chinese.eml
@@ -0,0 +1,64 @@
+HMM_SOURCE_IP:10.62.8.17:58409.173036363
+HMM_ATTACHE_NUM:0000
+HMM_SOURCE_TYPE:SMTP
+Received: from smtpbg328.qq.com (as7.inner-189.cn [10.62.8.17])
+ by 189.cn (HERMES) with ESMTP id 9DAD6260004
+ for <wujingyou@189.cn>; Wed, 24 Dec 2014 04:09:34 +0800 (CST)
+Received: from smtpbg328.qq.com ([14.17.43.160])
+ by 189CN(MEDUSA 10.62.8.17) with ESMTP id 1419365374.28408 for wujingyou@189.cn
+ ;
+ Wed Dec 24 04:09:36 2014
+0/X-Total-Score: -120:
+2/X-Total-Score: 0:
+3/X-Total-Score: 0:
+X-FILTER-SCORE: to=<5254545257515a595451536152595a4f848f>, score=<1419365376ttttpttftt9tpf9Oa7wdLtZZZFZZ6ZZjZF6jmU1+KNZZ>
+X-REAL-FROM:wujyou@qq.com
+X-Receive-IP:14.17.43.160 wujyou@qq.com
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=qq.com; s=s201307;
+ t=1419365374; bh=+L9WKWb/TZ/WN/hsH9zWebN7pLH2WpJWpe8JNnjkHzU=;
+ h=X-QQ-FEAT:X-QQ-SSF:X-QQ-WAPMAIL:X-QQ-BUSINESS-ORIGIN:
+ X-Originating-IP:X-QQ-STYLE:X-QQ-mid:From:To:Subject:Mime-Version:Content-Type:Content-Transfer-Encoding:Date:
+ X-Priority:Message-ID:X-QQ-MIME:X-Mailer:X-QQ-Mailer:
+ X-QQ-SENDSIZE;
+ b=TOFY9TbuFdUSu607LkA8z4siu7aRWfyPRfKR4Zdwjq2VDklCS4pOnYd3K7gUnU+Rw
+ rpJvAGZllbkcuDJM/2YB7SPb3yYH8Z55KnTfUPsm8aQxrXABv8OnOJmoU9zhFt2o9p
+ kfxuV+sdaT4MQJI/WXjN24W7Aq+zTTx3SBmcnONo=
+X-QQ-FEAT: zaIfg0hwV2r5ipAJ//zZfBFZvJON7nl/P/xc0g9oL/Y=
+X-QQ-SSF: 00010000000000F0
+X-QQ-WAPMAIL: 1
+X-QQ-BUSINESS-ORIGIN: 2
+X-Originating-IP: 120.237.120.44
+X-QQ-STYLE:
+X-QQ-mid: wapmail39t1419365373t592355
+From: "=?gb18030?B?z/LX8w==?=" <wujyou@qq.com>
+To: "=?gb18030?B?MTg509M=?=" <wujingyou@189.cn>
+Subject: =?gb18030?B?uf65/g==?=
+Mime-Version: 1.0
+Content-Type: multipart/alternative;
+ boundary="----=_NextPart_5499CBFD_09046CF8_7A47AD55"
+Content-Transfer-Encoding: 8Bit
+Date: Wed, 24 Dec 2014 04:09:33 +0800
+X-Priority: 3
+Message-ID: <tencent_768EB412354CD73039FB412B@qq.com>
+X-QQ-MIME: TCMime 1.0 by Tencent
+X-Mailer: QQMail 2.x
+X-QQ-Mailer: QQMail 2.x
+X-QQ-SENDSIZE: 520
+
+This is a multi-part message in MIME format.
+
+------=_NextPart_5499CBFD_09046CF8_7A47AD55
+Content-Type: text/plain;
+ charset="gb18030"
+Content-Transfer-Encoding: base64
+
+uf65/g==
+
+------=_NextPart_5499CBFD_09046CF8_7A47AD55
+Content-Type: text/html;
+ charset="gb18030"
+Content-Transfer-Encoding: base64
+
+uf65/g==
+
+------=_NextPart_5499CBFD_09046CF8_7A47AD55--
diff --git a/unittest/data/summary/output/1015-windows-1252.txt b/unittest/data/summary/output/1015-windows-1252.txt
new file mode 100644
index 00000000..1682a73e
--- /dev/null
+++ b/unittest/data/summary/output/1015-windows-1252.txt
@@ -0,0 +1,12 @@
+From:Test test<test@test.net>
+To:Test test<test@test.net>
+Subject:Test test
+Date:December 1, 2014 at 2:59:42 PM PST
+
+
+Thanks, Lena. So I guess I have an issue with Siri on one of my iPads, which is why the microphone wasn’t showing up.
+
+...
+
+[Description: Description: cid:image001.png@01CD286C.D0001580]
+––––––––––––––––––––––––––––––––––––––––––––––––––––––
diff --git a/unittest/data/summary/output/1021-chinese.txt b/unittest/data/summary/output/1021-chinese.txt
new file mode 100644
index 00000000..ad7c9e84
--- /dev/null
+++ b/unittest/data/summary/output/1021-chinese.txt
@@ -0,0 +1,7 @@
+From:"向左"<wujyou@qq.com>
+To:"189佑"<wujingyou@189.cn>
+Subject:哈哈
+Date:December 23, 2014 at 12:09:33 PM PST
+
+
+哈哈
diff --git a/unittest/data/summary/output/2458-encoding.txt b/unittest/data/summary/output/2458-encoding.txt
index 923cad9c..62bd4fb7 100644
--- a/unittest/data/summary/output/2458-encoding.txt
+++ b/unittest/data/summary/output/2458-encoding.txt
@@ -12,28 +12,28 @@ E-Mail
Cevap: Cevap : Yurt içi iPhone 4 Alanlar - Alamayanlar -(Fiyat-Taksit-Bayi-Stok) bilgi versin lütfen
~~~~~~~~~~~~~~
-Genpa&#39;dan 11ocakta form doldurdum hala arayacaklar Türkcellden tanýdýk buldum ilk partide alýyorum inþallah
+Genpa&#39;dan 11ocakta form doldurdum hala arayacaklar Türkcellden tanıdık buldum ilk partide alıyorum inşallah
------------ Mesajýn Sonu -------------
+----------- Mesajın Sonu -------------
-Bu mesaj aþaðýdaki adreste bulunmaktadýr:
+Bu mesaj aşağıdaki adreste bulunmaktadır:
http://forum.donanimhaber.com/fb.asp?m=47561754
-Bu mesaj þu kýsýmlarýn altýndadýr:
+Bu mesaj şu kısımların altındadır:
Bölüm: Apple - iPhone - iPad - http://forum.donanimhaber.com/tt.asp?forumid=462
-Forum: Diðer - http://forum.donanimhaber.com/tt.asp?forumid=514
+Forum: Diğer - http://forum.donanimhaber.com/tt.asp?forumid=514
-Eðer bu mesajlardan daha fazla almak istemiyorsanýz, sitemize giriþ yapýp, email yoluyla üyeliklerinizi gözden geçirip istemediklerinizi çýkartýnýz.
+Eğer bu mesajlardan daha fazla almak istemiyorsanız, sitemize giriş yapıp, email yoluyla üyeliklerinizi gözden geçirip istemediklerinizi çıkartınız.
http://forum.donanimhaber.com/subscribe.asp
=========================================
-Donaným Haber Forum
+Donanım Haber Forum
http://forum.donanimhaber.com/
=========================================
diff --git a/unittest/data/summary/output/2518-encoding.txt b/unittest/data/summary/output/2518-encoding.txt
index 923cad9c..62bd4fb7 100644
--- a/unittest/data/summary/output/2518-encoding.txt
+++ b/unittest/data/summary/output/2518-encoding.txt
@@ -12,28 +12,28 @@ E-Mail
Cevap: Cevap : Yurt içi iPhone 4 Alanlar - Alamayanlar -(Fiyat-Taksit-Bayi-Stok) bilgi versin lütfen
~~~~~~~~~~~~~~
-Genpa&#39;dan 11ocakta form doldurdum hala arayacaklar Türkcellden tanýdýk buldum ilk partide alýyorum inþallah
+Genpa&#39;dan 11ocakta form doldurdum hala arayacaklar Türkcellden tanıdık buldum ilk partide alıyorum inşallah
------------ Mesajýn Sonu -------------
+----------- Mesajın Sonu -------------
-Bu mesaj aþaðýdaki adreste bulunmaktadýr:
+Bu mesaj aşağıdaki adreste bulunmaktadır:
http://forum.donanimhaber.com/fb.asp?m=47561754
-Bu mesaj þu kýsýmlarýn altýndadýr:
+Bu mesaj şu kısımların altındadır:
Bölüm: Apple - iPhone - iPad - http://forum.donanimhaber.com/tt.asp?forumid=462
-Forum: Diðer - http://forum.donanimhaber.com/tt.asp?forumid=514
+Forum: Diğer - http://forum.donanimhaber.com/tt.asp?forumid=514
-Eðer bu mesajlardan daha fazla almak istemiyorsanýz, sitemize giriþ yapýp, email yoluyla üyeliklerinizi gözden geçirip istemediklerinizi çýkartýnýz.
+Eğer bu mesajlardan daha fazla almak istemiyorsanız, sitemize giriş yapıp, email yoluyla üyeliklerinizi gözden geçirip istemediklerinizi çıkartınız.
http://forum.donanimhaber.com/subscribe.asp
=========================================
-Donaným Haber Forum
+Donanım Haber Forum
http://forum.donanimhaber.com/
=========================================