98 files changed, 39545 insertions, 37 deletions
diff --git a/build-mac/mailcore2.xcodeproj/project.pbxproj b/build-mac/mailcore2.xcodeproj/project.pbxproj
index fcc4830b..58525de5 100755
--- a/build-mac/mailcore2.xcodeproj/project.pbxproj
+++ b/build-mac/mailcore2.xcodeproj/project.pbxproj
@@ -168,6 +168,50 @@
 		BD63713B177DFF080094121B /* MCLibetpan.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BD637139177DFF080094121B /* MCLibetpan.cpp */; };
 		BDCD7C5B1A5B1C2C0001DCC3 /* MCErrorMessage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C591A5B1C2C0001DCC3 /* MCErrorMessage.cpp */; };
 		BDCD7C5C1A5B1C2C0001DCC3 /* MCErrorMessage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C591A5B1C2C0001DCC3 /* MCErrorMessage.cpp */; };
+		BDCD7CC31A70771B0001DCC3 /* csdetect.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6A1A70771B0001DCC3 /* csdetect.cpp */; };
+		BDCD7CC41A70771B0001DCC3 /* csdetect.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6A1A70771B0001DCC3 /* csdetect.cpp */; };
+		BDCD7CC51A70771B0001DCC3 /* csmatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6B1A70771B0001DCC3 /* csmatch.cpp */; };
+		BDCD7CC61A70771B0001DCC3 /* csmatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6B1A70771B0001DCC3 /* csmatch.cpp */; };
+		BDCD7CC71A70771B0001DCC3 /* csr2022.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6C1A70771B0001DCC3 /* csr2022.cpp */; };
+		BDCD7CC81A70771B0001DCC3 /* csr2022.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6C1A70771B0001DCC3 /* csr2022.cpp */; };
+		BDCD7CC91A70771B0001DCC3 /* csrecog.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6D1A70771B0001DCC3 /* csrecog.cpp */; };
+		BDCD7CCA1A70771B0001DCC3 /* csrecog.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6D1A70771B0001DCC3 /* csrecog.cpp */; };
+		BDCD7CCB1A70771B0001DCC3 /* csrmbcs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6E1A70771B0001DCC3 /* csrmbcs.cpp */; };
+		BDCD7CCC1A70771B0001DCC3 /* csrmbcs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6E1A70771B0001DCC3 /* csrmbcs.cpp */; };
+		BDCD7CCD1A70771B0001DCC3 /* csrsbcs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6F1A70771B0001DCC3 /* csrsbcs.cpp */; };
+		BDCD7CCE1A70771B0001DCC3 /* csrsbcs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C6F1A70771B0001DCC3 /* csrsbcs.cpp */; };
+		BDCD7CCF1A70771B0001DCC3 /* csrucode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C701A70771B0001DCC3 /* csrucode.cpp */; };
+		BDCD7CD01A70771B0001DCC3 /* csrucode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C701A70771B0001DCC3 /* csrucode.cpp */; };
+		BDCD7CD11A70771B0001DCC3 /* csrutf8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C711A70771B0001DCC3 /* csrutf8.cpp */; };
+		BDCD7CD21A70771B0001DCC3 /* csrutf8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C711A70771B0001DCC3 /* csrutf8.cpp */; };
+		BDCD7CD31A70771B0001DCC3 /* cstring.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C721A70771B0001DCC3 /* cstring.c */; };
+		BDCD7CD41A70771B0001DCC3 /* cstring.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7C721A70771B0001DCC3 /* cstring.c */; };
+		BDCD7CD51A70771B0001DCC3 /* inputext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CB91A70771B0001DCC3 /* inputext.cpp */; };
+		BDCD7CD61A70771B0001DCC3 /* inputext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CB91A70771B0001DCC3 /* inputext.cpp */; };
+		BDCD7CD71A70771B0001DCC3 /* uarrsort.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBA1A70771B0001DCC3 /* uarrsort.c */; };
+		BDCD7CD81A70771B0001DCC3 /* uarrsort.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBA1A70771B0001DCC3 /* uarrsort.c */; };
+		BDCD7CD91A70771B0001DCC3 /* ucln_cmn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBB1A70771B0001DCC3 /* ucln_cmn.cpp */; };
+		BDCD7CDA1A70771B0001DCC3 /* ucln_cmn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBB1A70771B0001DCC3 /* ucln_cmn.cpp */; };
+		BDCD7CDB1A70771B0001DCC3 /* ucln_in.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBC1A70771B0001DCC3 /* ucln_in.cpp */; };
+		BDCD7CDC1A70771B0001DCC3 /* ucln_in.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBC1A70771B0001DCC3 /* ucln_in.cpp */; };
+		BDCD7CDD1A70771B0001DCC3 /* udataswp.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBD1A70771B0001DCC3 /* udataswp.c */; };
+		BDCD7CDE1A70771B0001DCC3 /* udataswp.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBD1A70771B0001DCC3 /* udataswp.c */; };
+		BDCD7CDF1A70771B0001DCC3 /* uenum.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBE1A70771B0001DCC3 /* uenum.c */; };
+		BDCD7CE01A70771B0001DCC3 /* uenum.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBE1A70771B0001DCC3 /* uenum.c */; };
+		BDCD7CE11A70771B0001DCC3 /* uinvchar.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBF1A70771B0001DCC3 /* uinvchar.c */; };
+		BDCD7CE21A70771B0001DCC3 /* uinvchar.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CBF1A70771B0001DCC3 /* uinvchar.c */; };
+		BDCD7CE31A70771B0001DCC3 /* uobject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC01A70771B0001DCC3 /* uobject.cpp */; };
+		BDCD7CE41A70771B0001DCC3 /* uobject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC01A70771B0001DCC3 /* uobject.cpp */; };
+		BDCD7CE51A70771B0001DCC3 /* ustring.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC11A70771B0001DCC3 /* ustring.cpp */; };
+		BDCD7CE61A70771B0001DCC3 /* ustring.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC11A70771B0001DCC3 /* ustring.cpp */; };
+		BDCD7CE71A70771B0001DCC3 /* utrace.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC21A70771B0001DCC3 /* utrace.c */; };
+		BDCD7CE81A70771B0001DCC3 /* utrace.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CC21A70771B0001DCC3 /* utrace.c */; };
+		BDCD7CEA1A7077680001DCC3 /* ucsdet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CE91A7077680001DCC3 /* ucsdet.cpp */; };
+		BDCD7CEB1A7077680001DCC3 /* ucsdet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CE91A7077680001DCC3 /* ucsdet.cpp */; };
+		BDCD7CEE1A7079300001DCC3 /* cmemory.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CEC1A7079300001DCC3 /* cmemory.c */; };
+		BDCD7CEF1A7079300001DCC3 /* cmemory.c in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CEC1A7079300001DCC3 /* cmemory.c */; };
+		BDCD7CF01A7079300001DCC3 /* umutex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CED1A7079300001DCC3 /* umutex.cpp */; };
+		BDCD7CF11A7079300001DCC3 /* umutex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BDCD7CED1A7079300001DCC3 /* umutex.cpp */; };
 		C07AD5D7FD82F8ACAB576231 /* NSError+MCO.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = C07AD44B013BB42A240B4F04 /* NSError+MCO.h */; };
 		C07AD99B2E2054C684DB8FF6 /* NSError+MCO.mm in Sources */ = {isa = PBXBuildFile; fileRef = C07ADFE43E22B38EFF23ADB5 /* NSError+MCO.mm */; };
 		C07ADC28B83E7959BF114D46 /* MCOIMAPSession.mm in Sources */ = {isa = PBXBuildFile; fileRef = C07AD057D3C8FBDC7AC95733 /* MCOIMAPSession.mm */; };
@@ -1493,6 +1537,96 @@
 		BD63713A177DFF080094121B /* MCLibetpan.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MCLibetpan.h; sourceTree = "<group>"; };
 		BDCD7C591A5B1C2C0001DCC3 /* MCErrorMessage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MCErrorMessage.cpp; sourceTree = "<group>"; };
 		BDCD7C5A1A5B1C2C0001DCC3 /* MCErrorMessage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MCErrorMessage.h; sourceTree = "<group>"; };
+		BDCD7C6A1A70771B0001DCC3 /* csdetect.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csdetect.cpp; sourceTree = "<group>"; };
+		BDCD7C6B1A70771B0001DCC3 /* csmatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csmatch.cpp; sourceTree = "<group>"; };
+		BDCD7C6C1A70771B0001DCC3 /* csr2022.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csr2022.cpp; sourceTree = "<group>"; };
+		BDCD7C6D1A70771B0001DCC3 /* csrecog.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csrecog.cpp; sourceTree = "<group>"; };
+		BDCD7C6E1A70771B0001DCC3 /* csrmbcs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csrmbcs.cpp; sourceTree = "<group>"; };
+		BDCD7C6F1A70771B0001DCC3 /* csrsbcs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csrsbcs.cpp; sourceTree = "<group>"; };
+		BDCD7C701A70771B0001DCC3 /* csrucode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csrucode.cpp; sourceTree = "<group>"; };
+		BDCD7C711A70771B0001DCC3 /* csrutf8.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = csrutf8.cpp; sourceTree = "<group>"; };
+		BDCD7C721A70771B0001DCC3 /* cstring.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = cstring.c; sourceTree = "<group>"; };
+		BDCD7C741A70771B0001DCC3 /* charstr.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = charstr.h; sourceTree = "<group>"; };
+		BDCD7C751A70771B0001DCC3 /* cmemory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cmemory.h; sourceTree = "<group>"; };
+		BDCD7C761A70771B0001DCC3 /* csdetect.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csdetect.h; sourceTree = "<group>"; };
+		BDCD7C771A70771B0001DCC3 /* csmatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csmatch.h; sourceTree = "<group>"; };
+		BDCD7C781A70771B0001DCC3 /* csr2022.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csr2022.h; sourceTree = "<group>"; };
+		BDCD7C791A70771B0001DCC3 /* csrecog.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csrecog.h; sourceTree = "<group>"; };
+		BDCD7C7A1A70771B0001DCC3 /* csrmbcs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csrmbcs.h; sourceTree = "<group>"; };
+		BDCD7C7B1A70771B0001DCC3 /* csrsbcs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csrsbcs.h; sourceTree = "<group>"; };
+		BDCD7C7C1A70771B0001DCC3 /* csrucode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csrucode.h; sourceTree = "<group>"; };
+		BDCD7C7D1A70771B0001DCC3 /* csrutf8.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = csrutf8.h; sourceTree = "<group>"; };
+		BDCD7C7E1A70771B0001DCC3 /* cstring.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cstring.h; sourceTree = "<group>"; };
+		BDCD7C7F1A70771B0001DCC3 /* cwchar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cwchar.h; sourceTree = "<group>"; };
+		BDCD7C801A70771B0001DCC3 /* inputext.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = inputext.h; sourceTree = "<group>"; };
+		BDCD7C811A70771B0001DCC3 /* locmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = locmap.h; sourceTree = "<group>"; };
+		BDCD7C821A70771B0001DCC3 /* mutex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mutex.h; sourceTree = "<group>"; };
+		BDCD7C831A70771B0001DCC3 /* putilimp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = putilimp.h; sourceTree = "<group>"; };
+		BDCD7C841A70771B0001DCC3 /* uarrsort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uarrsort.h; sourceTree = "<group>"; };
+		BDCD7C851A70771B0001DCC3 /* uassert.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uassert.h; sourceTree = "<group>"; };
+		BDCD7C861A70771B0001DCC3 /* ucase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucase.h; sourceTree = "<group>"; };
+		BDCD7C871A70771B0001DCC3 /* ucln.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucln.h; sourceTree = "<group>"; };
+		BDCD7C881A70771B0001DCC3 /* ucln_cmn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucln_cmn.h; sourceTree = "<group>"; };
+		BDCD7C891A70771B0001DCC3 /* ucln_imp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucln_imp.h; sourceTree = "<group>"; };
+		BDCD7C8A1A70771B0001DCC3 /* ucln_in.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucln_in.h; sourceTree = "<group>"; };
+		BDCD7C8B1A70771B0001DCC3 /* ucmndata.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucmndata.h; sourceTree = "<group>"; };
+		BDCD7C8C1A70771B0001DCC3 /* udataswp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = udataswp.h; sourceTree = "<group>"; };
+		BDCD7C8D1A70771B0001DCC3 /* uelement.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uelement.h; sourceTree = "<group>"; };
+		BDCD7C8E1A70771B0001DCC3 /* uenumimp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uenumimp.h; sourceTree = "<group>"; };
+		BDCD7C8F1A70771B0001DCC3 /* uinvchar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uinvchar.h; sourceTree = "<group>"; };
+		BDCD7C901A70771B0001DCC3 /* umapfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = umapfile.h; sourceTree = "<group>"; };
+		BDCD7C911A70771B0001DCC3 /* umutex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = umutex.h; sourceTree = "<group>"; };
+		BDCD7C931A70771B0001DCC3 /* appendable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = appendable.h; sourceTree = "<group>"; };
+		BDCD7C941A70771B0001DCC3 /* bytestream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bytestream.h; sourceTree = "<group>"; };
+		BDCD7C951A70771B0001DCC3 /* localpointer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = localpointer.h; sourceTree = "<group>"; };
+		BDCD7C961A70771B0001DCC3 /* platform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = platform.h; sourceTree = "<group>"; };
+		BDCD7C971A70771B0001DCC3 /* ptypes.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ptypes.h; sourceTree = "<group>"; };
+		BDCD7C981A70771B0001DCC3 /* putil.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = putil.h; sourceTree = "<group>"; };
+		BDCD7C991A70771B0001DCC3 /* rep.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rep.h; sourceTree = "<group>"; };
+		BDCD7C9A1A70771B0001DCC3 /* std_string.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = std_string.h; sourceTree = "<group>"; };
+		BDCD7C9B1A70771B0001DCC3 /* strenum.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = strenum.h; sourceTree = "<group>"; };
+		BDCD7C9C1A70771B0001DCC3 /* stringpiece.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stringpiece.h; sourceTree = "<group>"; };
+		BDCD7C9D1A70771B0001DCC3 /* ucasemap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucasemap.h; sourceTree = "<group>"; };
+		BDCD7C9E1A70771B0001DCC3 /* uchar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uchar.h; sourceTree = "<group>"; };
+		BDCD7C9F1A70771B0001DCC3 /* uclean.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uclean.h; sourceTree = "<group>"; };
+		BDCD7CA01A70771B0001DCC3 /* ucnv.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucnv.h; sourceTree = "<group>"; };
+		BDCD7CA11A70771B0001DCC3 /* ucnv_err.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucnv_err.h; sourceTree = "<group>"; };
+		BDCD7CA21A70771B0001DCC3 /* uconfig.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uconfig.h; sourceTree = "<group>"; };
+		BDCD7CA31A70771B0001DCC3 /* ucsdet.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ucsdet.h; sourceTree = "<group>"; };
+		BDCD7CA41A70771B0001DCC3 /* udata.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = udata.h; sourceTree = "<group>"; };
+		BDCD7CA51A70771B0001DCC3 /* uenum.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uenum.h; sourceTree = "<group>"; };
+		BDCD7CA61A70771B0001DCC3 /* uiter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uiter.h; sourceTree = "<group>"; };
+		BDCD7CA71A70771B0001DCC3 /* umachine.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = umachine.h; sourceTree = "<group>"; };
+		BDCD7CA81A70771B0001DCC3 /* unistr.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = unistr.h; sourceTree = "<group>"; };
+		BDCD7CA91A70771B0001DCC3 /* uobject.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uobject.h; sourceTree = "<group>"; };
+		BDCD7CAA1A70771B0001DCC3 /* urename.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = urename.h; sourceTree = "<group>"; };
+		BDCD7CAB1A70771B0001DCC3 /* uset.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uset.h; sourceTree = "<group>"; };
+		BDCD7CAC1A70771B0001DCC3 /* ustring.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ustring.h; sourceTree = "<group>"; };
+		BDCD7CAD1A70771B0001DCC3 /* utf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utf.h; sourceTree = "<group>"; };
+		BDCD7CAE1A70771B0001DCC3 /* utf16.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utf16.h; sourceTree = "<group>"; };
+		BDCD7CAF1A70771B0001DCC3 /* utf8.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utf8.h; sourceTree = "<group>"; };
+		BDCD7CB01A70771B0001DCC3 /* utf_old.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utf_old.h; sourceTree = "<group>"; };
+		BDCD7CB11A70771B0001DCC3 /* utrace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utrace.h; sourceTree = "<group>"; };
+		BDCD7CB21A70771B0001DCC3 /* utypes.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utypes.h; sourceTree = "<group>"; };
+		BDCD7CB31A70771B0001DCC3 /* uvernum.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uvernum.h; sourceTree = "<group>"; };
+		BDCD7CB41A70771B0001DCC3 /* uversion.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uversion.h; sourceTree = "<group>"; };
+		BDCD7CB51A70771B0001DCC3 /* uposixdefs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uposixdefs.h; sourceTree = "<group>"; };
+		BDCD7CB61A70771B0001DCC3 /* uset_imp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uset_imp.h; sourceTree = "<group>"; };
+		BDCD7CB71A70771B0001DCC3 /* ustr_imp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ustr_imp.h; sourceTree = "<group>"; };
+		BDCD7CB81A70771B0001DCC3 /* utracimp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utracimp.h; sourceTree = "<group>"; };
+		BDCD7CB91A70771B0001DCC3 /* inputext.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = inputext.cpp; sourceTree = "<group>"; };
+		BDCD7CBA1A70771B0001DCC3 /* uarrsort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = uarrsort.c; sourceTree = "<group>"; };
+		BDCD7CBB1A70771B0001DCC3 /* ucln_cmn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ucln_cmn.cpp; sourceTree = "<group>"; };
+		BDCD7CBC1A70771B0001DCC3 /* ucln_in.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ucln_in.cpp; sourceTree = "<group>"; };
+		BDCD7CBD1A70771B0001DCC3 /* udataswp.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = udataswp.c; sourceTree = "<group>"; };
+		BDCD7CBE1A70771B0001DCC3 /* uenum.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = uenum.c; sourceTree = "<group>"; };
+		BDCD7CBF1A70771B0001DCC3 /* uinvchar.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = uinvchar.c; sourceTree = "<group>"; };
+		BDCD7CC01A70771B0001DCC3 /* uobject.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = uobject.cpp; sourceTree = "<group>"; };
+		BDCD7CC11A70771B0001DCC3 /* ustring.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ustring.cpp; sourceTree = "<group>"; };
+		BDCD7CC21A70771B0001DCC3 /* utrace.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = utrace.c; sourceTree = "<group>"; };
+		BDCD7CE91A7077680001DCC3 /* ucsdet.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ucsdet.cpp; sourceTree = "<group>"; };
+		BDCD7CEC1A7079300001DCC3 /* cmemory.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = cmemory.c; sourceTree = "<group>"; };
+		BDCD7CED1A7079300001DCC3 /* umutex.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = umutex.cpp; sourceTree = "<group>"; };
 		C07AD057D3C8FBDC7AC95733 /* MCOIMAPSession.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MCOIMAPSession.mm; sourceTree = "<group>"; };
 		C07AD44B013BB42A240B4F04 /* NSError+MCO.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSError+MCO.h"; sourceTree = "<group>"; };
 		C07ADFE43E22B38EFF23ADB5 /* NSError+MCO.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "NSError+MCO.mm"; sourceTree = "<group>"; };
@@ -2091,6 +2225,119 @@
 			path = nntp;
 			sourceTree = "<group>";
 		};
+		BDCD7C691A70771B0001DCC3 /* icu-ucsdet */ = {
+			isa = PBXGroup;
+			children = (
+				BDCD7CEC1A7079300001DCC3 /* cmemory.c */,
+				BDCD7C6A1A70771B0001DCC3 /* csdetect.cpp */,
+				BDCD7C6B1A70771B0001DCC3 /* csmatch.cpp */,
+				BDCD7C6C1A70771B0001DCC3 /* csr2022.cpp */,
+				BDCD7C6D1A70771B0001DCC3 /* csrecog.cpp */,
+				BDCD7C6E1A70771B0001DCC3 /* csrmbcs.cpp */,
+				BDCD7C6F1A70771B0001DCC3 /* csrsbcs.cpp */,
+				BDCD7C701A70771B0001DCC3 /* csrucode.cpp */,
+				BDCD7C711A70771B0001DCC3 /* csrutf8.cpp */,
+				BDCD7C721A70771B0001DCC3 /* cstring.c */,
+				BDCD7C731A70771B0001DCC3 /* include */,
+				BDCD7CB91A70771B0001DCC3 /* inputext.cpp */,
+				BDCD7CBA1A70771B0001DCC3 /* uarrsort.c */,
+				BDCD7CBB1A70771B0001DCC3 /* ucln_cmn.cpp */,
+				BDCD7CBC1A70771B0001DCC3 /* ucln_in.cpp */,
+				BDCD7CE91A7077680001DCC3 /* ucsdet.cpp */,
+				BDCD7CBD1A70771B0001DCC3 /* udataswp.c */,
+				BDCD7CBE1A70771B0001DCC3 /* uenum.c */,
+				BDCD7CBF1A70771B0001DCC3 /* uinvchar.c */,
+				BDCD7CED1A7079300001DCC3 /* umutex.cpp */,
+				BDCD7CC01A70771B0001DCC3 /* uobject.cpp */,
+				BDCD7CC11A70771B0001DCC3 /* ustring.cpp */,
+				BDCD7CC21A70771B0001DCC3 /* utrace.c */,
+			);
+			path = "icu-ucsdet";
+			sourceTree = "<group>";
+		};
+		BDCD7C731A70771B0001DCC3 /* include */ = {
+			isa = PBXGroup;
+			children = (
+				BDCD7C741A70771B0001DCC3 /* charstr.h */,
+				BDCD7C751A70771B0001DCC3 /* cmemory.h */,
+				BDCD7C761A70771B0001DCC3 /* csdetect.h */,
+				BDCD7C771A70771B0001DCC3 /* csmatch.h */,
+				BDCD7C781A70771B0001DCC3 /* csr2022.h */,
+				BDCD7C791A70771B0001DCC3 /* csrecog.h */,
+				BDCD7C7A1A70771B0001DCC3 /* csrmbcs.h */,
+				BDCD7C7B1A70771B0001DCC3 /* csrsbcs.h */,
+				BDCD7C7C1A70771B0001DCC3 /* csrucode.h */,
+				BDCD7C7D1A70771B0001DCC3 /* csrutf8.h */,
+				BDCD7C7E1A70771B0001DCC3 /* cstring.h */,
+				BDCD7C7F1A70771B0001DCC3 /* cwchar.h */,
+				BDCD7C801A70771B0001DCC3 /* inputext.h */,
+				BDCD7C811A70771B0001DCC3 /* locmap.h */,
+				BDCD7C821A70771B0001DCC3 /* mutex.h */,
+				BDCD7C831A70771B0001DCC3 /* putilimp.h */,
+				BDCD7C841A70771B0001DCC3 /* uarrsort.h */,
+				BDCD7C851A70771B0001DCC3 /* uassert.h */,
+				BDCD7C861A70771B0001DCC3 /* ucase.h */,
+				BDCD7C871A70771B0001DCC3 /* ucln.h */,
+				BDCD7C881A70771B0001DCC3 /* ucln_cmn.h */,
+				BDCD7C891A70771B0001DCC3 /* ucln_imp.h */,
+				BDCD7C8A1A70771B0001DCC3 /* ucln_in.h */,
+				BDCD7C8B1A70771B0001DCC3 /* ucmndata.h */,
+				BDCD7C8C1A70771B0001DCC3 /* udataswp.h */,
+				BDCD7C8D1A70771B0001DCC3 /* uelement.h */,
+				BDCD7C8E1A70771B0001DCC3 /* uenumimp.h */,
+				BDCD7C8F1A70771B0001DCC3 /* uinvchar.h */,
+				BDCD7C901A70771B0001DCC3 /* umapfile.h */,
+				BDCD7C911A70771B0001DCC3 /* umutex.h */,
+				BDCD7C921A70771B0001DCC3 /* unicode */,
+				BDCD7CB51A70771B0001DCC3 /* uposixdefs.h */,
+				BDCD7CB61A70771B0001DCC3 /* uset_imp.h */,
+				BDCD7CB71A70771B0001DCC3 /* ustr_imp.h */,
+				BDCD7CB81A70771B0001DCC3 /* utracimp.h */,
+			);
+			path = include;
+			sourceTree = "<group>";
+		};
+		BDCD7C921A70771B0001DCC3 /* unicode */ = {
+			isa = PBXGroup;
+			children = (
+				BDCD7C931A70771B0001DCC3 /* appendable.h */,
+				BDCD7C941A70771B0001DCC3 /* bytestream.h */,
+				BDCD7C951A70771B0001DCC3 /* localpointer.h */,
+				BDCD7C961A70771B0001DCC3 /* platform.h */,
+				BDCD7C971A70771B0001DCC3 /* ptypes.h */,
+				BDCD7C981A70771B0001DCC3 /* putil.h */,
+				BDCD7C991A70771B0001DCC3 /* rep.h */,
+				BDCD7C9A1A70771B0001DCC3 /* std_string.h */,
+				BDCD7C9B1A70771B0001DCC3 /* strenum.h */,
+				BDCD7C9C1A70771B0001DCC3 /* stringpiece.h */,
+				BDCD7C9D1A70771B0001DCC3 /* ucasemap.h */,
+				BDCD7C9E1A70771B0001DCC3 /* uchar.h */,
+				BDCD7C9F1A70771B0001DCC3 /* uclean.h */,
+				BDCD7CA01A70771B0001DCC3 /* ucnv.h */,
+				BDCD7CA11A70771B0001DCC3 /* ucnv_err.h */,
+				BDCD7CA21A70771B0001DCC3 /* uconfig.h */,
+				BDCD7CA31A70771B0001DCC3 /* ucsdet.h */,
+				BDCD7CA41A70771B0001DCC3 /* udata.h */,
+				BDCD7CA51A70771B0001DCC3 /* uenum.h */,
+				BDCD7CA61A70771B0001DCC3 /* uiter.h */,
+				BDCD7CA71A70771B0001DCC3 /* umachine.h */,
+				BDCD7CA81A70771B0001DCC3 /* unistr.h */,
+				BDCD7CA91A70771B0001DCC3 /* uobject.h */,
+				BDCD7CAA1A70771B0001DCC3 /* urename.h */,
+				BDCD7CAB1A70771B0001DCC3 /* uset.h */,
+				BDCD7CAC1A70771B0001DCC3 /* ustring.h */,
+				BDCD7CAD1A70771B0001DCC3 /* utf.h */,
+				BDCD7CAE1A70771B0001DCC3 /* utf16.h */,
+				BDCD7CAF1A70771B0001DCC3 /* utf8.h */,
+				BDCD7CB01A70771B0001DCC3 /* utf_old.h */,
+				BDCD7CB11A70771B0001DCC3 /* utrace.h */,
+				BDCD7CB21A70771B0001DCC3 /* utypes.h */,
+				BDCD7CB31A70771B0001DCC3 /* uvernum.h */,
+				BDCD7CB41A70771B0001DCC3 /* uversion.h */,
+			);
+			path = unicode;
+			sourceTree = "<group>";
+		};
 		C63CD67616BDCDD300DB18F1 /* renderer */ = {
 			isa = PBXGroup;
 			children = (
@@ -2376,6 +2623,7 @@
 		C64EA6A1169E847800778456 /* basetypes */ = {
 			isa = PBXGroup;
 			children = (
+				BDCD7C691A70771B0001DCC3 /* icu-ucsdet */,
 				BD62729B1A158DA900129AA8 /* MCWin32.cpp */,
 				BD62729C1A158DA900129AA8 /* MCWin32.h */,
 				BD62729A1A158DA000129AA8 /* MCDefines.h */,
@@ -3144,6 +3392,7 @@
 				84D7373D199BF83B005124E5 /* MCNNTPFetchArticleOperation.cpp in Sources */,
 				C64EA6F4169E847800778456 /* MCAbstractMessage.cpp in Sources */,
 				C64EA6F6169E847800778456 /* MCAbstractMessagePart.cpp in Sources */,
+				BDCD7CF01A7079300001DCC3 /* umutex.cpp in Sources */,
 				84B639ED17F280F3003B5BA2 /* MCIMAPNoopOperation.cpp in Sources */,
 				C64EA6F8169E847800778456 /* MCAbstractMultipart.cpp in Sources */,
 				C64EA6FA169E847800778456 /* MCAbstractPart.cpp in Sources */,
@@ -3178,7 +3427,9 @@
 				C64EA72B169E847800778456 /* MCIMAPNamespace.cpp in Sources */,
 				C64EA72D169E847800778456 /* MCIMAPNamespaceItem.cpp in Sources */,
 				C64EA72F169E847800778456 /* MCIMAPPart.cpp in Sources */,
+				BDCD7CCB1A70771B0001DCC3 /* csrmbcs.cpp in Sources */,
 				C64EA732169E847800778456 /* MCIMAPSearchExpression.cpp in Sources */,
+				BDCD7CE11A70771B0001DCC3 /* uinvchar.c in Sources */,
 				C64EA734169E847800778456 /* MCIMAPSession.cpp in Sources */,
 				C68B2AF717797389005E61EF /* MCConnectionLoggerUtils.cpp in Sources */,
 				C64EA737169E847800778456 /* MCPOPMessageInfo.cpp in Sources */,
@@ -3199,6 +3450,7 @@
 				C64EA7DA16A1386600778456 /* MCSMTPOperation.cpp in Sources */,
 				C64EA7EA16A154B300778456 /* MCSMTPCheckAccountOperation.cpp in Sources */,
 				C64EA7F116A15A4D00778456 /* MCIMAPOperation.cpp in Sources */,
+				BDCD7CDB1A70771B0001DCC3 /* ucln_in.cpp in Sources */,
 				C64EA7F816A15A7800778456 /* MCIMAPCheckAccountOperation.cpp in Sources */,
 				943F1A9A17D964F600F0C798 /* MCIMAPConnectOperation.cpp in Sources */,
 				C64EA7FC16A2959800778456 /* MCIMAPFetchFoldersOperation.cpp in Sources */,
@@ -3208,11 +3460,13 @@
 				C63D315C17C9155C00A4D993 /* MCIMAPIdentity.cpp in Sources */,
 				84D73742199BF963005124E5 /* MCNNTPFetchAllArticlesOperation.cpp in Sources */,
 				C64EA80816A2999A00778456 /* MCIMAPCreateFolderOperation.cpp in Sources */,
+				BDCD7CC51A70771B0001DCC3 /* csmatch.cpp in Sources */,
 				C64EA80B16A299B700778456 /* MCIMAPSubscribeFolderOperation.cpp in Sources */,
 				84D7372C199BF66C005124E5 /* MCNNTPAsyncSession.cpp in Sources */,
 				C64EA81116A299ED00778456 /* MCIMAPAppendMessageOperation.cpp in Sources */,
 				C64EA81416A29A2300778456 /* MCIMAPCopyMessagesOperation.cpp in Sources */,
 				C64EA81716A29A8700778456 /* MCIMAPExpungeOperation.cpp in Sources */,
+				BDCD7CE31A70771B0001DCC3 /* uobject.cpp in Sources */,
 				C69BA85B17DEFCCB00D601B7 /* NSIndexSet+MCO.m in Sources */,
 				C64EA81A16A29AF200778456 /* MCIMAPFetchMessagesOperation.cpp in Sources */,
 				C64EA81D16A29DC500778456 /* MCIMAPFetchContentOperation.cpp in Sources */,
@@ -3230,6 +3484,7 @@
 				C62C6ED416A2A0E600737497 /* MCIMAPIdentityOperation.cpp in Sources */,
 				C62C6ED816A398FA00737497 /* MCIMAPFolderInfoOperation.cpp in Sources */,
 				C62C6EDA16A3D60700737497 /* MCIMAPAsyncConnection.cpp in Sources */,
+				BDCD7CD11A70771B0001DCC3 /* csrutf8.cpp in Sources */,
 				C62C6EEF16A7B67600737497 /* MCPOPAsyncSession.cpp in Sources */,
 				C62C6EF216A7C6DE00737497 /* MCPOPFetchHeaderOperation.cpp in Sources */,
 				C62C6EF516A7C6EA00737497 /* MCPOPFetchMessageOperation.cpp in Sources */,
@@ -3237,13 +3492,18 @@
 				C62C6EFB16A7C94000737497 /* MCPOPOperation.cpp in Sources */,
 				C62C6F0616A7E54500737497 /* MCPOPFetchMessagesOperation.cpp in Sources */,
 				C62C6F0A16A8F58000737497 /* MCIMAPAsyncSession.cpp in Sources */,
+				BDCD7CDD1A70771B0001DCC3 /* udataswp.c in Sources */,
+				BDCD7CD31A70771B0001DCC3 /* cstring.c in Sources */,
 				C6D42C1D16AE03D6002BB4F9 /* NSData+MCO.mm in Sources */,
 				C6D42C1E16AE03D6002BB4F9 /* NSString+MCO.mm in Sources */,
 				C64FF39116B3C13000F8C162 /* MCOObjectWrapper.mm in Sources */,
 				84D73737199BF7F2005124E5 /* MCNNTPFetchHeaderOperation.cpp in Sources */,
 				84D7376D199C005A005124E5 /* MCONNTPFetchHeaderOperation.mm in Sources */,
 				C6E665B71796500B0063F2CF /* ioapi.c in Sources */,
+				BDCD7CD51A70771B0001DCC3 /* inputext.cpp in Sources */,
+				BDCD7CEA1A7077680001DCC3 /* ucsdet.cpp in Sources */,
 				C07ADC28B83E7959BF114D46 /* MCOIMAPSession.mm in Sources */,
+				BDCD7CE71A70771B0001DCC3 /* utrace.c in Sources */,
 				C07AD99B2E2054C684DB8FF6 /* NSError+MCO.mm in Sources */,
 				F87F190C16BB62B00012652F /* MCOIMAPFetchFoldersOperation.mm in Sources */,
 				84B639F117F282B4003B5BA2 /* MCOPOPNoopOperation.mm in Sources */,
@@ -3251,6 +3511,7 @@
 				8199FBF119FAF1270040BBC3 /* MCIMAPFetchParsedContentOperation.cpp in Sources */,
 				C6EB30FE16B8E50F0091F4F1 /* NSArray+MCO.mm in Sources */,
 				C6EB310116B8E6E60091F4F1 /* NSObject+MCO.mm in Sources */,
+				BDCD7CDF1A70771B0001DCC3 /* uenum.c in Sources */,
 				C63CD67F16BDCDD400DB18F1 /* MCAddressDisplay.cpp in Sources */,
 				C63CD68016BDCDD400DB18F1 /* MCDateFormatter.cpp in Sources */,
 				C63CD68116BDCDD400DB18F1 /* MCHTMLRenderer.cpp in Sources */,
@@ -3263,8 +3524,10 @@
 				C64BB22E16E5C1EE000DB34C /* MCIndexSet.cpp in Sources */,
 				84D73764199BFFC7005124E5 /* MCONNTPSession.mm in Sources */,
 				C64BB23516EDAA17000DB34C /* MCOAbstractMessage.mm in Sources */,
+				BDCD7CE51A70771B0001DCC3 /* ustring.cpp in Sources */,
 				C64BB23916EDAA3F000DB34C /* MCOAbstractMessagePart.mm in Sources */,
 				C64BB23C16EDAAC7000DB34C /* MCOAbstractMultipart.mm in Sources */,
+				BDCD7CD71A70771B0001DCC3 /* uarrsort.c in Sources */,
 				C6D4FD4319FB7DAA001F7E01 /* MCDataMac.mm in Sources */,
 				C64BB23F16EDAAE1000DB34C /* MCOAbstractPart.mm in Sources */,
 				C64BB24216EDAAF4000DB34C /* MCOAddress.mm in Sources */,
@@ -3274,6 +3537,7 @@
 				C64BB25E16FD4377000DB34C /* MCOAttachment.mm in Sources */,
 				C64BB26116FD4390000DB34C /* MCOMessageBuilder.mm in Sources */,
 				C64BB26416FD43A1000DB34C /* MCOMessageParser.mm in Sources */,
+				BDCD7CCD1A70771B0001DCC3 /* csrsbcs.cpp in Sources */,
 				C64BB26716FD43E2000DB34C /* MCOMessagePart.mm in Sources */,
 				C64BB26A16FD44C2000DB34C /* MCOMultipart.mm in Sources */,
 				C623C58F16FE6B45001BBEFC /* MCOIMAPOperation.mm in Sources */,
@@ -3291,6 +3555,8 @@
 				C6CCC5C716FFE5190077A5FC /* MCORange.mm in Sources */,
 				C63D316217C92D8300A4D993 /* MCOIMAPIdentity.mm in Sources */,
 				C6F61F7B170169EE0073032E /* MCOIMAPFolderInfoOperation.mm in Sources */,
+				BDCD7CC31A70771B0001DCC3 /* csdetect.cpp in Sources */,
+				BDCD7CCF1A70771B0001DCC3 /* csrucode.cpp in Sources */,
 				C6F61F7E170169FB0073032E /* MCOIMAPAppendMessageOperation.mm in Sources */,
 				C6F61F8117016A0D0073032E /* MCOIMAPCopyMessagesOperation.mm in Sources */,
 				DA0F1C7B177C07B300F0D3B4 /* MCIMAPMessageRenderingOperation.cpp in Sources */,
@@ -3308,6 +3574,7 @@
 				C6F61FB51702AB340073032E /* MCOIMAPBaseOperation.mm in Sources */,
 				C608167517759967001F1018 /* MCSMTPDisconnectOperation.cpp in Sources */,
 				C6A81BBF17068E5E00882C15 /* MCOSMTPSession.mm in Sources */,
+				BDCD7CC71A70771B0001DCC3 /* csr2022.cpp in Sources */,
 				C673EBED1A46B41000A53F7F /* MCIMAPFolderInfo.cpp in Sources */,
 				8B0095CC1A00DDE700F84BC0 /* MCOSMTPLoginOperation.mm in Sources */,
 				C6A81BC317068E9500882C15 /* MCOSMTPSendOperation.mm in Sources */,
@@ -3323,9 +3590,12 @@
 				C6A81BE61706906D00882C15 /* MCOPOPFetchMessagesOperation.mm in Sources */,
 				C6A81C001707CEE600882C15 /* MCOPOPMessageInfo.mm in Sources */,
 				C6AC110017114DAF00B715B7 /* MCPOPCheckAccountOperation.cpp in Sources */,
+				BDCD7CD91A70771B0001DCC3 /* ucln_cmn.cpp in Sources */,
 				C6D6F7F9171E595D006F5B28 /* MCJSON.cpp in Sources */,
 				C6D6F954171E5CB8006F5B28 /* MCMD5.cpp in Sources */,
 				C6D6F956171E5CB8006F5B28 /* MCNull.cpp in Sources */,
+				BDCD7CEE1A7079300001DCC3 /* cmemory.c in Sources */,
+				BDCD7CC91A70771B0001DCC3 /* csrecog.cpp in Sources */,
 				C6D6F967171FCF9F006F5B28 /* MCJSONParser.cpp in Sources */,
 				BDCD7C5B1A5B1C2C0001DCC3 /* MCErrorMessage.cpp in Sources */,
 				84CFA98719F7159700FE35D2 /* MCNNTPFetchOverviewOperation.cpp in Sources */,
@@ -3390,6 +3660,7 @@
 				84D7373E199BF83B005124E5 /* MCNNTPFetchArticleOperation.cpp in Sources */,
 				C6BA2B991705F4E6003F0E9E /* MCAbstractMessage.cpp in Sources */,
 				C6BA2B9A1705F4E6003F0E9E /* MCAbstractMessagePart.cpp in Sources */,
+				BDCD7CF11A7079300001DCC3 /* umutex.cpp in Sources */,
 				84B639EE17F280F3003B5BA2 /* MCIMAPNoopOperation.cpp in Sources */,
 				C6BA2B9B1705F4E6003F0E9E /* MCAbstractMultipart.cpp in Sources */,
 				C6BA2B9C1705F4E6003F0E9E /* MCAbstractPart.cpp in Sources */,
@@ -3424,7 +3695,9 @@
 				C6BA2BB11705F4E6003F0E9E /* MCIMAPMultipart.cpp in Sources */,
 				C6BA2BB21705F4E6003F0E9E /* MCIMAPNamespace.cpp in Sources */,
 				C6BA2BB31705F4E6003F0E9E /* MCIMAPNamespaceItem.cpp in Sources */,
+				BDCD7CCC1A70771B0001DCC3 /* csrmbcs.cpp in Sources */,
 				C6BA2BB41705F4E6003F0E9E /* MCIMAPPart.cpp in Sources */,
+				BDCD7CE21A70771B0001DCC3 /* uinvchar.c in Sources */,
 				C6BA2BB51705F4E6003F0E9E /* MCIMAPSearchExpression.cpp in Sources */,
 				C6BA2BB61705F4E6003F0E9E /* MCIMAPSession.cpp in Sources */,
 				C68B2AF817797389005E61EF /* MCConnectionLoggerUtils.cpp in Sources */,
@@ -3445,6 +3718,7 @@
 				C6BA2BC01705F4E6003F0E9E /* MCSMTPSendWithDataOperation.cpp in Sources */,
 				C6BA2BC11705F4E6003F0E9E /* MCSMTPOperation.cpp in Sources */,
 				C6BA2BC21705F4E6003F0E9E /* MCSMTPCheckAccountOperation.cpp in Sources */,
+				BDCD7CDC1A70771B0001DCC3 /* ucln_in.cpp in Sources */,
 				C6BA2BC31705F4E6003F0E9E /* MCIMAPOperation.cpp in Sources */,
 				C6BA2BC41705F4E6003F0E9E /* MCIMAPCheckAccountOperation.cpp in Sources */,
 				943F1A9E17D96C5500F0C798 /* MCIMAPConnectOperation.cpp in Sources */,
@@ -3454,11 +3728,13 @@
 				C6BA2BC71705F4E6003F0E9E /* MCIMAPDeleteFolderOperation.cpp in Sources */,
 				C63D315D17C9155C00A4D993 /* MCIMAPIdentity.cpp in Sources */,
 				84D73743199BF963005124E5 /* MCNNTPFetchAllArticlesOperation.cpp in Sources */,
+				BDCD7CC61A70771B0001DCC3 /* csmatch.cpp in Sources */,
 				C6BA2BC81705F4E6003F0E9E /* MCIMAPCreateFolderOperation.cpp in Sources */,
 				C6BA2BC91705F4E6003F0E9E /* MCIMAPSubscribeFolderOperation.cpp in Sources */,
 				84D7372D199BF66C005124E5 /* MCNNTPAsyncSession.cpp in Sources */,
 				C6BA2BCA1705F4E6003F0E9E /* MCIMAPAppendMessageOperation.cpp in Sources */,
 				C6BA2BCB1705F4E6003F0E9E /* MCIMAPCopyMessagesOperation.cpp in Sources */,
+				BDCD7CE41A70771B0001DCC3 /* uobject.cpp in Sources */,
 				C6BA2BCC1705F4E6003F0E9E /* MCIMAPExpungeOperation.cpp in Sources */,
 				C69BA85C17DEFCCB00D601B7 /* NSIndexSet+MCO.m in Sources */,
 				C6BA2BCD1705F4E6003F0E9E /* MCIMAPFetchMessagesOperation.cpp in Sources */,
@@ -3476,6 +3752,7 @@
 				849189A218C93FB7002063A3 /* MCNNTPSession.cpp in Sources */,
 				C6BA2BD41705F4E6003F0E9E /* MCIMAPIdentityOperation.cpp in Sources */,
 				C6BA2BD51705F4E6003F0E9E /* MCIMAPFolderInfoOperation.cpp in Sources */,
+				BDCD7CD21A70771B0001DCC3 /* csrutf8.cpp in Sources */,
 				C6BA2BD61705F4E6003F0E9E /* MCIMAPAsyncConnection.cpp in Sources */,
 				C6BA2BD71705F4E6003F0E9E /* MCPOPAsyncSession.cpp in Sources */,
 				C6BA2BD81705F4E6003F0E9E /* MCPOPFetchHeaderOperation.cpp in Sources */,
@@ -3483,13 +3760,18 @@
 				C6BA2BDA1705F4E6003F0E9E /* MCPOPDeleteMessagesOperation.cpp in Sources */,
 				C6BA2BDB1705F4E6003F0E9E /* MCPOPOperation.cpp in Sources */,
 				C6BA2BDC1705F4E6003F0E9E /* MCPOPFetchMessagesOperation.cpp in Sources */,
+				BDCD7CDE1A70771B0001DCC3 /* udataswp.c in Sources */,
+				BDCD7CD41A70771B0001DCC3 /* cstring.c in Sources */,
 				C6BA2BDD1705F4E6003F0E9E /* MCIMAPAsyncSession.cpp in Sources */,
 				C6BA2BDE1705F4E6003F0E9E /* NSData+MCO.mm in Sources */,
 				C6BA2BDF1705F4E6003F0E9E /* NSString+MCO.mm in Sources */,
 				C6BA2BE01705F4E6003F0E9E /* MCOObjectWrapper.mm in Sources */,
 				84D73738199BF7F2005124E5 /* MCNNTPFetchHeaderOperation.cpp in Sources */,
 				84D7376E199C005A005124E5 /* MCONNTPFetchHeaderOperation.mm in Sources */,
+				BDCD7CD61A70771B0001DCC3 /* inputext.cpp in Sources */,
+				BDCD7CEB1A7077680001DCC3 /* ucsdet.cpp in Sources */,
 				C6E665B81796500B0063F2CF /* ioapi.c in Sources */,
+				BDCD7CE81A70771B0001DCC3 /* utrace.c in Sources */,
 				C6BA2BE11705F4E6003F0E9E /* MCOIMAPSession.mm in Sources */,
 				C6BA2BE21705F4E6003F0E9E /* NSError+MCO.mm in Sources */,
 				C6BA2BE31705F4E6003F0E9E /* MCOIMAPFetchFoldersOperation.mm in Sources */,
@@ -3497,6 +3779,7 @@
 				8199FBF219FAF1270040BBC3 /* MCIMAPFetchParsedContentOperation.cpp in Sources */,
 				C6BA2BE41705F4E6003F0E9E /* NSDictionary+MCO.mm in Sources */,
 				C6BA2BE51705F4E6003F0E9E /* NSArray+MCO.mm in Sources */,
+				BDCD7CE01A70771B0001DCC3 /* uenum.c in Sources */,
 				C6BA2BE61705F4E6003F0E9E /* NSObject+MCO.mm in Sources */,
 				C6BA2BE71705F4E6003F0E9E /* MCAddressDisplay.cpp in Sources */,
 				C6BA2BE81705F4E6003F0E9E /* MCDateFormatter.cpp in Sources */,
@@ -3509,8 +3792,10 @@
 				C6BA2BEE1705F4E6003F0E9E /* MCIMAPCapabilityOperation.cpp in Sources */,
 				C6BA2BEF1705F4E6003F0E9E /* MCIndexSet.cpp in Sources */,
 				84D73765199BFFC7005124E5 /* MCONNTPSession.mm in Sources */,
+				BDCD7CE61A70771B0001DCC3 /* ustring.cpp in Sources */,
 				C6BA2BF01705F4E6003F0E9E /* MCOAbstractMessage.mm in Sources */,
 				C6BA2BF11705F4E6003F0E9E /* MCOAbstractMessagePart.mm in Sources */,
+				BDCD7CD81A70771B0001DCC3 /* uarrsort.c in Sources */,
 				C6D4FD4419FB7DB2001F7E01 /* MCDataMac.mm in Sources */,
 				C6BA2BF21705F4E6003F0E9E /* MCOAbstractMultipart.mm in Sources */,
 				C6BA2BF31705F4E6003F0E9E /* MCOAbstractPart.mm in Sources */,
@@ -3520,6 +3805,7 @@
 				C6BA2BF71705F4E6003F0E9E /* MCOOperation.mm in Sources */,
 				C6BA2BF81705F4E6003F0E9E /* MCOAttachment.mm in Sources */,
 				C6BA2BF91705F4E6003F0E9E /* MCOMessageBuilder.mm in Sources */,
+				BDCD7CCE1A70771B0001DCC3 /* csrsbcs.cpp in Sources */,
 				C6BA2BFA1705F4E6003F0E9E /* MCOMessageParser.mm in Sources */,
 				C6BA2BFB1705F4E6003F0E9E /* MCOMessagePart.mm in Sources */,
 				C6BA2BFC1705F4E6003F0E9E /* MCOMultipart.mm in Sources */,
@@ -3537,6 +3823,8 @@
 				C6BA2C061705F4E6003F0E9E /* MCOAbstractMessageRendererCallback.mm in Sources */,
 				C6BA2C071705F4E6003F0E9E /* MCORange.mm in Sources */,
 				C63D316317C92D8300A4D993 /* MCOIMAPIdentity.mm in Sources */,
+				BDCD7CC41A70771B0001DCC3 /* csdetect.cpp in Sources */,
+				BDCD7CD01A70771B0001DCC3 /* csrucode.cpp in Sources */,
 				C6BA2C081705F4E6003F0E9E /* MCOIMAPFolderInfoOperation.mm in Sources */,
 				C6BA2C091705F4E6003F0E9E /* MCOIMAPAppendMessageOperation.mm in Sources */,
 				C6BA2C0A1705F4E6003F0E9E /* MCOIMAPCopyMessagesOperation.mm in Sources */,
@@ -3554,6 +3842,7 @@
 				C6BA2C131705F4E6003F0E9E /* MCOIMAPFolderInfo.mm in Sources */,
 				C6BA2C141705F4E6003F0E9E /* MCOIMAPBaseOperation.mm in Sources */,
 				C608167617759968001F1018 /* MCSMTPDisconnectOperation.cpp in Sources */,
+				BDCD7CC81A70771B0001DCC3 /* csr2022.cpp in Sources */,
 				C673EBEE1A46B41000A53F7F /* MCIMAPFolderInfo.cpp in Sources */,
 				C6A81BC017068E5E00882C15 /* MCOSMTPSession.mm in Sources */,
 				8B0095CE1A00DE7700F84BC0 /* MCOSMTPLoginOperation.mm in Sources */,
@@ -3569,9 +3858,12 @@
 				C6A81BE31706905600882C15 /* MCOPOPOperation.mm in Sources */,
 				C6A81BE71706906D00882C15 /* MCOPOPFetchMessagesOperation.mm in Sources */,
 				C6A81C011707CEE600882C15 /* MCOPOPMessageInfo.mm in Sources */,
+				BDCD7CDA1A70771B0001DCC3 /* ucln_cmn.cpp in Sources */,
 				C6AC110117114DAF00B715B7 /* MCPOPCheckAccountOperation.cpp in Sources */,
 				C6D6F7FA171E595D006F5B28 /* MCJSON.cpp in Sources */,
 				C6D6F955171E5CB8006F5B28 /* MCMD5.cpp in Sources */,
+				BDCD7CEF1A7079300001DCC3 /* cmemory.c in Sources */,
+				BDCD7CCA1A70771B0001DCC3 /* csrecog.cpp in Sources */,
 				C6D6F957171E5CB8006F5B28 /* MCNull.cpp in Sources */,
 				BDCD7C5C1A5B1C2C0001DCC3 /* MCErrorMessage.cpp in Sources */,
 				84CFA98819F7159700FE35D2 /* MCNNTPFetchOverviewOperation.cpp in Sources */,
@@ -3656,7 +3948,6 @@
 				INFOPLIST_FILE = "framework-Info.plist";
 				INSTALL_PATH = "@executable_path/../Frameworks";
 				OTHER_LDFLAGS = (
-					"-luchardet-ios",
 					"-lctemplate-ios",
 					"-letpan-ios",
 					"-lxml2",
@@ -3684,7 +3975,6 @@
 				INFOPLIST_FILE = "framework-Info.plist";
 				INSTALL_PATH = "@executable_path/../Frameworks";
 				OTHER_LDFLAGS = (
-					"-luchardet-ios",
 					"-lctemplate-ios",
 					"-letpan-ios",
 					"-lxml2",
@@ -3724,7 +4014,6 @@
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				MTL_ENABLE_DEBUG_INFO = YES;
 				OTHER_LDFLAGS = (
-					"-luchardet",
 					"-lctemplate",
 					"-letpan",
 					"-lxml2",
@@ -3764,7 +4053,6 @@
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				MTL_ENABLE_DEBUG_INFO = NO;
 				OTHER_LDFLAGS = (
-					"-luchardet",
 					"-lctemplate",
 					"-letpan",
 					"-lxml2",
@@ -3799,8 +4087,7 @@
 				GCC_OPTIMIZATION_LEVEL = 0;
 				GCC_PREPROCESSOR_DEFINITIONS = (
 					"DEBUG=1",
-					"JSON_NDEBUG=1",
-					"JSON_ISO_STRICT=1",
+					"U_COMMON_IMPLEMENTATION=1",
 					"$(inherited)",
 				);
 				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
@@ -3818,8 +4105,8 @@
 				"HEADER_SEARCH_PATHS[sdk=iphoneos*]" = "$(IOS_HEADERS_SEARCH_PATHS)";
 				"HEADER_SEARCH_PATHS[sdk=iphonesimulator*]" = "$(IOS_HEADERS_SEARCH_PATHS)";
 				"HEADER_SEARCH_PATHS[sdk=macosx*]" = "$(OSX_HEADERS_SEARCH_PATHS)";
-				IOS_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/include\" \"$(SRCROOT)/../Externals/ctemplate-ios/include\" \"$(SRCROOT)/../Externals/tidy-html5-ios/include/tidy\" \"$(SRCROOT)/../Externals/uchardet-ios/include\" /usr/include/libxml2";
-				IOS_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/lib\" \"$(SRCROOT)/../Externals/ctemplate-ios/lib\" \"$(SRCROOT)/../Externals/libsasl-ios/lib\" \"$(SRCROOT)/../Externals/tidy-html5-ios/lib\" \"$(SRCROOT)/../Externals/uchardet-ios/lib\"";
+				IOS_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../src/core/basetypes/icu-ucsdet/include\" \"$(SRCROOT)/../Externals/libetpan-ios/include\" \"$(SRCROOT)/../Externals/ctemplate-ios/include\" \"$(SRCROOT)/../Externals/tidy-html5-ios/include/tidy\" /usr/include/libxml2";
+				IOS_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/lib\" \"$(SRCROOT)/../Externals/ctemplate-ios/lib\" \"$(SRCROOT)/../Externals/libsasl-ios/lib\" \"$(SRCROOT)/../Externals/tidy-html5-ios/lib\"";
 				LIBRARY_SEARCH_PATHS = (
 					"\"$(SRCROOT)/../Externals/libetpan/lib\"",
 					"\"$(SRCROOT)/../Externals/icu4c/lib\"",
@@ -3830,8 +4117,8 @@
 				"LIBRARY_SEARCH_PATHS[sdk=macosx*]" = "$(OSX_LIBRARY_SEARCH_PATHS)";
 				MACOSX_DEPLOYMENT_TARGET = 10.8;
 				ONLY_ACTIVE_ARCH = YES;
-				OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/include\" \"$(SRCROOT)/../Externals/ctemplate-osx/include\" \"$(SRCROOT)/../Externals/uchardet-osx/include\" /usr/include/tidy /usr/include/libxml2";
-				OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/lib\" \"$(SRCROOT)/../Externals/uchardet-osx/lib\" \"$(SRCROOT)/../Externals/ctemplate-osx/lib\"";
+				OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../src/core/basetypes/icu-ucsdet/include\" \"$(SRCROOT)/../Externals/libetpan-osx/include\" \"$(SRCROOT)/../Externals/ctemplate-osx/include\" /usr/include/tidy /usr/include/libxml2";
+				OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/lib\" \"$(SRCROOT)/../Externals/ctemplate-osx/lib\"";
 			};
 			name = Debug;
 		};
@@ -3848,10 +4135,7 @@
 				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
 				GCC_C_LANGUAGE_STANDARD = gnu99;
 				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					"JSON_NDEBUG=1",
-					"JSON_ISO_STRICT=1",
-				);
+				GCC_PREPROCESSOR_DEFINITIONS = "U_COMMON_IMPLEMENTATION=1";
 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
 				GCC_WARN_UNINITIALIZED_AUTOS = YES;
@@ -3866,8 +4150,8 @@
 				"HEADER_SEARCH_PATHS[sdk=iphoneos*]" = "$(IOS_HEADERS_SEARCH_PATHS)";
 				"HEADER_SEARCH_PATHS[sdk=iphonesimulator*]" = "$(IOS_HEADERS_SEARCH_PATHS)";
 				"HEADER_SEARCH_PATHS[sdk=macosx*]" = "$(OSX_HEADERS_SEARCH_PATHS)";
-				IOS_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/include\" \"$(SRCROOT)/../Externals/ctemplate-ios/include\" \"$(SRCROOT)/../Externals/tidy-html5-ios/include/tidy\" \"$(SRCROOT)/../Externals/uchardet-ios/include\" /usr/include/libxml2";
-				IOS_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/lib\" \"$(SRCROOT)/../Externals/ctemplate-ios/lib\" \"$(SRCROOT)/../Externals/libsasl-ios/lib\" \"$(SRCROOT)/../Externals/tidy-html5-ios/lib\" \"$(SRCROOT)/../Externals/uchardet-ios/lib\"";
+				IOS_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../src/core/basetypes/icu-ucsdet/include\" \"$(SRCROOT)/../Externals/libetpan-ios/include\" \"$(SRCROOT)/../Externals/ctemplate-ios/include\" \"$(SRCROOT)/../Externals/tidy-html5-ios/include/tidy\" /usr/include/libxml2";
+				IOS_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-ios/lib\" \"$(SRCROOT)/../Externals/ctemplate-ios/lib\" \"$(SRCROOT)/../Externals/libsasl-ios/lib\" \"$(SRCROOT)/../Externals/tidy-html5-ios/lib\"";
 				LIBRARY_SEARCH_PATHS = (
 					"\"$(SRCROOT)/../Externals/libetpan/lib\"",
 					"\"$(SRCROOT)/../Externals/icu4c/lib\"",
@@ -3877,8 +4161,8 @@
 				"LIBRARY_SEARCH_PATHS[sdk=iphonesimulator*]" = "$(IOS_LIBRARY_SEARCH_PATHS)";
 				"LIBRARY_SEARCH_PATHS[sdk=macosx*]" = "$(OSX_LIBRARY_SEARCH_PATHS)";
 				MACOSX_DEPLOYMENT_TARGET = 10.8;
-				OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/include\" \"$(SRCROOT)/../Externals/ctemplate-osx/include\" \"$(SRCROOT)/../Externals/uchardet-osx/include\" /usr/include/tidy /usr/include/libxml2";
-				OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/lib\" \"$(SRCROOT)/../Externals/uchardet-osx/lib\" \"$(SRCROOT)/../Externals/ctemplate-osx/lib\"";
+				OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../src/core/basetypes/icu-ucsdet/include\" \"$(SRCROOT)/../Externals/libetpan-osx/include\" \"$(SRCROOT)/../Externals/ctemplate-osx/include\" /usr/include/tidy /usr/include/libxml2";
+				OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan-osx/lib\" \"$(SRCROOT)/../Externals/ctemplate-osx/lib\"";
 			};
 			name = Release;
 		};
@@ -3932,7 +4216,6 @@
 				IPHONEOS_DEPLOYMENT_TARGET = 6.1;
 				ONLY_ACTIVE_ARCH = NO;
 				OTHER_LDFLAGS = (
-					"-luchardet-ios",
 					"-lctemplate-ios",
 					"-letpan-ios",
 					"-lxml2",
@@ -3964,7 +4247,6 @@
 				ONLY_ACTIVE_ARCH = NO;
 				OTHER_CFLAGS = "-DNS_BLOCK_ASSERTIONS=1";
 				OTHER_LDFLAGS = (
-					"-luchardet-ios",
 					"-lctemplate-ios",
 					"-letpan-ios",
 					"-lxml2",
@@ -4094,7 +4376,6 @@
 					"-all_load",
 					"-ltidy",
 					"-lz",
-					"-luchardet",
 				);
 				PRODUCT_NAME = MailCore;
 				SDKROOT = macosx;
@@ -4123,7 +4404,6 @@
 					"-all_load",
 					"-ltidy",
 					"-lz",
-					"-luchardet",
 				);
 				PRODUCT_NAME = MailCore;
 				SDKROOT = macosx;
diff --git a/src/core/basetypes/MCData.cpp b/src/core/basetypes/MCData.cpp
index 1fbca6b4..170ce927 100644
--- a/src/core/basetypes/MCData.cpp
+++ b/src/core/basetypes/MCData.cpp
@@ -2,7 +2,7 @@
 
 #include "MCData.h"
 
-#define USE_UCHARDET 1
+#define USE_UCHARDET 0
 
 #include <stdlib.h>
 #include <string.h>
diff --git a/src/core/basetypes/icu-ucsdet/cmemory.c b/src/core/basetypes/icu-ucsdet/cmemory.c
new file mode 100644
index 00000000..cd3ccac6
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/cmemory.c
@@ -0,0 +1,183 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2002-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File cmemory.c      ICU Heap allocation.
+*                     All ICU heap allocation, both for C and C++ new of ICU
+*                     class types, comes through these functions.
+*
+*                     If you have a need to replace ICU allocation, this is the
+*                     place to do it.
+*
+*                     Note that uprv_malloc(0) returns a non-NULL pointer, and
+*                     that a subsequent free of that pointer value is a NOP.
+*
+******************************************************************************
+*/
+#include "unicode/uclean.h"
+#include "cmemory.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include <stdlib.h>
+
+/* uprv_malloc(0) returns a pointer to this read-only data. */
+static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0};
+
+/* Function Pointers for user-supplied heap functions  */
+static const void     *pContext;
+static UMemAllocFn    *pAlloc;
+static UMemReallocFn  *pRealloc;
+static UMemFreeFn     *pFree;
+
+/* Flag indicating whether any heap allocations have happened.
+ *   Used to prevent changing out the heap functions after allocations have been made */
+static UBool   gHeapInUse;
+
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#include <stdio.h>
+static int n=0;
+static long b=0; 
+#endif
+
+#if U_DEBUG
+
+static char gValidMemorySink = 0;
+
+U_CAPI void uprv_checkValidMemory(const void *p, size_t n) {
+    /*
+     * Access the memory to ensure that it's all valid.
+     * Load and save a computed value to try to ensure that the compiler
+     * does not throw away the whole loop.
+     * A thread analyzer might complain about un-mutexed access to gValidMemorySink
+     * which is true but harmless because no one ever uses the value in gValidMemorySink.
+     */
+    const char *s = (const char *)p;
+    char c = gValidMemorySink;
+    size_t i;
+    U_ASSERT(p != NULL);
+    for(i = 0; i < n; ++i) {
+        c ^= s[i];
+    }
+    gValidMemorySink = c;
+}
+
+#endif  /* U_DEBUG */
+
+U_CAPI void * U_EXPORT2
+uprv_malloc(size_t s) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#if 1
+  putchar('>');
+  fflush(stdout);
+#else
+  fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr);
+#endif
+#endif
+    if (s > 0) {
+        gHeapInUse = TRUE;
+        if (pAlloc) {
+            return (*pAlloc)(pContext, s);
+        } else {
+            return uprv_default_malloc(s);
+        }
+    } else {
+        return (void *)zeroMem;
+    }
+}
+
+U_CAPI void * U_EXPORT2
+uprv_realloc(void * buffer, size_t size) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+  putchar('~');
+  fflush(stdout);
+#endif
+    if (buffer == zeroMem) {
+        return uprv_malloc(size);
+    } else if (size == 0) {
+        if (pFree) {
+            (*pFree)(pContext, buffer);
+        } else {
+            uprv_default_free(buffer);
+        }
+        return (void *)zeroMem;
+    } else {
+        gHeapInUse = TRUE;
+        if (pRealloc) {
+            return (*pRealloc)(pContext, buffer, size);
+        } else {
+            return uprv_default_realloc(buffer, size);
+        }
+    }
+}
+
+U_CAPI void U_EXPORT2
+uprv_free(void *buffer) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+  putchar('<');
+  fflush(stdout);
+#endif
+    if (buffer != zeroMem) {
+        if (pFree) {
+            (*pFree)(pContext, buffer);
+        } else {
+            uprv_default_free(buffer);
+        }
+    }
+}
+
+U_CAPI void * U_EXPORT2
+uprv_calloc(size_t num, size_t size) {
+    void *mem = NULL;
+    size *= num;
+    mem = uprv_malloc(size);
+    if (mem) {
+        uprv_memset(mem, 0, size);
+    }
+    return mem;
+}
+
+U_CAPI void U_EXPORT2
+u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f,  UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return;
+    }
+    if (a==NULL || r==NULL || f==NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    if (gHeapInUse) {
+        *status = U_INVALID_STATE_ERROR;
+        return;
+    }
+    pContext  = context;
+    pAlloc    = a;
+    pRealloc  = r;
+    pFree     = f;
+}
+
+
+U_CFUNC UBool cmemory_cleanup(void) {
+    pContext   = NULL;
+    pAlloc     = NULL;
+    pRealloc   = NULL;
+    pFree      = NULL;
+    gHeapInUse = FALSE;
+    return TRUE;
+}
+
+
+/*
+ *   gHeapInUse
+ *       Return True if ICU has allocated any memory.
+ *       Used by u_SetMutexFunctions() and similar to verify that ICU has not
+ *               been used, that it is in a pristine initial state.
+ */
+U_CFUNC UBool cmemory_inUse() {
+    return gHeapInUse;
+}
+
diff --git a/src/core/basetypes/icu-ucsdet/csdetect.cpp b/src/core/basetypes/icu-ucsdet/csdetect.cpp
new file mode 100644
index 00000000..3efbd492
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csdetect.cpp
@@ -0,0 +1,485 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2013, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucsdet.h"
+
+#include "csdetect.h"
+#include "csmatch.h"
+#include "uenumimp.h"
+
+#include "cmemory.h"
+#include "cstring.h"
+#include "umutex.h"
+#include "ucln_in.h"
+#include "uarrsort.h"
+#include "inputext.h"
+#include "csrsbcs.h"
+#include "csrmbcs.h"
+#include "csrutf8.h"
+#include "csrucode.h"
+#include "csr2022.h"
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+
+U_NAMESPACE_BEGIN
+
+struct CSRecognizerInfo : public UMemory {
+    CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled)
+        : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {};
+
+    ~CSRecognizerInfo() {delete recognizer;};
+
+    CharsetRecognizer *recognizer;
+    UBool isDefaultEnabled;
+};
+
+U_NAMESPACE_END
+
+static icu::CSRecognizerInfo **fCSRecognizers = NULL;
+static icu::UInitOnce gCSRecognizersInitOnce;
+static int32_t fCSRecognizers_size = 0;
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV csdet_cleanup(void)
+{
+    U_NAMESPACE_USE
+    if (fCSRecognizers != NULL) {
+        for(int32_t r = 0; r < fCSRecognizers_size; r += 1) {
+            delete fCSRecognizers[r];
+            fCSRecognizers[r] = NULL;
+        }
+
+        DELETE_ARRAY(fCSRecognizers);
+        fCSRecognizers = NULL;
+        fCSRecognizers_size = 0;
+    }
+    gCSRecognizersInitOnce.reset();
+
+    return TRUE;
+}
+
+static int32_t U_CALLCONV
+charsetMatchComparator(const void * /*context*/, const void *left, const void *right)
+{
+    U_NAMESPACE_USE
+
+    const CharsetMatch **csm_l = (const CharsetMatch **) left;
+    const CharsetMatch **csm_r = (const CharsetMatch **) right;
+
+    // NOTE: compare is backwards to sort from highest to lowest.
+    return (*csm_r)->getConfidence() - (*csm_l)->getConfidence();
+}
+
+static void U_CALLCONV initRecognizers(UErrorCode &status) {
+    U_NAMESPACE_USE
+    ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup);
+    CSRecognizerInfo *tempArray[] = {
+        new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_7_el(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_8_he(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_windows_1251(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_windows_1256(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_KOI8_R(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_sjis(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_gb_18030(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_euc_jp(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_euc_kr(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
+    };
+    int32_t rCount = ARRAY_SIZE(tempArray);
+
+    fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount);
+
+    if (fCSRecognizers == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    } 
+    else {
+        fCSRecognizers_size = rCount;
+        for (int32_t r = 0; r < rCount; r += 1) {
+            fCSRecognizers[r] = tempArray[r];
+            if (fCSRecognizers[r] == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+            }
+        }
+    }
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+void CharsetDetector::setRecognizers(UErrorCode &status)
+{
+    umtx_initOnce(gCSRecognizersInitOnce, &initRecognizers, status);
+}
+
+CharsetDetector::CharsetDetector(UErrorCode &status)
+  : textIn(new InputText(status)), resultArray(NULL),
+    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE),
+    fEnabledRecognizers(NULL)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    setRecognizers(status);
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    resultArray = (CharsetMatch **)uprv_malloc(sizeof(CharsetMatch *)*fCSRecognizers_size);
+
+    if (resultArray == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
+        resultArray[i] = new CharsetMatch();
+
+        if (resultArray[i] == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            break;
+        }
+    }
+}
+
+CharsetDetector::~CharsetDetector()
+{
+    delete textIn;
+
+    for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
+        delete resultArray[i];
+    }
+
+    uprv_free(resultArray);
+
+    if (fEnabledRecognizers) {
+        uprv_free(fEnabledRecognizers);
+    }
+}
+
+void CharsetDetector::setText(const char *in, int32_t len)
+{
+    textIn->setText(in, len);
+    fFreshTextSet = TRUE;
+}
+
+UBool CharsetDetector::setStripTagsFlag(UBool flag)
+{
+    UBool temp = fStripTags;
+    fStripTags = flag;
+    fFreshTextSet = TRUE;
+    return temp;
+}
+
+UBool CharsetDetector::getStripTagsFlag() const
+{
+    return fStripTags;
+}
+
+void CharsetDetector::setDeclaredEncoding(const char *encoding, int32_t len) const
+{
+    textIn->setDeclaredEncoding(encoding,len);
+}
+
+int32_t CharsetDetector::getDetectableCount()
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    setRecognizers(status);
+
+    return fCSRecognizers_size; 
+}
+
+const CharsetMatch *CharsetDetector::detect(UErrorCode &status)
+{
+    int32_t maxMatchesFound = 0;
+
+    detectAll(maxMatchesFound, status);
+
+    if(maxMatchesFound > 0) {
+        return resultArray[0];
+    } else {
+        return NULL;
+    }
+}
+
+const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, UErrorCode &status)
+{
+    if(!textIn->isSet()) {
+        status = U_MISSING_RESOURCE_ERROR;// TODO:  Need to set proper status code for input text not set
+
+        return NULL;
+    } else if (fFreshTextSet) {
+        CharsetRecognizer *csr;
+        int32_t            i;
+
+        textIn->MungeInput(fStripTags);
+
+        // Iterate over all possible charsets, remember all that
+        // give a match quality > 0.
+        resultCount = 0;
+        for (i = 0; i < fCSRecognizers_size; i += 1) {
+            csr = fCSRecognizers[i]->recognizer;
+            if (csr->match(textIn, resultArray[resultCount])) {
+                resultCount++;
+            }
+        }
+
+        if (resultCount > 1) {
+            uprv_sortArray(resultArray, resultCount, sizeof resultArray[0], charsetMatchComparator, NULL, TRUE, &status);
+        }
+        fFreshTextSet = FALSE;
+    }
+
+    maxMatchesFound = resultCount;
+
+    return resultArray;
+}
+
+void CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    int32_t modIdx = -1;
+    UBool isDefaultVal = FALSE;
+    for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+        CSRecognizerInfo *csrinfo = fCSRecognizers[i];
+        if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) {
+            modIdx = i;
+            isDefaultVal = (csrinfo->isDefaultEnabled == enabled);
+            break;
+        }
+    }
+    if (modIdx < 0) {
+        // No matching encoding found
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if (fEnabledRecognizers == NULL && !isDefaultVal) {
+        // Create an array storing the non default setting
+        fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size);
+        if (fEnabledRecognizers == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // Initialize the array with default info
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled;
+        }
+    }
+
+    if (fEnabledRecognizers != NULL) {
+        fEnabledRecognizers[modIdx] = enabled;
+    }
+}
+
+/*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const
+{
+    if( index > fCSRecognizers_size-1 || index < 0) {
+        status = U_INDEX_OUTOFBOUNDS_ERROR;
+
+        return 0;
+    } else {
+        return fCSRecognizers[index]->getName();
+    }
+}*/
+
+U_NAMESPACE_END
+
+U_CDECL_BEGIN
+typedef struct {
+    int32_t currIndex;
+    UBool all;
+    UBool *enabledRecognizers;
+} Context;
+
+
+
+static void U_CALLCONV
+enumClose(UEnumeration *en) {
+    if(en->context != NULL) {
+        DELETE_ARRAY(en->context);
+    }
+
+    DELETE_ARRAY(en);
+}
+
+static int32_t U_CALLCONV
+enumCount(UEnumeration *en, UErrorCode *) {
+    if (((Context *)en->context)->all) {
+        // ucsdet_getAllDetectableCharsets, all charset detector names
+        return fCSRecognizers_size;
+    }
+
+    // Otherwise, ucsdet_getDetectableCharsets - only enabled ones
+    int32_t count = 0;
+    UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
+    if (enabledArray != NULL) {
+        // custom set
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            if (enabledArray[i]) {
+                count++;
+            }
+        }
+    } else {
+        // default set
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            if (fCSRecognizers[i]->isDefaultEnabled) {
+                count++;
+            }
+        }
+    }
+    return count;
+}
+
+static const char* U_CALLCONV
+enumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) {
+    const char *currName = NULL;
+
+    if (((Context *)en->context)->currIndex < fCSRecognizers_size) {
+        if (((Context *)en->context)->all) {
+            // ucsdet_getAllDetectableCharsets, all charset detector names
+            currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+            ((Context *)en->context)->currIndex++;
+        } else {
+            // ucsdet_getDetectableCharsets
+            UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
+            if (enabledArray != NULL) {
+                // custome set
+                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
+                    if (enabledArray[((Context *)en->context)->currIndex]) {
+                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+                    }
+                    ((Context *)en->context)->currIndex++;
+                }
+            } else {
+                // default set
+                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
+                    if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) {
+                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+                    }
+                    ((Context *)en->context)->currIndex++;
+                }
+            }
+        }
+    }
+
+    if(resultLength != NULL) {
+        *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName);
+    }
+
+    return currName;
+}
+
+
+static void U_CALLCONV
+enumReset(UEnumeration *en, UErrorCode *) {
+    ((Context *)en->context)->currIndex = 0;
+}
+
+static const UEnumeration gCSDetEnumeration = {
+    NULL,
+    NULL,
+    enumClose,
+    enumCount,
+    uenum_unextDefault,
+    enumNext,
+    enumReset
+};
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+UEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status)
+{
+
+    /* Initialize recognized charsets. */
+    setRecognizers(status);
+
+    if(U_FAILURE(status)) {
+        return 0;
+    }
+
+    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
+    if (en == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
+    en->context = (void*)NEW_ARRAY(Context, 1);
+    if (en->context == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        DELETE_ARRAY(en);
+        return 0;
+    }
+    uprv_memset(en->context, 0, sizeof(Context));
+    ((Context*)en->context)->all = TRUE;
+    return en;
+}
+
+UEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const
+{
+    if(U_FAILURE(status)) {
+        return 0;
+    }
+
+    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
+    if (en == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
+    en->context = (void*)NEW_ARRAY(Context, 1);
+    if (en->context == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        DELETE_ARRAY(en);
+        return 0;
+    }
+    uprv_memset(en->context, 0, sizeof(Context));
+    ((Context*)en->context)->all = FALSE;
+    ((Context*)en->context)->enabledRecognizers = fEnabledRecognizers;
+    return en;
+}
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/csmatch.cpp b/src/core/basetypes/icu-ucsdet/csmatch.cpp
new file mode 100644
index 00000000..7233e3c2
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csmatch.cpp
@@ -0,0 +1,73 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2012, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+#include "unicode/unistr.h"
+#include "unicode/ucnv.h"
+
+#include "csmatch.h"
+
+#include "csrecog.h"
+#include "inputext.h"
+
+U_NAMESPACE_BEGIN
+
+CharsetMatch::CharsetMatch()
+  : textIn(NULL), confidence(0), fCharsetName(NULL), fLang(NULL)
+{
+    // nothing else to do.
+}
+
+void CharsetMatch::set(InputText *input, const CharsetRecognizer *cr, int32_t conf,
+                       const char *csName, const char *lang)
+{
+    textIn = input;
+    confidence = conf; 
+    fCharsetName = csName;
+    fLang = lang;
+    if (cr != NULL) {
+        if (fCharsetName == NULL) {
+            fCharsetName = cr->getName();
+        }
+        if (fLang == NULL) {
+            fLang = cr->getLanguage();
+        }
+    }
+}
+
+const char* CharsetMatch::getName()const
+{
+    return fCharsetName; 
+}
+
+const char* CharsetMatch::getLanguage()const
+{
+    return fLang; 
+}
+
+int32_t CharsetMatch::getConfidence()const
+{
+    return confidence;
+}
+
+/*
+int32_t CharsetMatch::getUChars(UChar *buf, int32_t cap, UErrorCode *status) const
+{
+    UConverter *conv = ucnv_open(getName(), status);
+    int32_t result = ucnv_toUChars(conv, buf, cap, (const char *) textIn->fRawInput, textIn->fRawLength, status);
+
+    ucnv_close(conv);
+
+    return result;
+}
+*/
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/csr2022.cpp b/src/core/basetypes/icu-ucsdet/csr2022.cpp
new file mode 100644
index 00000000..3db0bc9f
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csr2022.cpp
@@ -0,0 +1,190 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2012, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "cstring.h"
+
+#include "csr2022.h"
+#include "csmatch.h"
+
+U_NAMESPACE_BEGIN
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+/**
+ * Matching function shared among the 2022 detectors JP, CN and KR
+ * Counts up the number of legal and unrecognized escape sequences in
+ * the sample of text, and computes a score based on the total number &
+ * the proportion that fit the encoding.
+ * 
+ * 
+ * @param text the byte buffer containing text to analyse
+ * @param textLen  the size of the text in the byte.
+ * @param escapeSequences the byte escape sequences to test for.
+ * @return match quality, in the range of 0-100.
+ */
+int32_t CharsetRecog_2022::match_2022(const uint8_t *text, int32_t textLen, const uint8_t escapeSequences[][5], int32_t escapeSequences_length) const
+{
+    int32_t i, j;
+    int32_t escN;
+    int32_t hits   = 0;
+    int32_t misses = 0;
+    int32_t shifts = 0;
+    int32_t quality;
+
+    i = 0;
+    while(i < textLen) {
+        if(text[i] == 0x1B) {
+            escN = 0;
+            while(escN < escapeSequences_length) {
+                const uint8_t *seq = escapeSequences[escN];
+                int32_t seq_length = (int32_t)uprv_strlen((const char *) seq);
+
+                if (textLen-i >= seq_length) {
+                    j = 1;
+                    while(j < seq_length) {
+                        if(seq[j] != text[i+j]) {
+                            goto checkEscapes;
+                        }
+
+                        j += 1;
+                    }
+
+                    hits += 1;
+                    i += seq_length-1;
+                    goto scanInput;
+                }
+                // else we ran out of string to compare this time.
+checkEscapes:
+                escN += 1;
+            }
+
+            misses += 1;
+        }
+
+        if( text[i]== 0x0e || text[i] == 0x0f){
+            shifts += 1;
+        }
+
+scanInput:
+        i += 1;
+    }
+
+    if (hits == 0) {
+        return 0;
+    }
+
+    //
+    // Initial quality is based on relative proportion of recongized vs.
+    //   unrecognized escape sequences. 
+    //   All good:  quality = 100;
+    //   half or less good: quality = 0;
+    //   linear inbetween.
+    quality = (100*hits - 100*misses) / (hits + misses);
+
+    // Back off quality if there were too few escape sequences seen.
+    //   Include shifts in this computation, so that KR does not get penalized
+    //   for having only a single Escape sequence, but many shifts.
+    if (hits+shifts < 5) {
+        quality -= (5-(hits+shifts))*10;
+    }
+
+    if (quality < 0) {
+        quality = 0;
+    }
+
+    return quality;
+}
+
+
+static const uint8_t escapeSequences_2022JP[][5] = {
+    {0x1b, 0x24, 0x28, 0x43, 0x00},   // KS X 1001:1992
+    {0x1b, 0x24, 0x28, 0x44, 0x00},   // JIS X 212-1990
+    {0x1b, 0x24, 0x40, 0x00, 0x00},   // JIS C 6226-1978
+    {0x1b, 0x24, 0x41, 0x00, 0x00},   // GB 2312-80
+    {0x1b, 0x24, 0x42, 0x00, 0x00},   // JIS X 208-1983
+    {0x1b, 0x26, 0x40, 0x00, 0x00},   // JIS X 208 1990, 1997
+    {0x1b, 0x28, 0x42, 0x00, 0x00},   // ASCII
+    {0x1b, 0x28, 0x48, 0x00, 0x00},   // JIS-Roman
+    {0x1b, 0x28, 0x49, 0x00, 0x00},   // Half-width katakana
+    {0x1b, 0x28, 0x4a, 0x00, 0x00},   // JIS-Roman
+    {0x1b, 0x2e, 0x41, 0x00, 0x00},   // ISO 8859-1
+    {0x1b, 0x2e, 0x46, 0x00, 0x00}    // ISO 8859-7
+};
+
+static const uint8_t escapeSequences_2022KR[][5] = {
+    {0x1b, 0x24, 0x29, 0x43, 0x00}   
+};
+
+static const uint8_t escapeSequences_2022CN[][5] = {
+    {0x1b, 0x24, 0x29, 0x41, 0x00},   // GB 2312-80
+    {0x1b, 0x24, 0x29, 0x47, 0x00},   // CNS 11643-1992 Plane 1
+    {0x1b, 0x24, 0x2A, 0x48, 0x00},   // CNS 11643-1992 Plane 2
+    {0x1b, 0x24, 0x29, 0x45, 0x00},   // ISO-IR-165
+    {0x1b, 0x24, 0x2B, 0x49, 0x00},   // CNS 11643-1992 Plane 3
+    {0x1b, 0x24, 0x2B, 0x4A, 0x00},   // CNS 11643-1992 Plane 4
+    {0x1b, 0x24, 0x2B, 0x4B, 0x00},   // CNS 11643-1992 Plane 5
+    {0x1b, 0x24, 0x2B, 0x4C, 0x00},   // CNS 11643-1992 Plane 6
+    {0x1b, 0x24, 0x2B, 0x4D, 0x00},   // CNS 11643-1992 Plane 7
+    {0x1b, 0x4e, 0x00, 0x00, 0x00},   // SS2
+    {0x1b, 0x4f, 0x00, 0x00, 0x00},   // SS3
+};
+
+CharsetRecog_2022JP::~CharsetRecog_2022JP() {}
+
+const char *CharsetRecog_2022JP::getName() const {
+    return "ISO-2022-JP";
+}
+
+UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const {
+    int32_t confidence = match_2022(textIn->fInputBytes, 
+                                    textIn->fInputLen, 
+                                    escapeSequences_2022JP, 
+                                    ARRAY_SIZE(escapeSequences_2022JP));
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_2022KR::~CharsetRecog_2022KR() {}
+
+const char *CharsetRecog_2022KR::getName() const {
+    return "ISO-2022-KR";
+}
+
+UBool CharsetRecog_2022KR::match(InputText *textIn, CharsetMatch *results) const {
+    int32_t confidence = match_2022(textIn->fInputBytes, 
+                                    textIn->fInputLen, 
+                                    escapeSequences_2022KR, 
+                                    ARRAY_SIZE(escapeSequences_2022KR));
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_2022CN::~CharsetRecog_2022CN() {}
+
+const char *CharsetRecog_2022CN::getName() const {
+    return "ISO-2022-CN";
+}
+
+UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const {
+    int32_t confidence = match_2022(textIn->fInputBytes,
+                                    textIn->fInputLen,
+                                    escapeSequences_2022CN,
+                                    ARRAY_SIZE(escapeSequences_2022CN));
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_2022::~CharsetRecog_2022() {
+    // nothing to do
+}
+
+U_NAMESPACE_END
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/csrecog.cpp b/src/core/basetypes/icu-ucsdet/csrecog.cpp
new file mode 100644
index 00000000..ba70b154
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csrecog.cpp
@@ -0,0 +1,28 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2006, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+CharsetRecognizer::~CharsetRecognizer()
+{
+    // nothing to do.
+}
+
+const char *CharsetRecognizer::getLanguage() const
+{
+    return "";
+}
+
+U_NAMESPACE_END    
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/csrmbcs.cpp b/src/core/basetypes/icu-ucsdet/csrmbcs.cpp
new file mode 100644
index 00000000..fef2e869
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csrmbcs.cpp
@@ -0,0 +1,529 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2012, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csmatch.h"
+#include "csrmbcs.h"
+
+#include <math.h>
+
+U_NAMESPACE_BEGIN
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+#define min(x,y) (((x)<(y))?(x):(y))
+
+static const uint16_t commonChars_sjis [] = {
+// TODO:  This set of data comes from the character frequency-
+//        of-occurence analysis tool.  The data needs to be moved
+//        into a resource and loaded from there.
+0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
+0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
+0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
+0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
+0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
+0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa};
+
+static const uint16_t commonChars_euc_jp[] = {
+// TODO:  This set of data comes from the character frequency-
+//        of-occurence analysis tool.  The data needs to be moved
+//        into a resource and loaded from there.
+0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
+0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
+0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
+0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
+0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
+0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
+0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
+0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
+0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
+0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1};
+
+static const uint16_t commonChars_euc_kr[] = {
+// TODO:  This set of data comes from the character frequency-
+//        of-occurence analysis tool.  The data needs to be moved
+//        into a resource and loaded from there.
+0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
+0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
+0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
+0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
+0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
+0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
+0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
+0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
+0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
+0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad};
+
+static const uint16_t commonChars_big5[] = {
+// TODO:  This set of data comes from the character frequency-
+//        of-occurence analysis tool.  The data needs to be moved
+//        into a resource and loaded from there.
+0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
+0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
+0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
+0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
+0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
+0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
+0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
+0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
+0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
+0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f};
+
+static const uint16_t commonChars_gb_18030[] = {
+// TODO:  This set of data comes from the character frequency-
+//        of-occurence analysis tool.  The data needs to be moved
+//        into a resource and loaded from there.
+0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
+0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
+0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
+0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
+0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
+0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
+0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
+0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
+0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
+0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0};
+
+static int32_t binarySearch(const uint16_t *array, int32_t len, uint16_t value)
+{
+    int32_t start = 0, end = len-1;
+    int32_t mid = (start+end)/2;
+
+    while(start <= end) {
+        if(array[mid] == value) {
+            return mid;
+        }
+
+        if(array[mid] < value){
+            start = mid+1;
+        } else {
+            end = mid-1;
+        }
+
+        mid = (start+end)/2;
+    }
+
+    return -1;
+}
+
+IteratedChar::IteratedChar() : 
+charValue(0), index(-1), nextIndex(0), error(FALSE), done(FALSE)
+{
+    // nothing else to do.
+}
+
+/*void IteratedChar::reset()
+{
+    charValue = 0;
+    index     = -1;
+    nextIndex = 0;
+    error     = FALSE;
+    done      = FALSE;
+}*/
+
+int32_t IteratedChar::nextByte(InputText *det)
+{
+    if (nextIndex >= det->fRawLength) {
+        done = TRUE;
+
+        return -1;
+    }
+
+    return det->fRawInput[nextIndex++];
+}
+
+CharsetRecog_mbcs::~CharsetRecog_mbcs()
+{
+    // nothing to do.
+}
+
+int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars[], int32_t commonCharsLen) const {
+    int32_t singleByteCharCount = 0;
+    int32_t doubleByteCharCount = 0;
+    int32_t commonCharCount     = 0;
+    int32_t badCharCount        = 0;
+    int32_t totalCharCount      = 0;
+    int32_t confidence          = 0;
+    IteratedChar iter;
+
+    while (nextChar(&iter, det)) {
+        totalCharCount++;
+
+        if (iter.error) {
+            badCharCount++;
+        } else {
+            if (iter.charValue <= 0xFF) {
+                singleByteCharCount++;
+            } else {
+                doubleByteCharCount++;
+
+                if (commonChars != 0) {
+                    if (binarySearch(commonChars, commonCharsLen, iter.charValue) >= 0){
+                        commonCharCount += 1;
+                    }
+                }
+            }
+        }
+
+
+        if (badCharCount >= 2 && badCharCount*5 >= doubleByteCharCount) {
+            // Bail out early if the byte data is not matching the encoding scheme.
+            // break detectBlock;
+            return confidence;
+        }
+    }
+
+    if (doubleByteCharCount <= 10 && badCharCount == 0) {
+        // Not many multi-byte chars.
+        if (doubleByteCharCount == 0 && totalCharCount < 10) {
+            // There weren't any multibyte sequences, and there was a low density of non-ASCII single bytes.
+            // We don't have enough data to have any confidence.
+            // Statistical analysis of single byte non-ASCII charcters would probably help here.
+            confidence = 0;
+        }
+        else {
+            //   ASCII or ISO file?  It's probably not our encoding,
+            //   but is not incompatible with our encoding, so don't give it a zero.
+            confidence = 10;
+        }
+
+        return confidence;
+    }
+
+    //
+    //  No match if there are too many characters that don't fit the encoding scheme.
+    //    (should we have zero tolerance for these?)
+    //
+    if (doubleByteCharCount < 20*badCharCount) {
+        confidence = 0;
+
+        return confidence;
+    }
+
+    if (commonChars == 0) {
+        // We have no statistics on frequently occuring characters.
+        //  Assess confidence purely on having a reasonable number of
+        //  multi-byte characters (the more the better)
+        confidence = 30 + doubleByteCharCount - 20*badCharCount;
+
+        if (confidence > 100) {
+            confidence = 100;
+        }
+    } else {
+        //
+        // Frequency of occurence statistics exist.
+        //
+
+        double maxVal = log((double)doubleByteCharCount / 4); /*(float)?*/
+        double scaleFactor = 90.0 / maxVal;
+        confidence = (int32_t)(log((double)commonCharCount+1) * scaleFactor + 10.0);
+
+        confidence = min(confidence, 100);
+    }
+
+    if (confidence < 0) {
+        confidence = 0;
+    }
+
+    return confidence;
+}
+
+CharsetRecog_sjis::~CharsetRecog_sjis()
+{
+    // nothing to do
+}
+
+UBool CharsetRecog_sjis::nextChar(IteratedChar* it, InputText* det) const {
+    it->index = it->nextIndex;
+    it->error = FALSE;
+
+    int32_t firstByte = it->charValue = it->nextByte(det);
+
+    if (firstByte < 0) {
+        return FALSE;
+    }
+
+    if (firstByte <= 0x7F || (firstByte > 0xA0 && firstByte <= 0xDF)) {
+        return TRUE;
+    }
+
+    int32_t secondByte = it->nextByte(det);
+    if (secondByte >= 0) {
+        it->charValue = (firstByte << 8) | secondByte;
+    }
+    // else we'll handle the error later.
+
+    if (! ((secondByte >= 0x40 && secondByte <= 0x7F) || (secondByte >= 0x80 && secondByte <= 0xFE))) {
+        // Illegal second byte value.
+        it->error = TRUE;
+    }
+
+    return TRUE;
+}
+
+UBool CharsetRecog_sjis::match(InputText* det, CharsetMatch *results) const {
+    int32_t confidence = match_mbcs(det, commonChars_sjis, ARRAY_SIZE(commonChars_sjis));
+    results->set(det, this, confidence);
+    return (confidence > 0);
+}
+
+const char *CharsetRecog_sjis::getName() const
+{
+    return "Shift_JIS";
+}
+
+const char *CharsetRecog_sjis::getLanguage() const
+{
+    return "ja";
+}
+
+CharsetRecog_euc::~CharsetRecog_euc()
+{
+    // nothing to do
+}
+
+UBool CharsetRecog_euc::nextChar(IteratedChar* it, InputText* det) const {
+    int32_t firstByte  = 0;
+    int32_t secondByte = 0;
+    int32_t thirdByte  = 0;
+
+    it->index = it->nextIndex;
+    it->error = FALSE;
+    firstByte = it->charValue = it->nextByte(det);
+
+    if (firstByte < 0) {
+        // Ran off the end of the input data
+        return FALSE;
+    }
+
+    if (firstByte <= 0x8D) {
+        // single byte char
+        return TRUE;
+    }
+
+    secondByte = it->nextByte(det);
+    if (secondByte >= 0) {
+        it->charValue = (it->charValue << 8) | secondByte;
+    }
+    // else we'll handle the error later.
+
+    if (firstByte >= 0xA1 && firstByte <= 0xFE) {
+        // Two byte Char
+        if (secondByte < 0xA1) {
+            it->error = TRUE;
+        }
+
+        return TRUE;
+    }
+
+    if (firstByte == 0x8E) {
+        // Code Set 2.
+        //   In EUC-JP, total char size is 2 bytes, only one byte of actual char value.
+        //   In EUC-TW, total char size is 4 bytes, three bytes contribute to char value.
+        // We don't know which we've got.
+        // Treat it like EUC-JP.  If the data really was EUC-TW, the following two
+        //   bytes will look like a well formed 2 byte char.
+        if (secondByte < 0xA1) {
+            it->error = TRUE;
+        }
+
+        return TRUE;
+    }
+
+    if (firstByte == 0x8F) {
+        // Code set 3.
+        // Three byte total char size, two bytes of actual char value.
+        thirdByte    = it->nextByte(det);
+        it->charValue = (it->charValue << 8) | thirdByte;
+
+        if (thirdByte < 0xa1) {
+            // Bad second byte or ran off the end of the input data with a non-ASCII first byte.
+            it->error = TRUE;
+        }
+    }
+
+    return TRUE;
+
+}
+
+CharsetRecog_euc_jp::~CharsetRecog_euc_jp()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_euc_jp::getName() const
+{
+    return "EUC-JP";
+}
+
+const char *CharsetRecog_euc_jp::getLanguage() const
+{
+    return "ja";
+}
+
+UBool CharsetRecog_euc_jp::match(InputText *det, CharsetMatch *results) const
+{
+    int32_t confidence = match_mbcs(det, commonChars_euc_jp, ARRAY_SIZE(commonChars_euc_jp));
+    results->set(det, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_euc_kr::~CharsetRecog_euc_kr()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_euc_kr::getName() const
+{
+    return "EUC-KR";
+}
+
+const char *CharsetRecog_euc_kr::getLanguage() const
+{
+    return "ko";
+}
+
+UBool CharsetRecog_euc_kr::match(InputText *det, CharsetMatch *results) const
+{
+    int32_t confidence =  match_mbcs(det, commonChars_euc_kr, ARRAY_SIZE(commonChars_euc_kr));
+    results->set(det, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_big5::~CharsetRecog_big5()
+{
+    // nothing to do
+}
+
+UBool CharsetRecog_big5::nextChar(IteratedChar* it, InputText* det) const
+{
+    int32_t firstByte;
+
+    it->index = it->nextIndex;
+    it->error = FALSE;
+    firstByte = it->charValue = it->nextByte(det);
+
+    if (firstByte < 0) {
+        return FALSE;
+    }
+
+    if (firstByte <= 0x7F || firstByte == 0xFF) {
+        // single byte character.
+        return TRUE;
+    }
+
+    int32_t secondByte = it->nextByte(det);
+    if (secondByte >= 0)  {
+        it->charValue = (it->charValue << 8) | secondByte;
+    }
+    // else we'll handle the error later.
+
+    if (secondByte < 0x40 || secondByte == 0x7F || secondByte == 0xFF) {
+        it->error = TRUE;
+    }
+
+    return TRUE;
+}
+
+const char *CharsetRecog_big5::getName() const
+{
+    return "Big5";
+}
+
+const char *CharsetRecog_big5::getLanguage() const
+{
+    return "zh";
+}
+
+UBool CharsetRecog_big5::match(InputText *det, CharsetMatch *results) const
+{
+    int32_t confidence = match_mbcs(det, commonChars_big5, ARRAY_SIZE(commonChars_big5));
+    results->set(det, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_gb_18030::~CharsetRecog_gb_18030()
+{
+    // nothing to do
+}
+
+UBool CharsetRecog_gb_18030::nextChar(IteratedChar* it, InputText* det) const {
+    int32_t firstByte  = 0;
+    int32_t secondByte = 0;
+    int32_t thirdByte  = 0;
+    int32_t fourthByte = 0;
+
+    it->index = it->nextIndex;
+    it->error = FALSE;
+    firstByte = it->charValue = it->nextByte(det);
+
+    if (firstByte < 0) {
+        // Ran off the end of the input data
+        return FALSE;
+    }
+
+    if (firstByte <= 0x80) {
+        // single byte char
+        return TRUE;
+    }
+
+    secondByte = it->nextByte(det);
+    if (secondByte >= 0) {
+        it->charValue = (it->charValue << 8) | secondByte;
+    }
+    // else we'll handle the error later.
+
+    if (firstByte >= 0x81 && firstByte <= 0xFE) {
+        // Two byte Char
+        if ((secondByte >= 0x40 && secondByte <= 0x7E) || (secondByte >=80 && secondByte <= 0xFE)) {
+            return TRUE;
+        }
+
+        // Four byte char
+        if (secondByte >= 0x30 && secondByte <= 0x39) {
+            thirdByte = it->nextByte(det);
+
+            if (thirdByte >= 0x81 && thirdByte <= 0xFE) {
+                fourthByte = it->nextByte(det);
+
+                if (fourthByte >= 0x30 && fourthByte <= 0x39) {
+                    it->charValue = (it->charValue << 16) | (thirdByte << 8) | fourthByte;
+
+                    return TRUE;
+                }
+            }
+        }
+
+        // Something wasn't valid, or we ran out of data (-1).
+        it->error = TRUE;
+    }
+
+    return TRUE;
+}
+
+const char *CharsetRecog_gb_18030::getName() const
+{
+    return "GB18030";
+}
+
+const char *CharsetRecog_gb_18030::getLanguage() const
+{
+    return "zh";
+}
+
+UBool CharsetRecog_gb_18030::match(InputText *det, CharsetMatch *results) const
+{
+    int32_t confidence = match_mbcs(det, commonChars_gb_18030, ARRAY_SIZE(commonChars_gb_18030));
+    results->set(det, this, confidence);
+    return (confidence > 0);
+}
+
+U_NAMESPACE_END
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/csrsbcs.cpp b/src/core/basetypes/icu-ucsdet/csrsbcs.cpp
new file mode 100644
index 00000000..d03367cc
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csrsbcs.cpp
@@ -0,0 +1,1259 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2013, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#include "cmemory.h"
+
+#if !UCONFIG_NO_CONVERSION
+#include "csrsbcs.h"
+#include "csmatch.h"
+
+#define N_GRAM_SIZE 3
+#define N_GRAM_MASK 0xFFFFFF
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+U_NAMESPACE_BEGIN
+
+NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
+ : ngram(0), byteIndex(0)
+{
+    ngramList = theNgramList;
+    charMap   = theCharMap;
+
+    ngramCount = hitCount = 0;
+}
+
+/*
+ * Binary search for value in table, which must have exactly 64 entries.
+ */
+
+int32_t NGramParser::search(const int32_t *table, int32_t value)
+{
+    int32_t index = 0;
+
+    if (table[index + 32] <= value) {
+        index += 32;
+    }
+
+    if (table[index + 16] <= value) {
+        index += 16;
+    }
+
+    if (table[index + 8] <= value) {
+        index += 8;
+    }
+
+    if (table[index + 4] <= value) {
+        index += 4;
+    }
+
+    if (table[index + 2] <= value) {
+        index += 2;
+    }
+
+    if (table[index + 1] <= value) {
+        index += 1;
+    }
+
+    if (table[index] > value) {
+        index -= 1;
+    }
+
+    if (index < 0 || table[index] != value) {
+        return -1;
+    }
+
+    return index;
+}
+
+void NGramParser::lookup(int32_t thisNgram)
+{
+    ngramCount += 1;
+
+    if (search(ngramList, thisNgram) >= 0) {
+        hitCount += 1;
+    }
+
+}
+
+void NGramParser::addByte(int32_t b)
+{
+    ngram = ((ngram << 8) + b) & N_GRAM_MASK;
+    lookup(ngram);
+}
+
+int32_t NGramParser::nextByte(InputText *det)
+{
+    if (byteIndex >= det->fInputLen) {
+        return -1;
+    }
+
+    return det->fInputBytes[byteIndex++];
+}
+
+void NGramParser::parseCharacters(InputText *det)
+{
+    int32_t b;
+    bool ignoreSpace = FALSE;
+
+    while ((b = nextByte(det)) >= 0) {
+        uint8_t mb = charMap[b];
+
+        // TODO: 0x20 might not be a space in all character sets...
+        if (mb != 0) {
+            if (!(mb == 0x20 && ignoreSpace)) {
+                addByte(mb);
+            }
+
+            ignoreSpace = (mb == 0x20);
+        }
+    }
+}
+
+int32_t NGramParser::parse(InputText *det)
+{
+    parseCharacters(det);
+
+    // TODO: Is this OK? The buffer could have ended in the middle of a word...
+    addByte(0x20);
+
+    double rawPercent = (double) hitCount / (double) ngramCount;
+
+    //            if (rawPercent <= 2.0) {
+    //                return 0;
+    //            }
+
+    // TODO - This is a bit of a hack to take care of a case
+    // were we were getting a confidence of 135...
+    if (rawPercent > 0.33) {
+        return 98;
+    }
+
+    return (int32_t) (rawPercent * 300.0);
+}
+
+static const uint8_t unshapeMap_IBM420[] = {
+/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
+/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 4- */    0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 
+/* 5- */    0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 
+/* 6- */    0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+/* 7- */    0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 
+/* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F, 
+/* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E, 
+/* A- */    0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF, 
+/* B- */    0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF, 
+/* C- */    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF, 
+/* D- */    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF, 
+/* E- */    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+/* F- */    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
+};
+
+NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap):NGramParser(theNgramList, theCharMap)
+{
+	alef = 0x00;
+}
+
+
+int32_t NGramParser_IBM420::isLamAlef(int32_t b)
+{
+	if(b == 0xB2 || b == 0xB3){
+         	return 0x47;        		
+        }else if(b == 0xB4 || b == 0xB5){
+         	return 0x49;
+        }else if(b == 0xB8 || b == 0xB9){
+         	return 0x56;
+        }else
+         	return 0x00;
+}
+
+/*
+* Arabic shaping needs to be done manually. Cannot call ArabicShaping class
+* because CharsetDetector is dealing with bytes not Unicode code points. We could
+* convert the bytes to Unicode code points but that would leave us dependent
+* on CharsetICU which we try to avoid. IBM420 converter amongst different versions
+* of JDK can produce different results and therefore is also avoided.
+*/ 
+int32_t NGramParser_IBM420::nextByte(InputText *det)
+{
+	
+    if (byteIndex >= det->fInputLen || det->fInputBytes[byteIndex] == 0) {
+        return -1;
+    }              
+    int next;
+             
+    alef = isLamAlef(det->fInputBytes[byteIndex]);
+    if(alef != 0x00)
+        next = 0xB1 & 0xFF;
+    else
+        next = unshapeMap_IBM420[det->fInputBytes[byteIndex]& 0xFF] & 0xFF;
+            
+    byteIndex++;
+             
+    return next;
+}
+
+void NGramParser_IBM420::parseCharacters(InputText *det)
+{
+	int32_t b;
+    bool ignoreSpace = FALSE;
+
+    while ((b = nextByte(det)) >= 0) {
+        uint8_t mb = charMap[b];
+
+        // TODO: 0x20 might not be a space in all character sets...
+        if (mb != 0) {
+            if (!(mb == 0x20 && ignoreSpace)) {
+                addByte(mb);
+            }
+            ignoreSpace = (mb == 0x20);
+        }
+		
+		if(alef != 0x00){
+            mb = charMap[alef & 0xFF];
+                     
+            // TODO: 0x20 might not be a space in all character sets...
+            if (mb != 0) {
+                if (!(mb == 0x20 && ignoreSpace)) {
+                    addByte(mb);                    
+                }
+                         
+                ignoreSpace = (mb == 0x20);
+            }
+                	 
+        }
+    }
+}
+
+CharsetRecog_sbcs::CharsetRecog_sbcs()
+{
+    // nothing else to do
+}
+
+CharsetRecog_sbcs::~CharsetRecog_sbcs()
+{
+    // nothing to do
+}
+
+int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
+{
+    NGramParser parser(ngrams, byteMap);
+    int32_t result;
+
+    result = parser.parse(det);
+
+    return result;
+}
+
+static const uint8_t charMap_8859_1[] = {
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
+    0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
+};
+
+static const uint8_t charMap_8859_2[] = {
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20, 
+    0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 
+    0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, 
+    0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, 
+};
+
+static const uint8_t charMap_8859_5[] = {
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 
+    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
+    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
+    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 
+};
+
+static const uint8_t charMap_8859_6[] = {
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
+    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
+    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
+    0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+};
+
+static const uint8_t charMap_8859_7[] = {
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20, 
+    0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE, 
+    0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, 
+};
+
+static const uint8_t charMap_8859_8[] = {
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
+    0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20, 
+};
+
+static const uint8_t charMap_8859_9[] = {
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
+    0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
+};
+
+static const int32_t ngrams_windows_1251[] = {
+    0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, 
+    0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, 
+    0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, 
+    0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, 
+};
+
+static const uint8_t charMap_windows_1251[] = {
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 
+    0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 
+    0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, 
+    0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF, 
+    0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, 
+    0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
+    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
+};
+
+static const int32_t ngrams_windows_1256[] = {
+    0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, 
+    0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, 
+    0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, 
+    0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, 
+};
+
+static const uint8_t charMap_windows_1256[] = {
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 
+    0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F, 
+    0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
+    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
+    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, 
+    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
+    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
+    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
+    0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, 
+    0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF, 
+};
+
+static const int32_t ngrams_KOI8_R[] = {
+    0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, 
+    0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, 
+    0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, 
+    0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, 
+};
+
+static const uint8_t charMap_KOI8_R[] = {
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 
+    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
+    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
+    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
+    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
+    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
+    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
+    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
+    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
+    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
+};
+
+static const int32_t ngrams_IBM424_he_rtl[] = {
+    0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 
+    0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, 
+    0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, 
+    0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, 
+};
+
+static const int32_t ngrams_IBM424_he_ltr[] = {
+    0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
+    0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
+    0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
+    0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
+};
+
+static const uint8_t charMap_IBM424_he[] = {
+/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
+/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 4- */    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 5- */    0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 7- */    0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40, 
+/* 8- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 9- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* B- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+};
+
+static const int32_t ngrams_IBM420_ar_rtl[] = {
+    0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
+    0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
+    0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
+    0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
+};
+
+static const int32_t ngrams_IBM420_ar_ltr[] = {
+    0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, 
+    0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
+    0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
+    0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
+};
+
+static const uint8_t charMap_IBM420_ar[]= {
+/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
+/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 4- */    0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 5- */    0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 7- */    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
+/* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 
+/* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 
+/* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 
+/* B- */    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 
+/* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, 
+/* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
+/* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, 
+/* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, 
+};
+
+//ISO-8859-1,2,5,6,7,8,9 Ngrams
+
+struct NGramsPlusLang {
+    const int32_t ngrams[64];
+    const char *  lang;
+};
+
+static const NGramsPlusLang ngrams_8859_1[] =  {
+  { 
+    {
+    0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, 
+    0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, 
+    0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, 
+    0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, 
+    },
+    "en"
+  },
+  { 
+    {
+    0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, 
+    0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, 
+    0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, 
+    0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, 
+    },
+    "da"
+  },
+  { 
+    {
+    0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, 
+    0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, 
+    0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, 
+    0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, 
+    },
+    "de"
+  },
+  {
+    {
+    0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, 
+    0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, 
+    0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, 
+    0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, 
+    },
+    "es"
+  },
+  {
+    {
+    0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
+    0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
+    0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
+    0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
+    },
+    "fr"
+  },
+  {
+    {
+    0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
+    0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
+    0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
+    0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
+    },
+    "it"
+  },
+  {
+    {
+    0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
+    0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
+    0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
+    0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
+    },
+    "nl"
+  },
+  {
+    {
+    0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
+    0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
+    0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
+    0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
+    },
+    "no"
+  },
+  {
+    {
+    0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
+    0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
+    0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
+    0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
+    },
+    "pt"
+  },
+  {
+    {
+    0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
+    0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
+    0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
+    0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
+    },
+    "sv"
+  }
+};
+
+
+static const NGramsPlusLang ngrams_8859_2[] =  {
+  {
+    {
+    0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
+    0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
+    0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
+    0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
+    },
+    "cs"
+  },
+  {
+    {
+    0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
+    0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
+    0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
+    0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
+    },
+    "hu"
+  },
+  {
+    {
+    0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
+    0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
+    0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
+    0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
+    },
+    "pl"
+  },
+  {
+    {
+    0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
+    0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
+    0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
+    0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
+    },
+    "ro"
+  }
+};
+
+static const int32_t ngrams_8859_5_ru[] = {
+    0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
+    0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
+    0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
+    0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
+};
+
+static const int32_t ngrams_8859_6_ar[] = {
+    0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
+    0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
+    0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
+    0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
+};
+
+static const int32_t ngrams_8859_7_el[] = {
+    0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
+    0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
+    0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
+    0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
+};
+
+static const int32_t ngrams_8859_8_I_he[] = {
+    0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
+    0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
+    0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
+    0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
+};
+
+static const int32_t ngrams_8859_8_he[] = {
+    0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
+    0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
+    0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
+    0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
+};
+
+static const int32_t ngrams_8859_9_tr[] = {
+    0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
+    0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
+    0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
+    0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
+};
+
+CharsetRecog_8859_1::~CharsetRecog_8859_1()
+{
+    // nothing to do
+}
+
+UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const {
+    const char *name = textIn->fC1Bytes? "windows-1252" : "ISO-8859-1";
+    uint32_t i;
+    int32_t bestConfidenceSoFar = -1;
+    for (i=0; i < ARRAY_SIZE(ngrams_8859_1) ; i++) {
+        const int32_t *ngrams = ngrams_8859_1[i].ngrams;
+        const char    *lang   = ngrams_8859_1[i].lang;
+        int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_1);
+        if (confidence > bestConfidenceSoFar) {
+            results->set(textIn, this, confidence, name, lang);
+            bestConfidenceSoFar = confidence;
+        }
+    }
+    return (bestConfidenceSoFar > 0);
+}
+
+const char *CharsetRecog_8859_1::getName() const
+{
+    return "ISO-8859-1";
+}
+
+
+CharsetRecog_8859_2::~CharsetRecog_8859_2()
+{
+    // nothing to do
+}
+
+UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const {
+    const char *name = textIn->fC1Bytes? "windows-1250" : "ISO-8859-2";
+    uint32_t i;
+    int32_t bestConfidenceSoFar = -1;
+    for (i=0; i < ARRAY_SIZE(ngrams_8859_2) ; i++) {
+        const int32_t *ngrams = ngrams_8859_2[i].ngrams;
+        const char    *lang   = ngrams_8859_2[i].lang;
+        int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_2);
+        if (confidence > bestConfidenceSoFar) {
+            results->set(textIn, this, confidence, name, lang);
+            bestConfidenceSoFar = confidence;
+        }
+    }
+    return (bestConfidenceSoFar > 0);
+}
+
+const char *CharsetRecog_8859_2::getName() const
+{
+    return "ISO-8859-2";
+}
+
+
+CharsetRecog_8859_5::~CharsetRecog_8859_5()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_8859_5::getName() const
+{
+    return "ISO-8859-5";
+}
+
+CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_8859_5_ru::getLanguage() const
+{
+    return "ru";
+}
+
+UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const
+{
+    int32_t confidence = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_8859_6::~CharsetRecog_8859_6()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_8859_6::getName() const
+{
+    return "ISO-8859-6";
+}
+
+CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_8859_6_ar::getLanguage() const
+{
+    return "ar";
+}
+
+UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const
+{
+    int32_t confidence = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_8859_7::~CharsetRecog_8859_7()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_8859_7::getName() const
+{
+    return "ISO-8859-7";
+}
+
+CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_8859_7_el::getLanguage() const
+{
+    return "el";
+}
+
+UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const
+{
+    const char *name = textIn->fC1Bytes? "windows-1253" : "ISO-8859-7";
+    int32_t confidence = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
+    results->set(textIn, this, confidence, name, "el");
+    return (confidence > 0);
+}
+
+CharsetRecog_8859_8::~CharsetRecog_8859_8()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_8859_8::getName() const 
+{
+    return "ISO-8859-8";
+}
+
+CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_8859_8_I_he::getName() const
+{
+    return "ISO-8859-8-I";
+}
+
+const char *CharsetRecog_8859_8_I_he::getLanguage() const
+{
+    return "he";
+}
+
+UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const
+{
+    const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8-I";
+    int32_t confidence = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
+    results->set(textIn, this, confidence, name, "he");
+    return (confidence > 0);
+}
+
+CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
+{
+    // od ot gnihton
+}
+
+const char *CharsetRecog_8859_8_he::getLanguage() const
+{
+    return "he";
+}
+
+UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const
+{
+    const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8";
+    int32_t confidence = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
+    results->set(textIn, this, confidence, name, "he");
+    return (confidence > 0);
+}
+
+CharsetRecog_8859_9::~CharsetRecog_8859_9()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_8859_9::getName() const
+{
+    return "ISO-8859-9";
+}
+
+CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_8859_9_tr::getLanguage() const
+{
+    return "tr";
+}
+
+UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const
+{
+    const char *name = textIn->fC1Bytes? "windows-1254" : "ISO-8859-9";
+    int32_t confidence = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
+    results->set(textIn, this, confidence, name, "tr");
+    return (confidence > 0);
+}
+
+CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_windows_1256::getName() const
+{
+    return  "windows-1256";
+}
+
+const char *CharsetRecog_windows_1256::getLanguage() const
+{
+    return "ar";
+}
+
+UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const
+{
+    int32_t confidence = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_windows_1251::getName() const
+{
+    return  "windows-1251";
+}
+
+const char *CharsetRecog_windows_1251::getLanguage() const
+{
+    return "ru";
+}
+
+UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const
+{
+    int32_t confidence = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_KOI8_R::getName() const
+{
+    return  "KOI8-R";
+}
+
+const char *CharsetRecog_KOI8_R::getLanguage() const
+{
+    return "ru";
+}
+
+UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
+{
+    int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_IBM424_he::getLanguage() const
+{
+    return "he";
+}
+
+CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_IBM424_he_rtl::getName() const
+{
+    return  "IBM424_rtl";
+}
+
+UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const
+{
+    int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_IBM424_he_ltr::getName() const
+{
+    return  "IBM424_ltr";
+}
+
+UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const
+{
+    int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_IBM420_ar::getLanguage() const
+{
+    return "ar";
+}
+
+    
+int32_t CharsetRecog_IBM420_ar::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
+{
+    NGramParser_IBM420 parser(ngrams, byteMap);
+    int32_t result;
+    
+    result = parser.parse(det);
+        
+    return result;
+}
+
+CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_IBM420_ar_rtl::getName() const
+{
+    return  "IBM420_rtl";
+}
+
+UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const
+{
+    int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_IBM420_ar_ltr::getName() const
+{
+    return  "IBM420_ltr";
+}
+
+UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const
+{
+    int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+U_NAMESPACE_END
+#endif
+
diff --git a/src/core/basetypes/icu-ucsdet/csrucode.cpp b/src/core/basetypes/icu-ucsdet/csrucode.cpp
new file mode 100644
index 00000000..f0983430
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csrucode.cpp
@@ -0,0 +1,198 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2013, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrucode.h"
+#include "csmatch.h"
+
+U_NAMESPACE_BEGIN
+
+CharsetRecog_Unicode::~CharsetRecog_Unicode()
+{
+    // nothing to do
+}
+
+CharsetRecog_UTF_16_BE::~CharsetRecog_UTF_16_BE()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_UTF_16_BE::getName() const
+{
+    return "UTF-16BE";
+}
+
+// UTF-16 confidence calculation. Very simple minded, but better than nothing.
+//   Any 8 bit non-control characters bump the confidence up. These have a zero high byte,
+//     and are very likely to be UTF-16, although they could also be part of a UTF-32 code.
+//   NULs are a contra-indication, they will appear commonly if the actual encoding is UTF-32.
+//   NULs should be rare in actual text. 
+
+static int32_t adjustConfidence(UChar codeUnit, int32_t confidence) {
+    if (codeUnit == 0) {
+        confidence -= 10;
+    } else if ((codeUnit >= 0x20 && codeUnit <= 0xff) || codeUnit == 0x0a) {
+        confidence += 10;
+    }
+    if (confidence < 0) {
+        confidence = 0;
+    } else if (confidence > 100) {
+        confidence = 100;
+    }
+    return confidence;
+}
+
+
+UBool CharsetRecog_UTF_16_BE::match(InputText* textIn, CharsetMatch *results) const
+{
+    const uint8_t *input = textIn->fRawInput;
+    int32_t confidence = 10;
+    int32_t length = textIn->fRawLength;
+
+    int32_t bytesToCheck = (length > 30) ? 30 : length;
+    for (int32_t charIndex=0; charIndex<bytesToCheck-1; charIndex+=2) {
+        UChar codeUnit = (input[charIndex] << 8) | input[charIndex + 1];
+        if (charIndex == 0 && codeUnit == 0xFEFF) {
+            confidence = 100;
+            break;
+        }
+        confidence = adjustConfidence(codeUnit, confidence);
+        if (confidence == 0 || confidence == 100) {
+            break;
+        }
+    }
+    if (bytesToCheck < 4 && confidence < 100) {
+        confidence = 0;
+    }
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_UTF_16_LE::~CharsetRecog_UTF_16_LE()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_UTF_16_LE::getName() const
+{
+    return "UTF-16LE";
+}
+
+UBool CharsetRecog_UTF_16_LE::match(InputText* textIn, CharsetMatch *results) const
+{
+    const uint8_t *input = textIn->fRawInput;
+    int32_t confidence = 10;
+    int32_t length = textIn->fRawLength;
+
+    int32_t bytesToCheck = (length > 30) ? 30 : length;
+    for (int32_t charIndex=0; charIndex<bytesToCheck-1; charIndex+=2) {
+        UChar codeUnit = input[charIndex] | (input[charIndex + 1] << 8);
+        if (charIndex == 0 && codeUnit == 0xFEFF) {
+            confidence = 100;     // UTF-16 BOM
+            if (length >= 4 && input[2] == 0 && input[3] == 0) {
+                confidence = 0;   // UTF-32 BOM
+            }
+            break;
+        }
+        confidence = adjustConfidence(codeUnit, confidence);
+        if (confidence == 0 || confidence == 100) {
+            break;
+        }
+    }
+    if (bytesToCheck < 4 && confidence < 100) {
+        confidence = 0;
+    }
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_UTF_32::~CharsetRecog_UTF_32()
+{
+    // nothing to do
+}
+
+UBool CharsetRecog_UTF_32::match(InputText* textIn, CharsetMatch *results) const
+{
+    const uint8_t *input = textIn->fRawInput;
+    int32_t limit = (textIn->fRawLength / 4) * 4;
+    int32_t numValid = 0;
+    int32_t numInvalid = 0;
+    bool hasBOM = FALSE;
+    int32_t confidence = 0;
+
+    if (limit > 0 && getChar(input, 0) == 0x0000FEFFUL) {
+        hasBOM = TRUE;
+    }
+
+    for(int32_t i = 0; i < limit; i += 4) {
+        int32_t ch = getChar(input, i);
+
+        if (ch < 0 || ch >= 0x10FFFF || (ch >= 0xD800 && ch <= 0xDFFF)) {
+            numInvalid += 1;
+        } else {
+            numValid += 1;
+        }
+    }
+
+
+    // Cook up some sort of confidence score, based on presense of a BOM
+    //    and the existence of valid and/or invalid multi-byte sequences.
+    if (hasBOM && numInvalid==0) {
+        confidence = 100;
+    } else if (hasBOM && numValid > numInvalid*10) {
+        confidence = 80;
+    } else if (numValid > 3 && numInvalid == 0) {
+        confidence = 100;            
+    } else if (numValid > 0 && numInvalid == 0) {
+        confidence = 80;
+    } else if (numValid > numInvalid*10) {
+        // Probably corruput UTF-32BE data.  Valid sequences aren't likely by chance.
+        confidence = 25;
+    }
+
+    results->set(textIn, this, confidence);
+    return (confidence > 0);
+}
+
+CharsetRecog_UTF_32_BE::~CharsetRecog_UTF_32_BE()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_UTF_32_BE::getName() const
+{
+    return "UTF-32BE";
+}
+
+int32_t CharsetRecog_UTF_32_BE::getChar(const uint8_t *input, int32_t index) const
+{
+    return input[index + 0] << 24 | input[index + 1] << 16 |
+           input[index + 2] <<  8 | input[index + 3];
+} 
+
+CharsetRecog_UTF_32_LE::~CharsetRecog_UTF_32_LE()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_UTF_32_LE::getName() const
+{
+    return "UTF-32LE";
+}
+
+int32_t CharsetRecog_UTF_32_LE::getChar(const uint8_t *input, int32_t index) const
+{
+    return input[index + 3] << 24 | input[index + 2] << 16 |
+           input[index + 1] <<  8 | input[index + 0];
+}
+
+U_NAMESPACE_END
+#endif
+
diff --git a/src/core/basetypes/icu-ucsdet/csrutf8.cpp b/src/core/basetypes/icu-ucsdet/csrutf8.cpp
new file mode 100644
index 00000000..b18aa77e
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/csrutf8.cpp
@@ -0,0 +1,109 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2014, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrutf8.h"
+#include "csmatch.h"
+
+U_NAMESPACE_BEGIN
+
+CharsetRecog_UTF8::~CharsetRecog_UTF8()
+{
+    // nothing to do
+}
+
+const char *CharsetRecog_UTF8::getName() const
+{
+    return "UTF-8";
+}
+
+UBool CharsetRecog_UTF8::match(InputText* input, CharsetMatch *results) const {
+    bool hasBOM = FALSE;
+    int32_t numValid = 0;
+    int32_t numInvalid = 0;
+    const uint8_t *inputBytes = input->fRawInput;
+    int32_t i;
+    int32_t trailBytes = 0;
+    int32_t confidence;
+
+    if (input->fRawLength >= 3 && 
+        inputBytes[0] == 0xEF && inputBytes[1] == 0xBB && inputBytes[2] == 0xBF) {
+            hasBOM = TRUE;
+    }
+
+    // Scan for multi-byte sequences
+    for (i=0; i < input->fRawLength; i += 1) {
+        int32_t b = inputBytes[i];
+
+        if ((b & 0x80) == 0) {
+            continue;   // ASCII
+        }
+
+        // Hi bit on char found.  Figure out how long the sequence should be
+        if ((b & 0x0E0) == 0x0C0) {
+            trailBytes = 1;
+        } else if ((b & 0x0F0) == 0x0E0) {
+            trailBytes = 2;
+        } else if ((b & 0x0F8) == 0xF0) {
+            trailBytes = 3;
+        } else {
+            numInvalid += 1;
+            continue;
+        }
+
+        // Verify that we've got the right number of trail bytes in the sequence
+        for (;;) {
+            i += 1;
+
+            if (i >= input->fRawLength) {
+                break;
+            }
+
+            b = inputBytes[i];
+
+            if ((b & 0xC0) != 0x080) {
+                numInvalid += 1;
+                break;
+            }
+
+            if (--trailBytes == 0) {
+                numValid += 1;
+                break;
+            }
+        }
+
+    }
+
+    // Cook up some sort of confidence score, based on presence of a BOM
+    //    and the existence of valid and/or invalid multi-byte sequences.
+    confidence = 0;
+    if (hasBOM && numInvalid == 0) {
+        confidence = 100;
+    } else if (hasBOM && numValid > numInvalid*10) {
+        confidence = 80;
+    } else if (numValid > 3 && numInvalid == 0) {
+        confidence = 100;
+    } else if (numValid > 0 && numInvalid == 0) {
+        confidence = 80;
+    } else if (numValid == 0 && numInvalid == 0) {
+        // Plain ASCII. Confidence must be > 10, it's more likely than UTF-16, which
+        //              accepts ASCII with confidence = 10.
+        confidence = 15;
+    } else if (numValid > numInvalid*10) {
+        // Probably corruput utf-8 data.  Valid sequences aren't likely by chance.
+        confidence = 25;
+    }
+
+    results->set(input, this, confidence);
+    return (confidence > 0);
+}
+
+U_NAMESPACE_END
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/cstring.c b/src/core/basetypes/icu-ucsdet/cstring.c
new file mode 100644
index 00000000..3af959eb
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/cstring.c
@@ -0,0 +1,339 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File CSTRING.C
+*
+* @author       Helena Shih
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   6/18/98     hshih       Created
+*   09/08/98    stephen     Added include for ctype, for Mac Port
+*   11/15/99    helena      Integrated S/390 IEEE changes. 
+******************************************************************************
+*/
+
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+
+/*
+ * We hardcode case conversion for invariant characters to match our expectation
+ * and the compiler execution charset.
+ * This prevents problems on systems
+ * - with non-default casing behavior, like Turkish system locales where
+ *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
+ * - where there are no lowercase Latin characters at all, or using different
+ *   codes (some old EBCDIC codepages)
+ *
+ * This works because the compiler usually runs on a platform where the execution
+ * charset includes all of the invariant characters at their expected
+ * code positions, so that the char * string literals in ICU code match
+ * the char literals here.
+ *
+ * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
+ * and the set of uppercase Latin letters is discontiguous as well.
+ */
+
+U_CAPI UBool U_EXPORT2
+uprv_isASCIILetter(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+    return
+        ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
+        ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
+#else
+    return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
+#endif
+}
+
+U_CAPI char U_EXPORT2
+uprv_toupper(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+    if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
+        c=(char)(c+('A'-'a'));
+    }
+#else
+    if('a'<=c && c<='z') {
+        c=(char)(c+('A'-'a'));
+    }
+#endif
+    return c;
+}
+
+
+#if 0
+/*
+ * Commented out because cstring.h defines uprv_tolower() to be
+ * the same as either uprv_asciitolower() or uprv_ebcdictolower()
+ * to reduce the amount of code to cover with tests.
+ *
+ * Note that this uprv_tolower() definition is likely to work for most
+ * charset families, not just ASCII and EBCDIC, because its #else branch
+ * is written generically.
+ */
+U_CAPI char U_EXPORT2
+uprv_tolower(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+    if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
+        c=(char)(c+('a'-'A'));
+    }
+#else
+    if('A'<=c && c<='Z') {
+        c=(char)(c+('a'-'A'));
+    }
+#endif
+    return c;
+}
+#endif
+
+U_CAPI char U_EXPORT2
+uprv_asciitolower(char c) {
+    if(0x41<=c && c<=0x5a) {
+        c=(char)(c+0x20);
+    }
+    return c;
+}
+
+U_CAPI char U_EXPORT2
+uprv_ebcdictolower(char c) {
+    if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
+        (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
+        (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
+    ) {
+        c=(char)(c-0x40);
+    }
+    return c;
+}
+
+
+U_CAPI char* U_EXPORT2
+T_CString_toLowerCase(char* str)
+{
+    char* origPtr = str;
+
+    if (str) {
+        do
+            *str = (char)uprv_tolower(*str);
+        while (*(str++));
+    }
+
+    return origPtr;
+}
+
+U_CAPI char* U_EXPORT2
+T_CString_toUpperCase(char* str)
+{
+    char* origPtr = str;
+
+    if (str) {
+        do
+            *str = (char)uprv_toupper(*str);
+        while (*(str++));
+    }
+
+    return origPtr;
+}
+
+/*
+ * Takes a int32_t and fills in  a char* string with that number "radix"-based.
+ * Does not handle negative values (makes an empty string for them).
+ * Writes at most 12 chars ("-2147483647" plus NUL).
+ * Returns the length of the string (not including the NUL).
+ */
+U_CAPI int32_t U_EXPORT2
+T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
+{
+    char      tbuf[30];
+    int32_t   tbx    = sizeof(tbuf);
+    uint8_t   digit;
+    int32_t   length = 0;
+    uint32_t  uval;
+    
+    U_ASSERT(radix>=2 && radix<=16);
+    uval = (uint32_t) v;
+    if(v<0 && radix == 10) {
+        /* Only in base 10 do we conside numbers to be signed. */
+        uval = (uint32_t)(-v); 
+        buffer[length++] = '-';
+    }
+    
+    tbx = sizeof(tbuf)-1;
+    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
+    do {
+        digit = (uint8_t)(uval % radix);
+        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
+        uval  = uval / radix;
+    } while (uval != 0);
+    
+    /* copy converted number into user buffer  */
+    uprv_strcpy(buffer+length, tbuf+tbx);
+    length += sizeof(tbuf) - tbx -1;
+    return length;
+}
+
+
+
+/*
+ * Takes a int64_t and fills in  a char* string with that number "radix"-based.
+ * Writes at most 21: chars ("-9223372036854775807" plus NUL).
+ * Returns the length of the string, not including the terminating NULL.
+ */
+U_CAPI int32_t U_EXPORT2
+T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
+{
+    char      tbuf[30];
+    int32_t   tbx    = sizeof(tbuf);
+    uint8_t   digit;
+    int32_t   length = 0;
+    uint64_t  uval;
+    
+    U_ASSERT(radix>=2 && radix<=16);
+    uval = (uint64_t) v;
+    if(v<0 && radix == 10) {
+        /* Only in base 10 do we conside numbers to be signed. */
+        uval = (uint64_t)(-v); 
+        buffer[length++] = '-';
+    }
+    
+    tbx = sizeof(tbuf)-1;
+    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
+    do {
+        digit = (uint8_t)(uval % radix);
+        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
+        uval  = uval / radix;
+    } while (uval != 0);
+    
+    /* copy converted number into user buffer  */
+    uprv_strcpy(buffer+length, tbuf+tbx);
+    length += sizeof(tbuf) - tbx -1;
+    return length;
+}
+
+
+U_CAPI int32_t U_EXPORT2
+T_CString_stringToInteger(const char *integerString, int32_t radix)
+{
+    char *end;
+    return uprv_strtoul(integerString, &end, radix);
+
+}
+
+U_CAPI int U_EXPORT2
+uprv_stricmp(const char *str1, const char *str2) {
+    if(str1==NULL) {
+        if(str2==NULL) {
+            return 0;
+        } else {
+            return -1;
+        }
+    } else if(str2==NULL) {
+        return 1;
+    } else {
+        /* compare non-NULL strings lexically with lowercase */
+        int rc;
+        unsigned char c1, c2;
+
+        for(;;) {
+            c1=(unsigned char)*str1;
+            c2=(unsigned char)*str2;
+            if(c1==0) {
+                if(c2==0) {
+                    return 0;
+                } else {
+                    return -1;
+                }
+            } else if(c2==0) {
+                return 1;
+            } else {
+                /* compare non-zero characters with lowercase */
+                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
+                if(rc!=0) {
+                    return rc;
+                }
+            }
+            ++str1;
+            ++str2;
+        }
+    }
+}
+
+U_CAPI int U_EXPORT2
+uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
+    if(str1==NULL) {
+        if(str2==NULL) {
+            return 0;
+        } else {
+            return -1;
+        }
+    } else if(str2==NULL) {
+        return 1;
+    } else {
+        /* compare non-NULL strings lexically with lowercase */
+        int rc;
+        unsigned char c1, c2;
+
+        for(; n--;) {
+            c1=(unsigned char)*str1;
+            c2=(unsigned char)*str2;
+            if(c1==0) {
+                if(c2==0) {
+                    return 0;
+                } else {
+                    return -1;
+                }
+            } else if(c2==0) {
+                return 1;
+            } else {
+                /* compare non-zero characters with lowercase */
+                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
+                if(rc!=0) {
+                    return rc;
+                }
+            }
+            ++str1;
+            ++str2;
+        }
+    }
+
+    return 0;
+}
+
+U_CAPI char* U_EXPORT2
+uprv_strdup(const char *src) {
+    size_t len = uprv_strlen(src) + 1;
+    char *dup = (char *) uprv_malloc(len);
+
+    if (dup) {
+        uprv_memcpy(dup, src, len);
+    }
+
+    return dup;
+}
+
+U_CAPI char* U_EXPORT2
+uprv_strndup(const char *src, int32_t n) {
+    char *dup;
+
+    if(n < 0) {
+        dup = uprv_strdup(src);
+    } else {
+        dup = (char*)uprv_malloc(n+1);
+        if (dup) { 
+            uprv_memcpy(dup, src, n);
+            dup[n] = 0;
+        }
+    }
+
+    return dup;
+}
diff --git a/src/core/basetypes/icu-ucsdet/include/charstr.h b/src/core/basetypes/icu-ucsdet/include/charstr.h
new file mode 100644
index 00000000..4b86c835
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/charstr.h
@@ -0,0 +1,130 @@
+/*
+**********************************************************************
+*   Copyright (c) 2001-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/19/2001  aliu        Creation.
+*   05/19/2010  markus      Rewritten from scratch
+**********************************************************************
+*/
+
+#ifndef CHARSTRING_H
+#define CHARSTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+// Windows needs us to DLL-export the MaybeStackArray template specialization,
+// but MacOS X cannot handle it. Same as in digitlst.h.
+#if !U_PLATFORM_IS_DARWIN_BASED
+template class U_COMMON_API MaybeStackArray<char, 40>;
+#endif
+
+/**
+ * ICU-internal char * string class.
+ * This class does not assume or enforce any particular character encoding.
+ * Raw bytes can be stored. The string object owns its characters.
+ * A terminating NUL is stored, but the class does not prevent embedded NUL characters.
+ *
+ * This class wants to be convenient but is also deliberately minimalist.
+ * Please do not add methods if they only add minor convenience.
+ * For example:
+ *   cs.data()[5]='a';  // no need for setCharAt(5, 'a')
+ */
+class U_COMMON_API CharString : public UMemory {
+public:
+    CharString() : len(0) { buffer[0]=0; }
+    CharString(const StringPiece &s, UErrorCode &errorCode) : len(0) {
+        buffer[0]=0;
+        append(s, errorCode);
+    }
+    CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
+        buffer[0]=0;
+        append(s, errorCode);
+    }
+    CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
+        buffer[0]=0;
+        append(s, sLength, errorCode);
+    }
+    ~CharString() {}
+
+    /**
+     * Replaces this string's contents with the other string's contents.
+     * CharString does not support the standard copy constructor nor
+     * the assignment operator, to make copies explicit and to
+     * use a UErrorCode where memory allocations might be needed.
+     */
+    CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
+
+    UBool isEmpty() const { return len==0; }
+    int32_t length() const { return len; }
+    char operator[](int32_t index) const { return buffer[index]; }
+    StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
+
+    const char *data() const { return buffer.getAlias(); }
+    char *data() { return buffer.getAlias(); }
+
+    CharString &clear() { len=0; buffer[0]=0; return *this; }
+    CharString &truncate(int32_t newLength);
+
+    CharString &append(char c, UErrorCode &errorCode);
+    CharString &append(const StringPiece &s, UErrorCode &errorCode) {
+        return append(s.data(), s.length(), errorCode);
+    }
+    CharString &append(const CharString &s, UErrorCode &errorCode) {
+        return append(s.data(), s.length(), errorCode);
+    }
+    CharString &append(const char *s, int32_t sLength, UErrorCode &status);
+    /**
+     * Returns a writable buffer for appending and writes the buffer's capacity to
+     * resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
+     * There will additionally be space for a terminating NUL right at resultCapacity.
+     * (This function is similar to ByteSink.GetAppendBuffer().)
+     *
+     * The returned buffer is only valid until the next write operation
+     * on this string.
+     *
+     * After writing at most resultCapacity bytes, call append() with the
+     * pointer returned from this function and the number of bytes written.
+     *
+     * @param minCapacity required minimum capacity of the returned buffer;
+     *                    must be non-negative
+     * @param desiredCapacityHint desired capacity of the returned buffer;
+     *                            must be non-negative
+     * @param resultCapacity will be set to the capacity of the returned buffer
+     * @param errorCode in/out error code
+     * @return a buffer with resultCapacity>=min_capacity
+     */
+    char *getAppendBuffer(int32_t minCapacity,
+                          int32_t desiredCapacityHint,
+                          int32_t &resultCapacity,
+                          UErrorCode &errorCode);
+
+    CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
+
+    /**
+     * Appends a filename/path part, e.g., a directory name.
+     * First appends a U_FILE_SEP_CHAR if necessary.
+     * Does nothing if s is empty.
+     */
+    CharString &appendPathPart(const StringPiece &s, UErrorCode &errorCode);
+
+private:
+    MaybeStackArray<char, 40> buffer;
+    int32_t len;
+
+    UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);
+
+    CharString(const CharString &other); // forbid copying of this class
+    CharString &operator=(const CharString &other); // forbid copying of this class
+};
+
+U_NAMESPACE_END
+
+#endif
+//eof
diff --git a/src/core/basetypes/icu-ucsdet/include/cmemory.h b/src/core/basetypes/icu-ucsdet/include/cmemory.h
new file mode 100644
index 00000000..ed29b63e
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/cmemory.h
@@ -0,0 +1,607 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File CMEMORY.H
+*
+*  Contains stdlib.h/string.h memory functions
+*
+* @author       Bertrand A. Damiba
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   6/20/98     Bertrand    Created.
+*  05/03/99     stephen     Changed from functions to macros.
+*
+******************************************************************************
+*/
+
+#ifndef CMEMORY_H
+#define CMEMORY_H
+
+#include "unicode/utypes.h"
+
+#include <stddef.h>
+#include <string.h>
+#include "unicode/localpointer.h"
+
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#include <stdio.h>
+#endif
+
+#if U_DEBUG
+
+/*
+ * The C++ standard requires that the source pointer for memcpy() & memmove()
+ * is valid, not NULL, and not at the end of an allocated memory block.
+ * In debug mode, we read one byte from the source point to verify that it's
+ * a valid, readable pointer.
+ */
+
+U_CAPI void uprv_checkValidMemory(const void *p, size_t n);
+
+#define uprv_memcpy(dst, src, size) ( \
+    uprv_checkValidMemory(src, 1), \
+    U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size))
+#define uprv_memmove(dst, src, size) ( \
+    uprv_checkValidMemory(src, 1), \
+    U_STANDARD_CPP_NAMESPACE memmove(dst, src, size))
+
+#else
+
+#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
+#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
+
+#endif  /* U_DEBUG */
+
+/**
+ * \def UPRV_LENGTHOF
+ * Convenience macro to determine the length of a fixed array at compile-time.
+ * @param array A fixed length array
+ * @return The length of the array, in elements
+ * @internal
+ */
+#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
+#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
+
+U_CAPI void * U_EXPORT2
+uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1);
+
+U_CAPI void * U_EXPORT2
+uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2);
+
+U_CAPI void U_EXPORT2
+uprv_free(void *mem);
+
+U_CAPI void * U_EXPORT2
+uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2);
+
+/**
+ * This should align the memory properly on any machine.
+ * This is very useful for the safeClone functions.
+ */
+typedef union {
+    long    t1;
+    double  t2;
+    void   *t3;
+} UAlignedMemory;
+
+/**
+ * Get the least significant bits of a pointer (a memory address).
+ * For example, with a mask of 3, the macro gets the 2 least significant bits,
+ * which will be 0 if the pointer is 32-bit (4-byte) aligned.
+ *
+ * ptrdiff_t is the most appropriate integer type to cast to.
+ * size_t should work too, since on most (or all?) platforms it has the same
+ * width as ptrdiff_t.
+ */
+#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
+
+/**
+ * Get the amount of bytes that a pointer is off by from
+ * the previous UAlignedMemory-aligned pointer.
+ */
+#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
+
+/**
+ * Get the amount of bytes to add to a pointer
+ * in order to get the next UAlignedMemory-aligned address.
+ */
+#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
+
+/**
+  *  Indicate whether the ICU allocation functions have been used.
+  *  This is used to determine whether ICU is in an initial, unused state.
+  */
+U_CFUNC UBool 
+cmemory_inUse(void);
+
+/**
+  *  Heap clean up function, called from u_cleanup()
+  *    Clears any user heap functions from u_setMemoryFunctions()
+  *    Does NOT deallocate any remaining allocated memory.
+  */
+U_CFUNC UBool 
+cmemory_cleanup(void);
+
+/**
+ * A function called by <TT>uhash_remove</TT>,
+ * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
+ * an existing key or value.
+ * @param obj A key or value stored in a hashtable
+ * @see uprv_deleteUObject
+ */
+typedef void U_CALLCONV UObjectDeleter(void* obj);
+
+/**
+ * Deleter for UObject instances.
+ * Works for all subclasses of UObject because it has a virtual destructor.
+ */
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj);
+
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+/**
+ * "Smart pointer" class, deletes memory via uprv_free().
+ * For most methods see the LocalPointerBase base class.
+ * Adds operator[] for array item access.
+ *
+ * @see LocalPointerBase
+ */
+template<typename T>
+class LocalMemory : public LocalPointerBase<T> {
+public:
+    /**
+     * Constructor takes ownership.
+     * @param p simple pointer to an array of T items that is adopted
+     */
+    explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
+    /**
+     * Destructor deletes the memory it owns.
+     */
+    ~LocalMemory() {
+        uprv_free(LocalPointerBase<T>::ptr);
+    }
+    /**
+     * Deletes the array it owns,
+     * and adopts (takes ownership of) the one passed in.
+     * @param p simple pointer to an array of T items that is adopted
+     */
+    void adoptInstead(T *p) {
+        uprv_free(LocalPointerBase<T>::ptr);
+        LocalPointerBase<T>::ptr=p;
+    }
+    /**
+     * Deletes the array it owns, allocates a new one and reset its bytes to 0.
+     * Returns the new array pointer.
+     * If the allocation fails, then the current array is unchanged and
+     * this method returns NULL.
+     * @param newCapacity must be >0
+     * @return the allocated array pointer, or NULL if the allocation failed
+     */
+    inline T *allocateInsteadAndReset(int32_t newCapacity=1);
+    /**
+     * Deletes the array it owns and allocates a new one, copying length T items.
+     * Returns the new array pointer.
+     * If the allocation fails, then the current array is unchanged and
+     * this method returns NULL.
+     * @param newCapacity must be >0
+     * @param length number of T items to be copied from the old array to the new one;
+     *               must be no more than the capacity of the old array,
+     *               which the caller must track because the LocalMemory does not track it
+     * @return the allocated array pointer, or NULL if the allocation failed
+     */
+    inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0);
+    /**
+     * Array item access (writable).
+     * No index bounds check.
+     * @param i array index
+     * @return reference to the array item
+     */
+    T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
+};
+
+template<typename T>
+inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) {
+    if(newCapacity>0) {
+        T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
+        if(p!=NULL) {
+            uprv_memset(p, 0, newCapacity*sizeof(T));
+            uprv_free(LocalPointerBase<T>::ptr);
+            LocalPointerBase<T>::ptr=p;
+        }
+        return p;
+    } else {
+        return NULL;
+    }
+}
+
+
+template<typename T>
+inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) {
+    if(newCapacity>0) {
+        T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
+        if(p!=NULL) {
+            if(length>0) {
+                if(length>newCapacity) {
+                    length=newCapacity;
+                }
+                uprv_memcpy(p, LocalPointerBase<T>::ptr, length*sizeof(T));
+            }
+            uprv_free(LocalPointerBase<T>::ptr);
+            LocalPointerBase<T>::ptr=p;
+        }
+        return p;
+    } else {
+        return NULL;
+    }
+}
+
+/**
+ * Simple array/buffer management class using uprv_malloc() and uprv_free().
+ * Provides an internal array with fixed capacity. Can alias another array
+ * or allocate one.
+ *
+ * The array address is properly aligned for type T. It might not be properly
+ * aligned for types larger than T (or larger than the largest subtype of T).
+ *
+ * Unlike LocalMemory and LocalArray, this class never adopts
+ * (takes ownership of) another array.
+ */
+template<typename T, int32_t stackCapacity>
+class MaybeStackArray {
+public:
+    /**
+     * Default constructor initializes with internal T[stackCapacity] buffer.
+     */
+    MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
+    /**
+     * Destructor deletes the array (if owned).
+     */
+    ~MaybeStackArray() { releaseArray(); }
+    /**
+     * Returns the array capacity (number of T items).
+     * @return array capacity
+     */
+    int32_t getCapacity() const { return capacity; }
+    /**
+     * Access without ownership change.
+     * @return the array pointer
+     */
+    T *getAlias() const { return ptr; }
+    /**
+     * Returns the array limit. Simple convenience method.
+     * @return getAlias()+getCapacity()
+     */
+    T *getArrayLimit() const { return getAlias()+capacity; }
+    // No "operator T *() const" because that can make
+    // expressions like mbs[index] ambiguous for some compilers.
+    /**
+     * Array item access (const).
+     * No index bounds check.
+     * @param i array index
+     * @return reference to the array item
+     */
+    const T &operator[](ptrdiff_t i) const { return ptr[i]; }
+    /**
+     * Array item access (writable).
+     * No index bounds check.
+     * @param i array index
+     * @return reference to the array item
+     */
+    T &operator[](ptrdiff_t i) { return ptr[i]; }
+    /**
+     * Deletes the array (if owned) and aliases another one, no transfer of ownership.
+     * If the arguments are illegal, then the current array is unchanged.
+     * @param otherArray must not be NULL
+     * @param otherCapacity must be >0
+     */
+    void aliasInstead(T *otherArray, int32_t otherCapacity) {
+        if(otherArray!=NULL && otherCapacity>0) {
+            releaseArray();
+            ptr=otherArray;
+            capacity=otherCapacity;
+            needToRelease=FALSE;
+        }
+    }
+    /**
+     * Deletes the array (if owned) and allocates a new one, copying length T items.
+     * Returns the new array pointer.
+     * If the allocation fails, then the current array is unchanged and
+     * this method returns NULL.
+     * @param newCapacity can be less than or greater than the current capacity;
+     *                    must be >0
+     * @param length number of T items to be copied from the old array to the new one
+     * @return the allocated array pointer, or NULL if the allocation failed
+     */
+    inline T *resize(int32_t newCapacity, int32_t length=0);
+    /**
+     * Gives up ownership of the array if owned, or else clones it,
+     * copying length T items; resets itself to the internal stack array.
+     * Returns NULL if the allocation failed.
+     * @param length number of T items to copy when cloning,
+     *        and capacity of the clone when cloning
+     * @param resultCapacity will be set to the returned array's capacity (output-only)
+     * @return the array pointer;
+     *         caller becomes responsible for deleting the array
+     */
+    inline T *orphanOrClone(int32_t length, int32_t &resultCapacity);
+private:
+    T *ptr;
+    int32_t capacity;
+    UBool needToRelease;
+    T stackArray[stackCapacity];
+    void releaseArray() {
+        if(needToRelease) {
+            uprv_free(ptr);
+        }
+    }
+    /* No comparison operators with other MaybeStackArray's. */
+    bool operator==(const MaybeStackArray & /*other*/) {return FALSE;}
+    bool operator!=(const MaybeStackArray & /*other*/) {return TRUE;}
+    /* No ownership transfer: No copy constructor, no assignment operator. */
+    MaybeStackArray(const MaybeStackArray & /*other*/) {}
+    void operator=(const MaybeStackArray & /*other*/) {}
+
+    // No heap allocation. Use only on the stack.
+    //   (Declaring these functions private triggers a cascade of problems:
+    //      MSVC insists on exporting an instantiation of MaybeStackArray, which
+    //      requires that all functions be defined.
+    //      An empty implementation of new() is rejected, it must return a value.
+    //      Returning NULL is rejected by gcc for operator new.
+    //      The expedient thing is just not to override operator new.
+    //      While relatively pointless, heap allocated instances will function.
+    // static void * U_EXPORT2 operator new(size_t size); 
+    // static void * U_EXPORT2 operator new[](size_t size);
+#if U_HAVE_PLACEMENT_NEW
+    // static void * U_EXPORT2 operator new(size_t, void *ptr);
+#endif
+};
+
+template<typename T, int32_t stackCapacity>
+inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
+    if(newCapacity>0) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+      ::fprintf(::stderr,"MaybeStacArray (resize) alloc %d * %lu\n", newCapacity,sizeof(T));
+#endif
+        T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
+        if(p!=NULL) {
+            if(length>0) {
+                if(length>capacity) {
+                    length=capacity;
+                }
+                if(length>newCapacity) {
+                    length=newCapacity;
+                }
+                uprv_memcpy(p, ptr, length*sizeof(T));
+            }
+            releaseArray();
+            ptr=p;
+            capacity=newCapacity;
+            needToRelease=TRUE;
+        }
+        return p;
+    } else {
+        return NULL;
+    }
+}
+
+template<typename T, int32_t stackCapacity>
+inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32_t &resultCapacity) {
+    T *p;
+    if(needToRelease) {
+        p=ptr;
+    } else if(length<=0) {
+        return NULL;
+    } else {
+        if(length>capacity) {
+            length=capacity;
+        }
+        p=(T *)uprv_malloc(length*sizeof(T));
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+      ::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T));
+#endif
+        if(p==NULL) {
+            return NULL;
+        }
+        uprv_memcpy(p, ptr, length*sizeof(T));
+    }
+    resultCapacity=length;
+    ptr=stackArray;
+    capacity=stackCapacity;
+    needToRelease=FALSE;
+    return p;
+}
+
+/**
+ * Variant of MaybeStackArray that allocates a header struct and an array
+ * in one contiguous memory block, using uprv_malloc() and uprv_free().
+ * Provides internal memory with fixed array capacity. Can alias another memory
+ * block or allocate one.
+ * The stackCapacity is the number of T items in the internal memory,
+ * not counting the H header.
+ * Unlike LocalMemory and LocalArray, this class never adopts
+ * (takes ownership of) another memory block.
+ */
+template<typename H, typename T, int32_t stackCapacity>
+class MaybeStackHeaderAndArray {
+public:
+    /**
+     * Default constructor initializes with internal H+T[stackCapacity] buffer.
+     */
+    MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(FALSE) {}
+    /**
+     * Destructor deletes the memory (if owned).
+     */
+    ~MaybeStackHeaderAndArray() { releaseMemory(); }
+    /**
+     * Returns the array capacity (number of T items).
+     * @return array capacity
+     */
+    int32_t getCapacity() const { return capacity; }
+    /**
+     * Access without ownership change.
+     * @return the header pointer
+     */
+    H *getAlias() const { return ptr; }
+    /**
+     * Returns the array start.
+     * @return array start, same address as getAlias()+1
+     */
+    T *getArrayStart() const { return reinterpret_cast<T *>(getAlias()+1); }
+    /**
+     * Returns the array limit.
+     * @return array limit
+     */
+    T *getArrayLimit() const { return getArrayStart()+capacity; }
+    /**
+     * Access without ownership change. Same as getAlias().
+     * A class instance can be used directly in expressions that take a T *.
+     * @return the header pointer
+     */
+    operator H *() const { return ptr; }
+    /**
+     * Array item access (writable).
+     * No index bounds check.
+     * @param i array index
+     * @return reference to the array item
+     */
+    T &operator[](ptrdiff_t i) { return getArrayStart()[i]; }
+    /**
+     * Deletes the memory block (if owned) and aliases another one, no transfer of ownership.
+     * If the arguments are illegal, then the current memory is unchanged.
+     * @param otherArray must not be NULL
+     * @param otherCapacity must be >0
+     */
+    void aliasInstead(H *otherMemory, int32_t otherCapacity) {
+        if(otherMemory!=NULL && otherCapacity>0) {
+            releaseMemory();
+            ptr=otherMemory;
+            capacity=otherCapacity;
+            needToRelease=FALSE;
+        }
+    }
+    /**
+     * Deletes the memory block (if owned) and allocates a new one,
+     * copying the header and length T array items.
+     * Returns the new header pointer.
+     * If the allocation fails, then the current memory is unchanged and
+     * this method returns NULL.
+     * @param newCapacity can be less than or greater than the current capacity;
+     *                    must be >0
+     * @param length number of T items to be copied from the old array to the new one
+     * @return the allocated pointer, or NULL if the allocation failed
+     */
+    inline H *resize(int32_t newCapacity, int32_t length=0);
+    /**
+     * Gives up ownership of the memory if owned, or else clones it,
+     * copying the header and length T array items; resets itself to the internal memory.
+     * Returns NULL if the allocation failed.
+     * @param length number of T items to copy when cloning,
+     *        and array capacity of the clone when cloning
+     * @param resultCapacity will be set to the returned array's capacity (output-only)
+     * @return the header pointer;
+     *         caller becomes responsible for deleting the array
+     */
+    inline H *orphanOrClone(int32_t length, int32_t &resultCapacity);
+private:
+    H *ptr;
+    int32_t capacity;
+    UBool needToRelease;
+    // stackHeader must precede stackArray immediately.
+    H stackHeader;
+    T stackArray[stackCapacity];
+    void releaseMemory() {
+        if(needToRelease) {
+            uprv_free(ptr);
+        }
+    }
+    /* No comparison operators with other MaybeStackHeaderAndArray's. */
+    bool operator==(const MaybeStackHeaderAndArray & /*other*/) {return FALSE;}
+    bool operator!=(const MaybeStackHeaderAndArray & /*other*/) {return TRUE;}
+    /* No ownership transfer: No copy constructor, no assignment operator. */
+    MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) {}
+    void operator=(const MaybeStackHeaderAndArray & /*other*/) {}
+
+    // No heap allocation. Use only on the stack.
+    //   (Declaring these functions private triggers a cascade of problems;
+    //    see the MaybeStackArray class for details.)
+    // static void * U_EXPORT2 operator new(size_t size); 
+    // static void * U_EXPORT2 operator new[](size_t size);
+#if U_HAVE_PLACEMENT_NEW
+    // static void * U_EXPORT2 operator new(size_t, void *ptr);
+#endif
+};
+
+template<typename H, typename T, int32_t stackCapacity>
+inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapacity,
+                                                                int32_t length) {
+    if(newCapacity>=0) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+      ::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %d + %d * %ul\n", sizeof(H),newCapacity,sizeof(T));
+#endif
+        H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T));
+        if(p!=NULL) {
+            if(length<0) {
+                length=0;
+            } else if(length>0) {
+                if(length>capacity) {
+                    length=capacity;
+                }
+                if(length>newCapacity) {
+                    length=newCapacity;
+                }
+            }
+            uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T));
+            releaseMemory();
+            ptr=p;
+            capacity=newCapacity;
+            needToRelease=TRUE;
+        }
+        return p;
+    } else {
+        return NULL;
+    }
+}
+
+template<typename H, typename T, int32_t stackCapacity>
+inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t length,
+                                                                       int32_t &resultCapacity) {
+    H *p;
+    if(needToRelease) {
+        p=ptr;
+    } else {
+        if(length<0) {
+            length=0;
+        } else if(length>capacity) {
+            length=capacity;
+        }
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+      ::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %ul + %d * %lu\n", sizeof(H),length,sizeof(T));
+#endif
+        p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T));
+        if(p==NULL) {
+            return NULL;
+        }
+        uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T));
+    }
+    resultCapacity=length;
+    ptr=&stackHeader;
+    capacity=stackCapacity;
+    needToRelease=FALSE;
+    return p;
+}
+
+U_NAMESPACE_END
+
+#endif  /* __cplusplus */
+#endif  /* CMEMORY_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csdetect.h b/src/core/basetypes/icu-ucsdet/include/csdetect.h
new file mode 100644
index 00000000..15910453
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csdetect.h
@@ -0,0 +1,65 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2013, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSDETECT_H
+#define __CSDETECT_H
+
+#include "unicode/uobject.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+U_NAMESPACE_BEGIN
+
+class InputText;
+class CharsetRecognizer;
+class CharsetMatch;
+
+class CharsetDetector : public UMemory
+{
+private:
+    InputText *textIn;
+    CharsetMatch **resultArray;
+    int32_t resultCount;
+    UBool fStripTags;   // If true, setText() will strip tags from input text.
+    UBool fFreshTextSet;
+    static void setRecognizers(UErrorCode &status);
+
+    UBool *fEnabledRecognizers;  // If not null, active set of charset recognizers had
+                                // been changed from the default. The array index is
+                                // corresponding to fCSRecognizers. See setDetectableCharset().
+
+public:
+    CharsetDetector(UErrorCode &status);
+
+    ~CharsetDetector();
+
+    void setText(const char *in, int32_t len);
+
+    const CharsetMatch * const *detectAll(int32_t &maxMatchesFound, UErrorCode &status);
+
+    const CharsetMatch *detect(UErrorCode& status);
+
+    void setDeclaredEncoding(const char *encoding, int32_t len) const;
+
+    UBool setStripTagsFlag(UBool flag);
+
+    UBool getStripTagsFlag() const;
+
+//    const char *getCharsetName(int32_t index, UErrorCode& status) const;
+
+    static int32_t getDetectableCount();
+
+
+    static UEnumeration * getAllDetectableCharsets(UErrorCode &status);
+    UEnumeration * getDetectableCharsets(UErrorCode &status) const;
+    void setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status);
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSDETECT_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csmatch.h b/src/core/basetypes/icu-ucsdet/include/csmatch.h
new file mode 100644
index 00000000..6a3671c4
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csmatch.h
@@ -0,0 +1,69 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2012, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSMATCH_H
+#define __CSMATCH_H
+
+#include "unicode/uobject.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+U_NAMESPACE_BEGIN
+
+class InputText;
+class CharsetRecognizer;
+
+/*
+ * CharsetMatch represents the results produced by one Charset Recognizer for one input text
+ *              Any confidence > 0 indicates a possible match, meaning that the input bytes
+ *              are at least legal.
+ *
+ *              The full results of a detect are represented by an array of these
+ *              CharsetMatch objects, each representing a possible matching charset.
+ *
+ *              Note that a single charset recognizer may detect multiple closely related
+ *              charsets, and set different names depending on the exact input bytes seen.
+ */
+class CharsetMatch : public UMemory
+{
+ private:
+    InputText               *textIn;
+    int32_t                  confidence;
+    const char              *fCharsetName;
+    const char              *fLang;
+
+ public:
+    CharsetMatch();
+
+    /**
+      * fully set the state of this CharsetMatch.
+      * Called by the CharsetRecognizers to record match results.
+      * Default (NULL) parameters for names will be filled by calling the
+      *   corresponding getters on the recognizer.
+      */
+    void set(InputText               *input, 
+             const CharsetRecognizer *cr, 
+             int32_t                  conf, 
+             const char              *csName=NULL, 
+             const char              *lang=NULL);
+
+    /**
+      * Return the name of the charset for this Match
+      */
+    const char *getName() const;
+
+    const char *getLanguage()const;
+
+    int32_t getConfidence()const;
+
+    int32_t getUChars(UChar *buf, int32_t cap, UErrorCode *status) const;
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSMATCH_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csr2022.h b/src/core/basetypes/icu-ucsdet/include/csr2022.h
new file mode 100644
index 00000000..2ac2b87d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csr2022.h
@@ -0,0 +1,91 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2012, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSR2022_H
+#define __CSR2022_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+class CharsetMatch;
+
+/**
+ *  class CharsetRecog_2022  part of the ICU charset detection imlementation.
+ *                           This is a superclass for the individual detectors for
+ *                           each of the detectable members of the ISO 2022 family
+ *                           of encodings.
+ * 
+ *                           The separate classes are nested within this class.
+ * 
+ * @internal
+ */
+class CharsetRecog_2022 : public CharsetRecognizer
+{
+
+public:    
+    virtual ~CharsetRecog_2022() = 0;
+
+protected:
+
+    /**
+     * Matching function shared among the 2022 detectors JP, CN and KR
+     * Counts up the number of legal an unrecognized escape sequences in
+     * the sample of text, and computes a score based on the total number &
+     * the proportion that fit the encoding.
+     * 
+     * 
+     * @param text the byte buffer containing text to analyse
+     * @param textLen  the size of the text in the byte.
+     * @param escapeSequences the byte escape sequences to test for.
+     * @return match quality, in the range of 0-100.
+     */
+    int32_t match_2022(const uint8_t *text,
+                       int32_t textLen,
+                       const uint8_t escapeSequences[][5],
+                       int32_t escapeSequences_length) const;
+
+};
+
+class CharsetRecog_2022JP :public CharsetRecog_2022
+{
+public:
+    virtual ~CharsetRecog_2022JP();
+
+    const char *getName() const;
+
+    UBool match(InputText *textIn, CharsetMatch *results) const;
+};
+
+class CharsetRecog_2022KR :public CharsetRecog_2022 {
+public:
+    virtual ~CharsetRecog_2022KR();
+
+    const char *getName() const;
+
+    UBool match(InputText *textIn, CharsetMatch *results) const;
+
+};
+
+class CharsetRecog_2022CN :public CharsetRecog_2022
+{
+public:
+    virtual ~CharsetRecog_2022CN();
+
+    const char* getName() const;
+
+    UBool match(InputText *textIn, CharsetMatch *results) const;
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSR2022_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csrecog.h b/src/core/basetypes/icu-ucsdet/include/csrecog.h
new file mode 100644
index 00000000..6b7573a1
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csrecog.h
@@ -0,0 +1,55 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2012, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSRECOG_H
+#define __CSRECOG_H
+
+#include "unicode/uobject.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "inputext.h"
+
+U_NAMESPACE_BEGIN
+
+class CharsetMatch;
+
+class CharsetRecognizer : public UMemory
+{
+ public:
+    /**
+     * Get the IANA name of this charset.
+     * Note that some recognizers can recognize more than one charset, but that this API
+     * assumes just one name per recognizer.
+     * TODO: need to account for multiple names in public API that enumerates over the
+     *       known detectable charsets.
+     * @return the charset name.
+     */
+    virtual const char *getName() const = 0;
+    
+    /**
+     * Get the ISO language code for this charset.
+     * @return the language code, or <code>null</code> if the language cannot be determined.
+     */
+    virtual const char *getLanguage() const;
+        
+    /*
+     * Try the given input text against this Charset, and fill in the results object
+     * with the quality of the match plus other information related to the match.
+     *
+     * Return TRUE if the the input bytes are a potential match, and
+     * FALSE if the input data is not compatible with, or illegal in this charset.
+     */
+    virtual UBool match(InputText *textIn, CharsetMatch *results) const = 0;
+
+    virtual ~CharsetRecognizer();
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSRECOG_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csrmbcs.h b/src/core/basetypes/icu-ucsdet/include/csrmbcs.h
new file mode 100644
index 00000000..9ea9d8f8
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csrmbcs.h
@@ -0,0 +1,205 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2012, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSRMBCS_H
+#define __CSRMBCS_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+// "Character"  iterated character class.
+//    Recognizers for specific mbcs encodings make their "characters" available
+//    by providing a nextChar() function that fills in an instance of IteratedChar
+//    with the next char from the input.
+//    The returned characters are not converted to Unicode, but remain as the raw
+//    bytes (concatenated into an int) from the codepage data.
+//
+//  For Asian charsets, use the raw input rather than the input that has been
+//   stripped of markup.  Detection only considers multi-byte chars, effectively
+//   stripping markup anyway, and double byte chars do occur in markup too.
+//
+class IteratedChar : public UMemory
+{
+public:
+    uint32_t charValue;             // 1-4 bytes from the raw input data
+    int32_t  index;
+    int32_t  nextIndex;
+    UBool    error;
+    UBool    done;
+
+public:
+    IteratedChar();
+    //void reset();
+    int32_t nextByte(InputText* det);
+};
+
+
+class CharsetRecog_mbcs : public CharsetRecognizer {
+
+protected:
+    /**
+     * Test the match of this charset with the input text data
+     *      which is obtained via the CharsetDetector object.
+     *
+     * @param det  The CharsetDetector, which contains the input text
+     *             to be checked for being in this charset.
+     * @return     Two values packed into one int  (Damn java, anyhow)
+     *             <br/>
+     *             bits 0-7:  the match confidence, ranging from 0-100
+     *             <br/>
+     *             bits 8-15: The match reason, an enum-like value.
+     */
+    int32_t match_mbcs(InputText* det, const uint16_t commonChars[], int32_t commonCharsLen) const;
+
+public:
+
+    virtual ~CharsetRecog_mbcs();
+
+    /**
+     * Get the IANA name of this charset.
+     * @return the charset name.
+     */
+
+    const char *getName() const = 0;
+    const char *getLanguage() const = 0;
+    UBool match(InputText* input, CharsetMatch *results) const = 0;
+
+    /**
+     * Get the next character (however many bytes it is) from the input data
+     *    Subclasses for specific charset encodings must implement this function
+     *    to get characters according to the rules of their encoding scheme.
+     *
+     *  This function is not a method of class IteratedChar only because
+     *   that would require a lot of extra derived classes, which is awkward.
+     * @param it  The IteratedChar "struct" into which the returned char is placed.
+     * @param det The charset detector, which is needed to get at the input byte data
+     *            being iterated over.
+     * @return    True if a character was returned, false at end of input.
+     */
+    virtual UBool nextChar(IteratedChar *it, InputText *textIn) const = 0;
+
+};
+
+
+/**
+ *   Shift-JIS charset recognizer.
+ *
+ */
+class CharsetRecog_sjis : public CharsetRecog_mbcs {
+public:
+    virtual ~CharsetRecog_sjis();
+
+    UBool nextChar(IteratedChar *it, InputText *det) const;
+
+    UBool match(InputText* input, CharsetMatch *results) const;
+
+    const char *getName() const;
+    const char *getLanguage() const;
+
+};
+
+
+/**
+ *   EUC charset recognizers.  One abstract class that provides the common function
+ *             for getting the next character according to the EUC encoding scheme,
+ *             and nested derived classes for EUC_KR, EUC_JP, EUC_CN.
+ *
+ */
+class CharsetRecog_euc : public CharsetRecog_mbcs
+{
+public:
+    virtual ~CharsetRecog_euc();
+
+    const char *getName() const = 0;
+    const char *getLanguage() const = 0;
+
+    UBool match(InputText* input, CharsetMatch *results) const = 0;
+    /*
+     *  (non-Javadoc)
+     *  Get the next character value for EUC based encodings.
+     *  Character "value" is simply the raw bytes that make up the character
+     *     packed into an int.
+     */
+    UBool nextChar(IteratedChar *it, InputText *det) const;
+};
+
+/**
+ * The charset recognize for EUC-JP.  A singleton instance of this class
+ *    is created and kept by the public CharsetDetector class
+ */
+class CharsetRecog_euc_jp : public CharsetRecog_euc
+{
+public:
+    virtual ~CharsetRecog_euc_jp();
+
+    const char *getName() const;
+    const char *getLanguage() const;
+
+    UBool match(InputText* input, CharsetMatch *results) const;
+};
+
+/**
+ * The charset recognize for EUC-KR.  A singleton instance of this class
+ *    is created and kept by the public CharsetDetector class
+ */
+class CharsetRecog_euc_kr : public CharsetRecog_euc
+{
+public:
+    virtual ~CharsetRecog_euc_kr();
+
+    const char *getName() const;
+    const char *getLanguage() const;
+
+    UBool match(InputText* input, CharsetMatch *results) const;
+};
+
+/**
+ *
+ *   Big5 charset recognizer.
+ *
+ */
+class CharsetRecog_big5 : public CharsetRecog_mbcs
+{
+public:
+    virtual ~CharsetRecog_big5();
+
+    UBool nextChar(IteratedChar* it, InputText* det) const;
+
+    const char *getName() const;
+    const char *getLanguage() const;
+
+    UBool match(InputText* input, CharsetMatch *results) const;
+};
+
+
+/**
+ *
+ *   GB-18030 recognizer. Uses simplified Chinese statistics.
+ *
+ */
+class CharsetRecog_gb_18030 : public CharsetRecog_mbcs
+{
+public:
+    virtual ~CharsetRecog_gb_18030();
+
+    UBool nextChar(IteratedChar* it, InputText* det) const;
+
+    const char *getName() const;
+    const char *getLanguage() const;
+
+    UBool match(InputText* input, CharsetMatch *results) const;
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSRMBCS_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csrsbcs.h b/src/core/basetypes/icu-ucsdet/include/csrsbcs.h
new file mode 100644
index 00000000..2579c029
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csrsbcs.h
@@ -0,0 +1,287 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2013, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSRSBCS_H
+#define __CSRSBCS_H
+
+#include "unicode/uobject.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+class NGramParser : public UMemory
+{
+private:
+    int32_t ngram;
+    const int32_t *ngramList;    
+
+    int32_t ngramCount;
+    int32_t hitCount;
+
+protected:
+	int32_t byteIndex;
+    const uint8_t *charMap;
+
+	void addByte(int32_t b);
+
+public:
+    NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
+
+private:
+    /*
+    * Binary search for value in table, which must have exactly 64 entries.
+    */
+    int32_t search(const int32_t *table, int32_t value);
+
+    void lookup(int32_t thisNgram);
+    
+    virtual int32_t nextByte(InputText *det);
+	virtual void parseCharacters(InputText *det);
+
+public:
+    int32_t parse(InputText *det);
+
+};
+
+class NGramParser_IBM420 : public NGramParser
+{
+private:
+	int32_t alef;
+	int32_t isLamAlef(int32_t b);
+	int32_t nextByte(InputText *det);
+	void parseCharacters(InputText *det);
+
+public:
+    NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
+};
+
+
+class CharsetRecog_sbcs : public CharsetRecognizer
+{
+public:
+    CharsetRecog_sbcs();
+    virtual ~CharsetRecog_sbcs();
+    virtual const char *getName() const = 0;
+    virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
+    virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
+};
+
+class CharsetRecog_8859_1 : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_8859_1();
+    const char *getName() const;
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_2 : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_8859_2();
+    const char *getName() const;
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_5 : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_8859_5();
+    const char *getName() const;
+};
+
+class CharsetRecog_8859_6 : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_8859_6();
+
+    const char *getName() const;
+};
+
+class CharsetRecog_8859_7 : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_8859_7();
+
+    const char *getName() const;
+};
+
+class CharsetRecog_8859_8 : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_8859_8();
+	
+    virtual const char *getName() const;
+};
+
+class CharsetRecog_8859_9 : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_8859_9();
+
+    const char *getName() const;
+};
+
+
+
+class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
+{
+public:
+    virtual ~CharsetRecog_8859_5_ru();
+
+    const char *getLanguage() const;
+
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
+{
+public:
+    virtual ~CharsetRecog_8859_6_ar();
+
+    const char *getLanguage() const;
+
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
+{
+public:
+    virtual ~CharsetRecog_8859_7_el();
+
+    const char *getLanguage() const;
+
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
+{
+public:
+    virtual ~CharsetRecog_8859_8_I_he();
+	
+    const char *getName() const;
+
+    const char *getLanguage() const;
+
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
+{
+public:
+    virtual ~CharsetRecog_8859_8_he ();
+
+    const char *getLanguage() const;
+
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
+{
+public:
+    virtual ~CharsetRecog_8859_9_tr ();
+
+    const char *getLanguage() const;
+
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_windows_1256();
+
+    const char *getName() const;
+
+    const char *getLanguage() const;
+
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_windows_1251();
+
+    const char *getName() const;
+
+    const char *getLanguage() const;
+
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+
+class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_KOI8_R();
+
+    const char *getName() const;
+
+    const char *getLanguage() const;
+
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_IBM424_he();
+
+    const char *getLanguage() const;
+};
+
+class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
+public:
+    virtual ~CharsetRecog_IBM424_he_rtl();
+    
+    const char *getName() const;
+    
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
+    virtual ~CharsetRecog_IBM424_he_ltr();
+    
+    const char *getName() const;
+    
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
+{
+public:
+    virtual ~CharsetRecog_IBM420_ar();
+
+    const char *getLanguage() const;
+	int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
+    
+};
+
+class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
+public:
+    virtual ~CharsetRecog_IBM420_ar_rtl();
+    
+    const char *getName() const;
+    
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
+    virtual ~CharsetRecog_IBM420_ar_ltr();
+    
+    const char *getName() const;
+    
+    virtual UBool match(InputText *det, CharsetMatch *results) const;
+};
+
+U_NAMESPACE_END
+
+#endif /* !UCONFIG_NO_CONVERSION */
+#endif /* __CSRSBCS_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csrucode.h b/src/core/basetypes/icu-ucsdet/include/csrucode.h
new file mode 100644
index 00000000..a8a4f2bc
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csrucode.h
@@ -0,0 +1,106 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2012, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSRUCODE_H
+#define __CSRUCODE_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * This class matches UTF-16 and UTF-32, both big- and little-endian. The
+ * BOM will be used if it is present.
+ * 
+ * @internal
+ */
+class CharsetRecog_Unicode : public CharsetRecognizer 
+{
+
+public:
+
+    virtual ~CharsetRecog_Unicode();
+    /* (non-Javadoc)
+     * @see com.ibm.icu.text.CharsetRecognizer#getName()
+     */
+    const char* getName() const = 0;
+
+    /* (non-Javadoc)
+     * @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector)
+     */
+    UBool match(InputText* textIn, CharsetMatch *results) const = 0;
+};
+
+
+class CharsetRecog_UTF_16_BE : public CharsetRecog_Unicode
+{
+public:
+
+    virtual ~CharsetRecog_UTF_16_BE();
+
+    const char *getName() const;
+
+    UBool match(InputText* textIn, CharsetMatch *results) const;
+};
+
+class CharsetRecog_UTF_16_LE : public CharsetRecog_Unicode
+{
+public:
+
+    virtual ~CharsetRecog_UTF_16_LE();
+
+    const char *getName() const;
+
+    UBool match(InputText* textIn, CharsetMatch *results) const;
+};
+
+class CharsetRecog_UTF_32 : public CharsetRecog_Unicode
+{
+protected:
+    virtual int32_t getChar(const uint8_t *input, int32_t index) const = 0;
+public:
+
+    virtual ~CharsetRecog_UTF_32();
+
+    const char* getName() const = 0;
+
+    UBool match(InputText* textIn, CharsetMatch *results) const;
+};
+
+
+class CharsetRecog_UTF_32_BE : public CharsetRecog_UTF_32
+{
+protected:
+    int32_t getChar(const uint8_t *input, int32_t index) const;
+
+public:
+
+    virtual ~CharsetRecog_UTF_32_BE();
+
+    const char *getName() const;
+};
+
+
+class CharsetRecog_UTF_32_LE : public CharsetRecog_UTF_32
+{
+protected:
+    int32_t getChar(const uint8_t *input, int32_t index) const;
+
+public:
+    virtual ~CharsetRecog_UTF_32_LE();
+
+    const char* getName() const;
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSRUCODE_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/csrutf8.h b/src/core/basetypes/icu-ucsdet/include/csrutf8.h
new file mode 100644
index 00000000..82e8f9d7
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/csrutf8.h
@@ -0,0 +1,42 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2012, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSRUTF8_H
+#define __CSRUTF8_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Charset recognizer for UTF-8
+ *
+ * @internal
+ */
+class CharsetRecog_UTF8: public CharsetRecognizer {
+
+ public:
+		
+    virtual ~CharsetRecog_UTF8();		 
+
+    const char *getName() const;
+
+    /* (non-Javadoc)
+     * @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector)
+     */
+    UBool match(InputText *input, CharsetMatch *results) const;
+	
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSRUTF8_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/cstring.h b/src/core/basetypes/icu-ucsdet/include/cstring.h
new file mode 100644
index 00000000..64b68ffa
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/cstring.h
@@ -0,0 +1,140 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File CSTRING.H
+*
+* Contains CString interface
+*
+* @author       Helena Shih
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   6/17/98     hshih       Created.
+*  05/03/99     stephen     Changed from functions to macros.
+*  06/14/99     stephen     Added icu_strncat, icu_strncmp, icu_tolower
+*
+******************************************************************************
+*/
+
+#ifndef CSTRING_H
+#define CSTRING_H 1
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE  strcpy(dst, src)
+#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
+#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
+#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
+#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
+#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
+#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
+
+#if U_DEBUG
+
+#define uprv_strncpy(dst, src, size) ( \
+    uprv_checkValidMemory(src, 1), \
+    U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size))
+#define uprv_strncmp(s1, s2, n) ( \
+    uprv_checkValidMemory(s1, 1), \
+    uprv_checkValidMemory(s2, 1), \
+    U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n))
+#define uprv_strncat(dst, src, n) ( \
+    uprv_checkValidMemory(src, 1), \
+    U_STANDARD_CPP_NAMESPACE strncat(dst, src, n))
+
+#else
+
+#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
+#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
+#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
+
+#endif  /* U_DEBUG */
+
+/**
+ * Is c an ASCII-repertoire letter a-z or A-Z?
+ * Note: The implementation is specific to whether ICU is compiled for
+ * an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this.
+ */
+U_CAPI UBool U_EXPORT2
+uprv_isASCIILetter(char c);
+
+U_CAPI char U_EXPORT2
+uprv_toupper(char c);
+
+
+U_CAPI char U_EXPORT2
+uprv_asciitolower(char c);
+
+U_CAPI char U_EXPORT2
+uprv_ebcdictolower(char c);
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define uprv_tolower uprv_asciitolower
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#   define uprv_tolower uprv_ebcdictolower
+#else
+#   error U_CHARSET_FAMILY is not valid
+#endif
+
+#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
+#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
+#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)
+
+/* Conversion from a digit to the character with radix base from 2-19 */
+/* May need to use U_UPPER_ORDINAL*/
+#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))
+
+U_CAPI char* U_EXPORT2
+uprv_strdup(const char *src);
+
+/**
+ * uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
+ * Terminate with a null at offset n.   If n is -1, works like uprv_strdup
+ * @param src
+ * @param n length of the input string, not including null.
+ * @return new string (owned by caller, use uprv_free to free).
+ * @internal
+ */
+U_CAPI char* U_EXPORT2
+uprv_strndup(const char *src, int32_t n);
+
+U_CAPI char* U_EXPORT2
+T_CString_toLowerCase(char* str);
+
+U_CAPI char* U_EXPORT2
+T_CString_toUpperCase(char* str);
+
+U_CAPI int32_t U_EXPORT2
+T_CString_integerToString(char *buffer, int32_t n, int32_t radix);
+
+U_CAPI int32_t U_EXPORT2
+T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);
+
+U_CAPI int32_t U_EXPORT2
+T_CString_stringToInteger(const char *integerString, int32_t radix);
+
+/**
+ * Case-insensitive, language-independent string comparison
+ * limited to the ASCII character repertoire.
+ */
+U_CAPI int U_EXPORT2
+uprv_stricmp(const char *str1, const char *str2);
+
+/**
+ * Case-insensitive, language-independent string comparison
+ * limited to the ASCII character repertoire.
+ */
+U_CAPI int U_EXPORT2
+uprv_strnicmp(const char *str1, const char *str2, uint32_t n);
+
+#endif /* ! CSTRING_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/cwchar.h b/src/core/basetypes/icu-ucsdet/include/cwchar.h
new file mode 100644
index 00000000..2ab36c03
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/cwchar.h
@@ -0,0 +1,56 @@
+/*  
+******************************************************************************
+*
+*   Copyright (C) 2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  cwchar.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001may25
+*   created by: Markus W. Scherer
+*
+*   This file contains ICU-internal definitions of wchar_t operations.
+*   These definitions were moved here from cstring.h so that fewer
+*   ICU implementation files include wchar.h.
+*/
+
+#ifndef __CWCHAR_H__
+#define __CWCHAR_H__
+
+#include <string.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+
+/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
+#if U_HAVE_WCHAR_H
+#   include <wchar.h>
+#endif
+
+/*===========================================================================*/
+/* Wide-character functions                                                  */
+/*===========================================================================*/
+
+/* The following are not available on all systems, defined in wchar.h or string.h. */
+#if U_HAVE_WCSCPY
+#   define uprv_wcscpy wcscpy
+#   define uprv_wcscat wcscat
+#   define uprv_wcslen wcslen
+#else
+U_CAPI wchar_t* U_EXPORT2 
+uprv_wcscpy(wchar_t *dst, const wchar_t *src);
+U_CAPI wchar_t* U_EXPORT2 
+uprv_wcscat(wchar_t *dst, const wchar_t *src);
+U_CAPI size_t U_EXPORT2 
+uprv_wcslen(const wchar_t *src);
+#endif
+
+/* The following are part of the ANSI C standard, defined in stdlib.h . */
+#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
+#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)
+
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/inputext.h b/src/core/basetypes/icu-ucsdet/include/inputext.h
new file mode 100644
index 00000000..0c5973d8
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/inputext.h
@@ -0,0 +1,61 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2008, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __INPUTEXT_H
+#define __INPUTEXT_H
+
+/**
+ * \file
+ * \internal
+ *
+ * This is an internal header for the Character Set Detection code. The
+ * name is probably too generic...
+ */
+
+
+#include "unicode/uobject.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+U_NAMESPACE_BEGIN 
+
+class InputText : public UMemory
+{
+    // Prevent copying
+    InputText(const InputText &);
+public:
+    InputText(UErrorCode &status);
+    ~InputText();
+
+    void setText(const char *in, int32_t len);
+    void setDeclaredEncoding(const char *encoding, int32_t len);
+    UBool isSet() const; 
+    void MungeInput(UBool fStripTags);
+
+    // The text to be checked.  Markup will have been
+    //   removed if appropriate.
+    uint8_t    *fInputBytes;
+    int32_t     fInputLen;          // Length of the byte data in fInputBytes.
+    // byte frequency statistics for the input text.
+    //   Value is percent, not absolute.
+    //   Value is rounded up, so zero really means zero occurences. 
+    int16_t  *fByteStats;
+    UBool     fC1Bytes;          // True if any bytes in the range 0x80 - 0x9F are in the input;false by default
+    char     *fDeclaredEncoding;
+
+    const uint8_t           *fRawInput;     // Original, untouched input bytes.
+    //  If user gave us a byte array, this is it.
+    //  If user gave us a stream, it's read to a 
+    //   buffer here.
+    int32_t                  fRawLength;    // Length of data in fRawInput array.
+
+};
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __INPUTEXT_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/locmap.h b/src/core/basetypes/icu-ucsdet/include/locmap.h
new file mode 100644
index 00000000..214bbcec
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/locmap.h
@@ -0,0 +1,37 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1996-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File locmap.h      : Locale Mapping Classes
+* 
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+*  Date        Name        Description
+*  3/11/97     aliu        Added setId().
+*  4/20/99     Madhu       Added T_convertToPosix()
+* 09/18/00     george      Removed the memory leaks.
+* 08/23/01     george      Convert to C
+*============================================================================
+*/
+
+#ifndef LOCMAP_H
+#define LOCMAP_H
+
+#include "unicode/utypes.h"
+
+#define LANGUAGE_LCID(hostID) (uint16_t)(0x03FF & hostID)
+
+U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status);
+
+/* Don't call this function directly. Use uloc_getLCID instead. */
+U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status);
+
+#endif /* LOCMAP_H */
+
diff --git a/src/core/basetypes/icu-ucsdet/include/mutex.h b/src/core/basetypes/icu-ucsdet/include/mutex.h
new file mode 100644
index 00000000..07dcdbbc
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/mutex.h
@@ -0,0 +1,77 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*/
+//----------------------------------------------------------------------------
+// File:     mutex.h
+//
+// Lightweight C++ wrapper for umtx_ C mutex functions
+//
+// Author:   Alan Liu  1/31/97
+// History:
+// 06/04/97   helena         Updated setImplementation as per feedback from 5/21 drop.
+// 04/07/1999  srl               refocused as a thin wrapper
+//
+//----------------------------------------------------------------------------
+#ifndef MUTEX_H
+#define MUTEX_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "umutex.h"
+
+U_NAMESPACE_BEGIN
+
+//----------------------------------------------------------------------------
+// Code within that accesses shared static or global data should
+// should instantiate a Mutex object while doing so. You should make your own 
+// private mutex where possible.
+
+// For example:
+// 
+// UMutex myMutex;
+// 
+// void Function(int arg1, int arg2)
+// {
+//    static Object* foo;     // Shared read-write object
+//    Mutex mutex(&myMutex);  // or no args for the global lock
+//    foo->Method();
+//    // When 'mutex' goes out of scope and gets destroyed here, the lock is released
+// }
+//
+// Note:  Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
+//        returning a Mutex. This is a common mistake which silently slips through the
+//        compiler!!
+//
+
+class U_COMMON_API Mutex : public UMemory {
+public:
+  inline Mutex(UMutex *mutex = NULL);
+  inline ~Mutex();
+
+private:
+  UMutex   *fMutex;
+
+  Mutex(const Mutex &other); // forbid copying of this class
+  Mutex &operator=(const Mutex &other); // forbid copying of this class
+};
+
+inline Mutex::Mutex(UMutex *mutex)
+  : fMutex(mutex)
+{
+  umtx_lock(fMutex);
+}
+
+inline Mutex::~Mutex()
+{
+  umtx_unlock(fMutex);
+}
+
+U_NAMESPACE_END
+
+#endif //_MUTEX_
+//eof
diff --git a/src/core/basetypes/icu-ucsdet/include/putilimp.h b/src/core/basetypes/icu-ucsdet/include/putilimp.h
new file mode 100644
index 00000000..d2c1c66f
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/putilimp.h
@@ -0,0 +1,611 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+*  FILE NAME : putilimp.h
+*
+*   Date        Name        Description
+*   10/17/04    grhoten     Move internal functions from putil.h to this file.
+******************************************************************************
+*/
+
+#ifndef PUTILIMP_H
+#define PUTILIMP_H
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+
+/**
+ * \def U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
+ * Nearly all CPUs and compilers implement a right-shift of a signed integer
+ * as an Arithmetic Shift Right which copies the sign bit (the Most Significant Bit (MSB))
+ * into the vacated bits (sign extension).
+ * For example, (int32_t)0xfff5fff3>>4 becomes 0xffff5fff and -1>>1=-1.
+ *
+ * This can be useful for storing a signed value in the upper bits
+ * and another bit field in the lower bits.
+ * The signed value can be retrieved by simple right-shifting.
+ *
+ * This is consistent with the Java language.
+ *
+ * However, the C standard allows compilers to implement a right-shift of a signed integer
+ * as a Logical Shift Right which copies a 0 into the vacated bits.
+ * For example, (int32_t)0xfff5fff3>>4 becomes 0x0fff5fff and -1>>1=0x7fffffff.
+ *
+ * Code that depends on the natural behavior should be guarded with this macro,
+ * with an alternate path for unusual platforms.
+ * @internal
+ */
+#ifdef U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
+    /* Use the predefined value. */
+#else
+    /*
+     * Nearly all CPUs & compilers implement a right-shift of a signed integer
+     * as an Arithmetic Shift Right (with sign extension).
+     */
+#   define U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 1
+#endif
+
+/** Define this to 1 if your platform supports IEEE 754 floating point,
+   to 0 if it does not. */
+#ifndef IEEE_754
+#   define IEEE_754 1
+#endif
+
+/**
+ * uintptr_t is an optional part of the standard definitions in stdint.h.
+ * The opengroup.org documentation for stdint.h says
+ * "On XSI-conformant systems, the intptr_t and uintptr_t types are required;
+ * otherwise, they are optional."
+ * We assume that when uintptr_t is defined, UINTPTR_MAX is defined as well.
+ *
+ * Do not use ptrdiff_t since it is signed. size_t is unsigned.
+ */
+/* TODO: This check fails on some z environments. Filed a ticket #9357 for this. */
+#if !defined(__intptr_t_defined) && !defined(UINTPTR_MAX) && (U_PLATFORM != U_PF_OS390)
+typedef size_t uintptr_t;
+#endif
+
+/**
+ * \def U_HAVE_MSVC_2003_OR_EARLIER
+ * Flag for workaround of MSVC 2003 optimization bugs
+ * @internal
+ */
+#if !defined(U_HAVE_MSVC_2003_OR_EARLIER) && defined(_MSC_VER) && (_MSC_VER < 1400)
+#define U_HAVE_MSVC_2003_OR_EARLIER
+#endif
+
+/*===========================================================================*/
+/** @{ Information about POSIX support                                       */
+/*===========================================================================*/
+
+#ifdef U_HAVE_NL_LANGINFO_CODESET
+    /* Use the predefined value. */
+#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX
+#   define U_HAVE_NL_LANGINFO_CODESET 0
+#else
+#   define U_HAVE_NL_LANGINFO_CODESET 1
+#endif
+
+#ifdef U_NL_LANGINFO_CODESET
+    /* Use the predefined value. */
+#elif !U_HAVE_NL_LANGINFO_CODESET
+#   define U_NL_LANGINFO_CODESET -1
+#elif U_PLATFORM == U_PF_OS400
+   /* not defined */
+#else
+#   define U_NL_LANGINFO_CODESET CODESET
+#endif
+
+#ifdef U_TZSET
+    /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+#   define U_TZSET _tzset
+#elif U_PLATFORM == U_PF_OS400
+   /* not defined */
+#else
+#   define U_TZSET tzset
+#endif
+
+#if defined(U_TIMEZONE) || defined(U_HAVE_TIMEZONE)
+    /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID
+#   define U_TIMEZONE timezone
+#elif U_PLATFORM_IS_LINUX_BASED
+#   if defined(__UCLIBC__)
+       /* uClibc does not have __timezone or _timezone. */
+#   elif defined(_NEWLIB_VERSION)
+#      define U_TIMEZONE      _timezone
+#   elif defined(__GLIBC__)
+       /* glibc */
+#      define U_TIMEZONE      __timezone
+#   endif
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+#   define U_TIMEZONE _timezone
+#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__)
+   /* not defined */
+#elif U_PLATFORM == U_PF_OS400
+   /* not defined */
+#elif U_PLATFORM == U_PF_IPHONE
+   /* not defined */
+#else
+#   define U_TIMEZONE timezone
+#endif
+
+#ifdef U_TZNAME
+    /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+#   define U_TZNAME _tzname
+#elif U_PLATFORM == U_PF_OS400
+   /* not defined */
+#else
+#   define U_TZNAME tzname
+#endif
+
+#ifdef U_HAVE_MMAP
+    /* Use the predefined value. */
+#elif U_PLATFORM_HAS_WIN32_API
+#   define U_HAVE_MMAP 0
+#else
+#   define U_HAVE_MMAP 1
+#endif
+
+#ifdef U_HAVE_POPEN
+    /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+#   define U_HAVE_POPEN 0
+#elif U_PLATFORM == U_PF_OS400
+#   define U_HAVE_POPEN 0
+#else
+#   define U_HAVE_POPEN 1
+#endif
+
+/**
+ * \def U_HAVE_DIRENT_H
+ * Defines whether dirent.h is available.
+ * @internal
+ */
+#ifdef U_HAVE_DIRENT_H
+    /* Use the predefined value. */
+#elif U_PLATFORM_HAS_WIN32_API
+#   define U_HAVE_DIRENT_H 0
+#else
+#   define U_HAVE_DIRENT_H 1
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ GCC built in functions for atomic memory operations                   */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_GCC_ATOMICS
+ * @internal
+ */
+#ifdef U_HAVE_GCC_ATOMICS
+    /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_MINGW
+    #define U_HAVE_GCC_ATOMICS 0
+#elif U_GCC_MAJOR_MINOR >= 404 || defined(__clang__)
+    /* TODO: Intel icc and IBM xlc on AIX also support gcc atomics.  (Intel originated them.)
+     *       Add them for these compilers.
+     * Note: Clang sets __GNUC__ defines for version 4.2, so misses the 4.4 test here.
+     */
+#   define U_HAVE_GCC_ATOMICS 1
+#else
+#   define U_HAVE_GCC_ATOMICS 0
+#endif
+
+/** @} */
+
+/**
+ * \def U_HAVE_STD_ATOMICS
+ * Defines whether the standard C++11 <atomic> is available.
+ * ICU will use this when avialable,
+ * otherwise will fall back to compiler or platform specific alternatives.
+ * @internal
+ */
+#ifdef U_HAVE_STD_ATOMICS
+    /* Use the predefined value. */
+#elif !defined(__cplusplus) || __cplusplus<201103L
+    /* Not C++11, disable use of atomics */
+#   define U_HAVE_STD_ATOMICS 0
+#elif __clang__ && __clang_major__==3 && __clang_minor__<=1
+    /* Clang 3.1, has atomic variable initializer bug. */
+#   define U_HAVE_STD_ATOMICS 0
+#else 
+    /* U_HAVE_ATOMIC is typically set by an autoconf test of #include <atomic>  */
+    /*   Can be set manually, or left undefined, on platforms without autoconf. */
+#   if defined(U_HAVE_ATOMIC) &&  U_HAVE_ATOMIC 
+#      define U_HAVE_STD_ATOMICS 1
+#   else
+#      define U_HAVE_STD_ATOMICS 0
+#   endif
+#endif
+
+
+/*===========================================================================*/
+/** @{ Code alignment                                                        */
+/*===========================================================================*/
+
+/**
+ * \def U_ALIGN_CODE
+ * This is used to align code fragments to a specific byte boundary.
+ * This is useful for getting consistent performance test results.
+ * @internal
+ */
+#ifdef U_ALIGN_CODE
+    /* Use the predefined value. */
+#elif defined(_MSC_VER) && defined(_M_IX86) && !defined(_MANAGED)
+#   define U_ALIGN_CODE(boundarySize) __asm  align boundarySize
+#else
+#   define U_ALIGN_CODE(boundarySize) 
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Programs used by ICU code                                             */
+/*===========================================================================*/
+
+/**
+ * \def U_MAKE_IS_NMAKE
+ * Defines whether the "make" program is Windows nmake.
+ */
+#ifdef U_MAKE_IS_NMAKE
+    /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_WINDOWS
+#   define U_MAKE_IS_NMAKE 1
+#else
+#   define U_MAKE_IS_NMAKE 0
+#endif
+
+/** @} */
+
+/*==========================================================================*/
+/* Platform utilities                                                       */
+/*==========================================================================*/
+
+/**
+ * Platform utilities isolates the platform dependencies of the
+ * libarary.  For each platform which this code is ported to, these
+ * functions may have to be re-implemented.
+ */
+
+/**
+ * Floating point utility to determine if a double is Not a Number (NaN).
+ * @internal
+ */
+U_INTERNAL UBool   U_EXPORT2 uprv_isNaN(double d);
+/**
+ * Floating point utility to determine if a double has an infinite value.
+ * @internal
+ */
+U_INTERNAL UBool   U_EXPORT2 uprv_isInfinite(double d);
+/**
+ * Floating point utility to determine if a double has a positive infinite value.
+ * @internal
+ */
+U_INTERNAL UBool   U_EXPORT2 uprv_isPositiveInfinity(double d);
+/**
+ * Floating point utility to determine if a double has a negative infinite value.
+ * @internal
+ */
+U_INTERNAL UBool   U_EXPORT2 uprv_isNegativeInfinity(double d);
+/**
+ * Floating point utility that returns a Not a Number (NaN) value.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_getNaN(void);
+/**
+ * Floating point utility that returns an infinite value.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_getInfinity(void);
+
+/**
+ * Floating point utility to truncate a double.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_trunc(double d);
+/**
+ * Floating point utility to calculate the floor of a double.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_floor(double d);
+/**
+ * Floating point utility to calculate the ceiling of a double.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_ceil(double d);
+/**
+ * Floating point utility to calculate the absolute value of a double.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_fabs(double d);
+/**
+ * Floating point utility to calculate the fractional and integer parts of a double.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_modf(double d, double* pinteger);
+/**
+ * Floating point utility to calculate the remainder of a double divided by another double.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_fmod(double d, double y);
+/**
+ * Floating point utility to calculate d to the power of exponent (d^exponent).
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_pow(double d, double exponent);
+/**
+ * Floating point utility to calculate 10 to the power of exponent (10^exponent).
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_pow10(int32_t exponent);
+/**
+ * Floating point utility to calculate the maximum value of two doubles.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_fmax(double d, double y);
+/**
+ * Floating point utility to calculate the minimum value of two doubles.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_fmin(double d, double y);
+/**
+ * Private utility to calculate the maximum value of two integers.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y);
+/**
+ * Private utility to calculate the minimum value of two integers.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y);
+
+#if U_IS_BIG_ENDIAN
+#   define uprv_isNegative(number) (*((signed char *)&(number))<0)
+#else
+#   define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
+#endif
+
+/**
+ * Return the largest positive number that can be represented by an integer
+ * type of arbitrary bit length.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_maxMantissa(void);
+
+/**
+ * Floating point utility to calculate the logarithm of a double.
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_log(double d);
+
+/**
+ * Does common notion of rounding e.g. uprv_floor(x + 0.5);
+ * @param x the double number
+ * @return the rounded double
+ * @internal
+ */
+U_INTERNAL double  U_EXPORT2 uprv_round(double x);
+
+#if 0
+/**
+ * Returns the number of digits after the decimal point in a double number x.
+ *
+ * @param x the double number
+ * @return the number of digits after the decimal point in a double number x.
+ * @internal
+ */
+/*U_INTERNAL int32_t  U_EXPORT2 uprv_digitsAfterDecimal(double x);*/
+#endif
+
+#if !U_CHARSET_IS_UTF8
+/**
+ * Please use ucnv_getDefaultName() instead.
+ * Return the default codepage for this platform and locale.
+ * This function can call setlocale() on Unix platforms. Please read the
+ * platform documentation on setlocale() before calling this function.
+ * @return the default codepage for this platform 
+ * @internal
+ */
+U_INTERNAL const char*  U_EXPORT2 uprv_getDefaultCodepage(void);
+#endif
+
+/**
+ * Please use uloc_getDefault() instead.
+ * Return the default locale ID string by querying ths system, or
+ *     zero if one cannot be found. 
+ * This function can call setlocale() on Unix platforms. Please read the
+ * platform documentation on setlocale() before calling this function.
+ * @return the default locale ID string
+ * @internal
+ */
+U_INTERNAL const char*  U_EXPORT2 uprv_getDefaultLocaleID(void);
+
+/**
+ * Time zone utilities
+ *
+ * Wrappers for C runtime library functions relating to timezones.
+ * The t_tzset() function (similar to tzset) uses the current setting
+ * of the environment variable TZ to assign values to three global
+ * variables: daylight, timezone, and tzname. These variables have the
+ * following meanings, and are declared in &lt;time.h&gt;.
+ *
+ *   daylight   Nonzero if daylight-saving-time zone (DST) is specified
+ *              in TZ; otherwise, 0. Default value is 1.
+ *   timezone   Difference in seconds between coordinated universal
+ *              time and local time. E.g., -28,800 for PST (GMT-8hrs)
+ *   tzname(0)  Three-letter time-zone name derived from TZ environment
+ *              variable. E.g., "PST".
+ *   tzname(1)  Three-letter DST zone name derived from TZ environment
+ *              variable.  E.g., "PDT". If DST zone is omitted from TZ,
+ *              tzname(1) is an empty string.
+ *
+ * Notes: For example, to set the TZ environment variable to correspond
+ * to the current time zone in Germany, you can use one of the
+ * following statements:
+ *
+ *   set TZ=GST1GDT
+ *   set TZ=GST+1GDT
+ *
+ * If the TZ value is not set, t_tzset() attempts to use the time zone
+ * information specified by the operating system. Under Windows NT
+ * and Windows 95, this information is specified in the Control Panel's
+ * Date/Time application.
+ * @internal
+ */
+U_INTERNAL void     U_EXPORT2 uprv_tzset(void);
+
+/**
+ * Difference in seconds between coordinated universal
+ * time and local time. E.g., -28,800 for PST (GMT-8hrs)
+ * @return the difference in seconds between coordinated universal time and local time.
+ * @internal
+ */
+U_INTERNAL int32_t  U_EXPORT2 uprv_timezone(void);
+
+/**
+ *   tzname(0)  Three-letter time-zone name derived from TZ environment
+ *              variable. E.g., "PST".
+ *   tzname(1)  Three-letter DST zone name derived from TZ environment
+ *              variable.  E.g., "PDT". If DST zone is omitted from TZ,
+ *              tzname(1) is an empty string.
+ * @internal
+ */
+U_INTERNAL const char* U_EXPORT2 uprv_tzname(int n);
+
+/**
+ * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
+ * This function is affected by 'faketime' and should be the bottleneck for all user-visible ICU time functions.
+ * @return the UTC time measured in milliseconds
+ * @internal
+ */
+U_INTERNAL UDate U_EXPORT2 uprv_getUTCtime(void);
+
+/**
+ * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
+ * This function is not affected by 'faketime', so it should only be used by low level test functions- not by anything that
+ * exposes time to the end user.
+ * @return the UTC time measured in milliseconds
+ * @internal
+ */
+U_INTERNAL UDate U_EXPORT2 uprv_getRawUTCtime(void);
+
+/**
+ * Determine whether a pathname is absolute or not, as defined by the platform.
+ * @param path Pathname to test
+ * @return TRUE if the path is absolute
+ * @internal (ICU 3.0)
+ */
+U_INTERNAL UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path);
+
+/**
+ * Use U_MAX_PTR instead of this function.
+ * @param void pointer to test
+ * @return the largest possible pointer greater than the base
+ * @internal (ICU 3.8)
+ */
+U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base);
+
+/**
+ * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer.
+ * In fact, buffer sizes must not exceed 2GB so that the difference between
+ * the buffer limit and the buffer start can be expressed in an int32_t.
+ *
+ * The definition of U_MAX_PTR must fulfill the following conditions:
+ * - return the largest possible pointer greater than base
+ * - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
+ * - avoid wrapping around at high addresses
+ * - make sure that the returned pointer is not farther from base than 0x7fffffff bytes
+ *
+ * @param base The beginning of a buffer to find the maximum offset from
+ * @internal
+ */
+#ifndef U_MAX_PTR
+#  if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
+    /* We have 31-bit pointers. */
+#    define U_MAX_PTR(base) ((void *)0x7fffffff)
+#  elif U_PLATFORM == U_PF_OS400
+#    define U_MAX_PTR(base) uprv_maximumPtr((void *)base)
+#  elif 0
+    /*
+     * For platforms where pointers are scalar values (which is normal, but unlike i5/OS)
+     * but that do not define uintptr_t.
+     *
+     * However, this does not work on modern compilers:
+     * The C++ standard does not define pointer overflow, and allows compilers to
+     * assume that p+u>p for any pointer p and any integer u>0.
+     * Thus, modern compilers optimize away the ">" comparison.
+     * (See ICU tickets #7187 and #8096.)
+     */
+#    define U_MAX_PTR(base) \
+    ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) \
+        ? ((char *)(base)+0x7fffffffu) \
+        : (char *)-1))
+#  else
+    /* Default version. C++ standard compliant for scalar pointers. */
+#    define U_MAX_PTR(base) \
+    ((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \
+        ? ((uintptr_t)(base)+0x7fffffffu) \
+        : (uintptr_t)-1))
+#  endif
+#endif
+
+/*  Dynamic Library Functions */
+
+typedef void (UVoidFunction)(void);
+
+#if U_ENABLE_DYLOAD
+/**
+ * Load a library
+ * @internal (ICU 4.4)
+ */
+U_INTERNAL void * U_EXPORT2 uprv_dl_open(const char *libName, UErrorCode *status);
+
+/**
+ * Close a library
+ * @internal (ICU 4.4)
+ */
+U_INTERNAL void U_EXPORT2 uprv_dl_close( void *lib, UErrorCode *status);
+
+/**
+ * Extract a symbol from a library (function)
+ * @internal (ICU 4.8)
+ */
+U_INTERNAL UVoidFunction* U_EXPORT2 uprv_dlsym_func( void *lib, const char *symbolName, UErrorCode *status);
+
+/**
+ * Extract a symbol from a library (function)
+ * Not implemented, no clients.
+ * @internal
+ */
+/* U_INTERNAL void * U_EXPORT2 uprv_dlsym_data( void *lib, const char *symbolName, UErrorCode *status); */
+
+#endif
+
+/**
+ * Define malloc and related functions
+ * @internal
+ */
+#if U_PLATFORM == U_PF_OS400
+# define uprv_default_malloc(x) _C_TS_malloc(x)
+# define uprv_default_realloc(x,y) _C_TS_realloc(x,y)
+# define uprv_default_free(x) _C_TS_free(x)
+/* also _C_TS_calloc(x) */
+#else
+/* C defaults */
+# define uprv_default_malloc(x) malloc(x)
+# define uprv_default_realloc(x,y) realloc(x,y)
+# define uprv_default_free(x) free(x)
+#endif
+
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/uarrsort.h b/src/core/basetypes/icu-ucsdet/include/uarrsort.h
new file mode 100644
index 00000000..aece6da9
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uarrsort.h
@@ -0,0 +1,101 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uarrsort.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003aug04
+*   created by: Markus W. Scherer
+*
+*   Internal function for sorting arrays.
+*/
+
+#ifndef __UARRSORT_H__
+#define __UARRSORT_H__
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+/**
+ * Function type for comparing two items as part of sorting an array or similar.
+ * Callback function for uprv_sortArray().
+ *
+ * @param context Application-specific pointer, passed through by uprv_sortArray().
+ * @param left    Pointer to the "left" item.
+ * @param right   Pointer to the "right" item.
+ * @return 32-bit signed integer comparison result:
+ *                <0 if left<right
+ *               ==0 if left==right
+ *                >0 if left>right
+ *
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UComparator(const void *context, const void *left, const void *right);
+U_CDECL_END
+
+/**
+ * Array sorting function.
+ * Uses a UComparator for comparing array items to each other, and simple
+ * memory copying to move items.
+ *
+ * @param array      The array to be sorted.
+ * @param length     The number of items in the array.
+ * @param itemSize   The size in bytes of each array item.
+ * @param cmp        UComparator function used to compare two items each.
+ * @param context    Application-specific pointer, passed through to the UComparator.
+ * @param sortStable If true, a stable sorting algorithm must be used.
+ * @param pErrorCode ICU in/out UErrorCode parameter.
+ *
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+uprv_sortArray(void *array, int32_t length, int32_t itemSize,
+               UComparator *cmp, const void *context,
+               UBool sortStable, UErrorCode *pErrorCode);
+
+/**
+ * Convenience UComparator implementation for uint16_t arrays.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_uint16Comparator(const void *context, const void *left, const void *right);
+
+/**
+ * Convenience UComparator implementation for int32_t arrays.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_int32Comparator(const void *context, const void *left, const void *right);
+
+/**
+ * Convenience UComparator implementation for uint32_t arrays.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_uint32Comparator(const void *context, const void *left, const void *right);
+
+/**
+ * Much like Java Collections.binarySearch(list, key, comparator).
+ *
+ * Except: Java documents "If the list contains multiple elements equal to
+ * the specified object, there is no guarantee which one will be found."
+ *
+ * This version here will return the largest index of any equal item,
+ * for use in stable sorting.
+ *
+ * @return the index>=0 where the item was found:
+ *         the largest such index, if multiple, for stable sorting;
+ *         or the index<0 for inserting the item at ~index in sorted order
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_stableBinarySearch(char *array, int32_t length, void *item, int32_t itemSize,
+                        UComparator *cmp, const void *context);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/uassert.h b/src/core/basetypes/icu-ucsdet/include/uassert.h
new file mode 100644
index 00000000..9dc29b24
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uassert.h
@@ -0,0 +1,32 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2002-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File uassert.h
+*
+*  Contains U_ASSERT macro
+*
+*    By default, U_ASSERT just wraps the C library assert macro.
+*    By changing the definition here, the assert behavior for ICU can be changed
+*    without affecting other non-ICU uses of the C library assert().
+*
+******************************************************************************
+*/
+
+#ifndef U_ASSERT_H
+#define U_ASSERT_H
+/* utypes.h is included to get the proper define for uint8_t */
+#include "unicode/utypes.h"
+#if U_DEBUG
+#   include <assert.h>
+#   define U_ASSERT(exp) assert(exp)
+#else
+#   define U_ASSERT(exp)
+#endif
+#endif
+
+
diff --git a/src/core/basetypes/icu-ucsdet/include/ucase.h b/src/core/basetypes/icu-ucsdet/include/ucase.h
new file mode 100644
index 00000000..8f24769d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucase.h
@@ -0,0 +1,409 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2004-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  ucase.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2004aug30
+*   created by: Markus W. Scherer
+*
+*   Low-level Unicode character/string case mapping code.
+*/
+
+#ifndef __UCASE_H__
+#define __UCASE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+#include "putilimp.h"
+#include "uset_imp.h"
+#include "udataswp.h"
+
+#ifdef __cplusplus
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+U_NAMESPACE_END
+#endif
+
+/* library API -------------------------------------------------------------- */
+
+U_CDECL_BEGIN
+
+struct UCaseProps;
+typedef struct UCaseProps UCaseProps;
+
+U_CDECL_END
+
+U_CAPI const UCaseProps * U_EXPORT2
+ucase_getSingleton(void);
+
+U_CFUNC void U_EXPORT2
+ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode);
+
+/**
+ * Requires non-NULL locale ID but otherwise does the equivalent of
+ * checking for language codes as if uloc_getLanguage() were called:
+ * Accepts both 2- and 3-letter codes and accepts case variants.
+ */
+U_CFUNC int32_t
+ucase_getCaseLocale(const char *locale, int32_t *locCache);
+
+/* Casing locale types for ucase_getCaseLocale */
+enum {
+    UCASE_LOC_UNKNOWN,
+    UCASE_LOC_ROOT,
+    UCASE_LOC_TURKISH,
+    UCASE_LOC_LITHUANIAN,
+    UCASE_LOC_DUTCH
+};
+
+/**
+ * Bit mask for getting just the options from a string compare options word
+ * that are relevant for case-insensitive string comparison.
+ * See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
+ * @internal
+ */
+#define _STRCASECMP_OPTIONS_MASK 0xffff
+
+/**
+ * Bit mask for getting just the options from a string compare options word
+ * that are relevant for case folding (of a single string or code point).
+ * See uchar.h.
+ * @internal
+ */
+#define _FOLD_CASE_OPTIONS_MASK 0xff
+
+/* single-code point functions */
+
+U_CAPI UChar32 U_EXPORT2
+ucase_tolower(const UCaseProps *csp, UChar32 c);
+
+U_CAPI UChar32 U_EXPORT2
+ucase_toupper(const UCaseProps *csp, UChar32 c);
+
+U_CAPI UChar32 U_EXPORT2
+ucase_totitle(const UCaseProps *csp, UChar32 c);
+
+U_CAPI UChar32 U_EXPORT2
+ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options);
+
+/**
+ * Adds all simple case mappings and the full case folding for c to sa,
+ * and also adds special case closure mappings.
+ * c itself is not added.
+ * For example, the mappings
+ * - for s include long s
+ * - for sharp s include ss
+ * - for k include the Kelvin sign
+ */
+U_CFUNC void U_EXPORT2
+ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa);
+
+/**
+ * Maps the string to single code points and adds the associated case closure
+ * mappings.
+ * The string is mapped to code points if it is their full case folding string.
+ * In other words, this performs a reverse full case folding and then
+ * adds the case closure items of the resulting code points.
+ * If the string is found and its closure applied, then
+ * the string itself is added as well as part of its code points' closure.
+ * It must be length>=0.
+ *
+ * @return TRUE if the string was found
+ */
+U_CFUNC UBool U_EXPORT2
+ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa);
+
+#ifdef __cplusplus
+U_NAMESPACE_BEGIN
+
+/**
+ * Iterator over characters with more than one code point in the full default Case_Folding.
+ */
+class U_COMMON_API FullCaseFoldingIterator {
+public:
+    /** Constructor. */
+    FullCaseFoldingIterator();
+    /**
+     * Returns the next (cp, full) pair where "full" is cp's full default Case_Folding.
+     * Returns a negative cp value at the end of the iteration.
+     */
+    UChar32 next(UnicodeString &full);
+private:
+    FullCaseFoldingIterator(const FullCaseFoldingIterator &);  // no copy
+    FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &);  // no assignment
+
+    const UChar *unfold;
+    int32_t unfoldRows;
+    int32_t unfoldRowWidth;
+    int32_t unfoldStringWidth;
+    int32_t currentRow;
+    int32_t rowCpIndex;
+};
+
+U_NAMESPACE_END
+#endif
+
+/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
+U_CAPI int32_t U_EXPORT2
+ucase_getType(const UCaseProps *csp, UChar32 c);
+
+/** @return same as ucase_getType(), or <0 if c is case-ignorable */
+U_CAPI int32_t U_EXPORT2
+ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c);
+
+U_CAPI UBool U_EXPORT2
+ucase_isSoftDotted(const UCaseProps *csp, UChar32 c);
+
+U_CAPI UBool U_EXPORT2
+ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c);
+
+/* string case mapping functions */
+
+U_CDECL_BEGIN
+
+/**
+ * Iterator function for string case mappings, which need to look at the
+ * context (surrounding text) of a given character for conditional mappings.
+ *
+ * The iterator only needs to go backward or forward away from the
+ * character in question. It does not use any indexes on this interface.
+ * It does not support random access or an arbitrary change of
+ * iteration direction.
+ *
+ * The code point being case-mapped itself is never returned by
+ * this iterator.
+ *
+ * @param context A pointer to the iterator's working data.
+ * @param dir If <0 then start iterating backward from the character;
+ *            if >0 then start iterating forward from the character;
+ *            if 0 then continue iterating in the current direction.
+ * @return Next code point, or <0 when the iteration is done.
+ */
+typedef UChar32 U_CALLCONV
+UCaseContextIterator(void *context, int8_t dir);
+
+/**
+ * Sample struct which may be used by some implementations of
+ * UCaseContextIterator.
+ */
+struct UCaseContext {
+    void *p;
+    int32_t start, index, limit;
+    int32_t cpStart, cpLimit;
+    int8_t dir;
+    int8_t b1, b2, b3;
+};
+typedef struct UCaseContext UCaseContext;
+
+U_CDECL_END
+
+#define UCASECONTEXT_INITIALIZER { NULL,  0, 0, 0,  0, 0,  0,  0, 0, 0 }
+
+enum {
+    /**
+     * For string case mappings, a single character (a code point) is mapped
+     * either to itself (in which case in-place mapping functions do nothing),
+     * or to another single code point, or to a string.
+     * Aside from the string contents, these are indicated with a single int32_t
+     * value as follows:
+     *
+     * Mapping to self: Negative values (~self instead of -self to support U+0000)
+     *
+     * Mapping to another code point: Positive values >UCASE_MAX_STRING_LENGTH
+     *
+     * Mapping to a string: The string length (0..UCASE_MAX_STRING_LENGTH) is
+     * returned. Note that the string result may indeed have zero length.
+     */
+    UCASE_MAX_STRING_LENGTH=0x1f
+};
+
+/**
+ * Get the full lowercase mapping for c.
+ *
+ * @param csp Case mapping properties.
+ * @param c Character to be mapped.
+ * @param iter Character iterator, used for context-sensitive mappings.
+ *             See UCaseContextIterator for details.
+ *             If iter==NULL then a context-independent result is returned.
+ * @param context Pointer to be passed into iter.
+ * @param pString If the mapping result is a string, then the pointer is
+ *                written to *pString.
+ * @param locale Locale ID for locale-dependent mappings.
+ * @param locCache Initialize to 0; may be used to cache the result of parsing
+ *                 the locale ID for subsequent calls.
+ *                 Can be NULL.
+ * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH.
+ *
+ * @see UCaseContextIterator
+ * @see UCASE_MAX_STRING_LENGTH
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ucase_toFullLower(const UCaseProps *csp, UChar32 c,
+                  UCaseContextIterator *iter, void *context,
+                  const UChar **pString,
+                  const char *locale, int32_t *locCache);
+
+U_CAPI int32_t U_EXPORT2
+ucase_toFullUpper(const UCaseProps *csp, UChar32 c,
+                  UCaseContextIterator *iter, void *context,
+                  const UChar **pString,
+                  const char *locale, int32_t *locCache);
+
+U_CAPI int32_t U_EXPORT2
+ucase_toFullTitle(const UCaseProps *csp, UChar32 c,
+                  UCaseContextIterator *iter, void *context,
+                  const UChar **pString,
+                  const char *locale, int32_t *locCache);
+
+U_CAPI int32_t U_EXPORT2
+ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
+                    const UChar **pString,
+                    uint32_t options);
+
+U_CFUNC int32_t U_EXPORT2
+ucase_hasBinaryProperty(UChar32 c, UProperty which);
+
+
+U_CDECL_BEGIN
+
+/**
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UCaseMapFull(const UCaseProps *csp, UChar32 c,
+             UCaseContextIterator *iter, void *context,
+             const UChar **pString,
+             const char *locale, int32_t *locCache);
+
+U_CDECL_END
+
+/* file definitions --------------------------------------------------------- */
+
+#define UCASE_DATA_NAME "ucase"
+#define UCASE_DATA_TYPE "icu"
+
+/* format "cAsE" */
+#define UCASE_FMT_0 0x63
+#define UCASE_FMT_1 0x41
+#define UCASE_FMT_2 0x53
+#define UCASE_FMT_3 0x45
+
+/* indexes into indexes[] */
+enum {
+    UCASE_IX_INDEX_TOP,
+    UCASE_IX_LENGTH,
+    UCASE_IX_TRIE_SIZE,
+    UCASE_IX_EXC_LENGTH,
+    UCASE_IX_UNFOLD_LENGTH,
+
+    UCASE_IX_MAX_FULL_LENGTH=15,
+    UCASE_IX_TOP=16
+};
+
+/* definitions for 16-bit case properties word ------------------------------ */
+
+/* 2-bit constants for types of cased characters */
+#define UCASE_TYPE_MASK     3
+enum {
+    UCASE_NONE,
+    UCASE_LOWER,
+    UCASE_UPPER,
+    UCASE_TITLE
+};
+
+#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
+#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)
+
+#define UCASE_IGNORABLE         4
+#define UCASE_SENSITIVE         8
+#define UCASE_EXCEPTION         0x10
+
+#define UCASE_DOT_MASK      0x60
+enum {
+    UCASE_NO_DOT=0,         /* normal characters with cc=0 */
+    UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */
+    UCASE_ABOVE=0x40,       /* "above" accents with cc=230 */
+    UCASE_OTHER_ACCENT=0x60 /* other accent character (0<cc!=230) */
+};
+
+/* no exception: bits 15..7 are a 9-bit signed case mapping delta */
+#define UCASE_DELTA_SHIFT   7
+#define UCASE_DELTA_MASK    0xff80
+#define UCASE_MAX_DELTA     0xff
+#define UCASE_MIN_DELTA     (-UCASE_MAX_DELTA-1)
+
+#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
+#   define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT)
+#else
+#   define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT))
+#endif
+
+/* exception: bits 15..5 are an unsigned 11-bit index into the exceptions array */
+#define UCASE_EXC_SHIFT     5
+#define UCASE_EXC_MASK      0xffe0
+#define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1)
+
+/* definitions for 16-bit main exceptions word ------------------------------ */
+
+/* first 8 bits indicate values in optional slots */
+enum {
+    UCASE_EXC_LOWER,
+    UCASE_EXC_FOLD,
+    UCASE_EXC_UPPER,
+    UCASE_EXC_TITLE,
+    UCASE_EXC_4,            /* reserved */
+    UCASE_EXC_5,            /* reserved */
+    UCASE_EXC_CLOSURE,
+    UCASE_EXC_FULL_MAPPINGS,
+    UCASE_EXC_ALL_SLOTS     /* one past the last slot */
+};
+
+/* each slot is 2 uint16_t instead of 1 */
+#define UCASE_EXC_DOUBLE_SLOTS      0x100
+
+/* reserved: exception bits 11..9 */
+
+/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */
+#define UCASE_EXC_DOT_SHIFT     7
+
+/* normally stored in the main word, but pushed out for larger exception indexes */
+#define UCASE_EXC_DOT_MASK      0x3000
+enum {
+    UCASE_EXC_NO_DOT=0,
+    UCASE_EXC_SOFT_DOTTED=0x1000,
+    UCASE_EXC_ABOVE=0x2000,         /* "above" accents with cc=230 */
+    UCASE_EXC_OTHER_ACCENT=0x3000   /* other character (0<cc!=230) */
+};
+
+/* complex/conditional mappings */
+#define UCASE_EXC_CONDITIONAL_SPECIAL   0x4000
+#define UCASE_EXC_CONDITIONAL_FOLD      0x8000
+
+/* definitions for lengths word for full case mappings */
+#define UCASE_FULL_LOWER    0xf
+#define UCASE_FULL_FOLDING  0xf0
+#define UCASE_FULL_UPPER    0xf00
+#define UCASE_FULL_TITLE    0xf000
+
+/* maximum lengths */
+#define UCASE_FULL_MAPPINGS_MAX_LENGTH (4*0xf)
+#define UCASE_CLOSURE_MAX_LENGTH 0xf
+
+/* constants for reverse case folding ("unfold") data */
+enum {
+    UCASE_UNFOLD_ROWS,
+    UCASE_UNFOLD_ROW_WIDTH,
+    UCASE_UNFOLD_STRING_WIDTH
+};
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/ucln.h b/src/core/basetypes/icu-ucsdet/include/ucln.h
new file mode 100644
index 00000000..cd2630af
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucln.h
@@ -0,0 +1,89 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2001-2013, International Business Machines
+*                Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*   file name:  ucln.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001July05
+*   created by: George Rhoten
+*/
+
+#ifndef __UCLN_H__
+#define __UCLN_H__
+
+#include "unicode/utypes.h"
+
+/** These are the functions used to register a library's memory cleanup
+ * functions.  Each library should define a single library register function
+ * to call this API.  In the i18n library, it is ucln_i18n_registerCleanup().
+ *
+ * None of the cleanup functions should use a mutex to clean up an API's
+ * allocated memory because a cleanup function is not meant to be thread safe,
+ * and plenty of data cannot be reference counted in order to make sure that
+ * no one else needs the allocated data.
+ *
+ * In order to make a cleanup function get called when u_cleanup is called,
+ * You should add your function to the library specific cleanup function.
+ * If the cleanup function is not in the common library, the code that
+ * allocates the memory should call the library specific cleanup function.
+ * For instance, in the i18n library, any memory allocated statically must
+ * call ucln_i18n_registerCleanup() from the ucln_in.h header.  These library
+ * cleanup functions are needed in order to prevent a circular dependency
+ * between the common library and any other library.
+ *
+ * The order of the cleanup is very important.  In general, an API that
+ * depends on a second API should be cleaned up before the second API.
+ * For instance, the default converter in ustring depends upon the converter
+ * API.  So the default converter should be closed before the converter API
+ * has its cache flushed.  This will prevent any memory leaks due to
+ * reference counting.
+ *
+ * Please see common/ucln_cmn.{h,c} and i18n/ucln_in.{h,c} for examples.
+ */
+
+/**
+ * Data Type for cleanup function selector. These roughly correspond to libraries.
+ */
+typedef enum ECleanupLibraryType {
+    UCLN_START = -1,
+    UCLN_UPLUG,     /* ICU plugins */
+    UCLN_CUSTOM,    /* Custom is for anyone else. */
+    UCLN_CTESTFW,
+    UCLN_TOOLUTIL,
+    UCLN_LAYOUTEX,
+    UCLN_LAYOUT,
+    UCLN_IO,
+    UCLN_I18N,
+    UCLN_COMMON /* This must be the last one to cleanup. */
+} ECleanupLibraryType;
+
+/**
+ * Data type for cleanup function pointer
+ */
+U_CDECL_BEGIN
+typedef UBool U_CALLCONV cleanupFunc(void);
+typedef void U_CALLCONV initFunc(UErrorCode *);
+U_CDECL_END
+
+/**
+ * Register a cleanup function
+ * @param type which library to register for.
+ * @param func the function pointer
+ */
+U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type,
+                                           cleanupFunc *func);
+
+/**
+ * Request cleanup for one specific library.
+ * Not thread safe.
+ * @param type which library to cleanup
+ */
+U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/ucln_cmn.h b/src/core/basetypes/icu-ucsdet/include/ucln_cmn.h
new file mode 100644
index 00000000..3517ca78
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucln_cmn.h
@@ -0,0 +1,72 @@
+/*
+******************************************************************************
+*                                                                            *
+* Copyright (C) 2001-2014, International Business Machines                   *
+*                Corporation and others. All Rights Reserved.                *
+*                                                                            *
+******************************************************************************
+*   file name:  ucln_cmn.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001July05
+*   created by: George Rhoten
+*/
+
+#ifndef __UCLN_CMN_H__
+#define __UCLN_CMN_H__
+
+#include "unicode/utypes.h"
+#include "ucln.h"
+
+/* These are the cleanup functions for various APIs. */
+/* @return true if cleanup complete successfully.*/
+U_CFUNC UBool umtx_cleanup(void);
+
+U_CFUNC UBool utrace_cleanup(void);
+
+U_CFUNC UBool ucln_lib_cleanup(void);
+
+/*
+Please keep the order of enums declared in same order
+as the cleanup functions are suppose to be called. */
+typedef enum ECleanupCommonType {
+    UCLN_COMMON_START = -1,
+    UCLN_COMMON_USPREP,
+    UCLN_COMMON_BREAKITERATOR,
+    UCLN_COMMON_BREAKITERATOR_DICT,
+    UCLN_COMMON_SERVICE,
+    UCLN_COMMON_LOCALE_KEY_TYPE,
+    UCLN_COMMON_LOCALE,
+    UCLN_COMMON_LOCALE_AVAILABLE,
+    UCLN_COMMON_ULOC,
+    UCLN_COMMON_LOADED_NORMALIZER2,
+    UCLN_COMMON_NORMALIZER2,
+    UCLN_COMMON_USET,
+    UCLN_COMMON_UNAMES,
+    UCLN_COMMON_UPROPS,
+    UCLN_COMMON_UCNV,
+    UCLN_COMMON_UCNV_IO,
+    UCLN_COMMON_UDATA,
+    UCLN_COMMON_PUTIL,
+    UCLN_COMMON_LIST_FORMATTER,
+    UCLN_COMMON_UINIT,
+
+    /*
+       Unified caches caches collation stuff. Collation data structures
+       contain resource bundles which means that unified cache cleanup
+       must happen before resource bundle clean up.
+    */
+    UCLN_COMMON_UNIFIED_CACHE,
+    UCLN_COMMON_URES,
+    UCLN_COMMON_COUNT /* This must be last */
+} ECleanupCommonType;
+
+/* Main library cleanup registration function. */
+/* See common/ucln.h for details on adding a cleanup function. */
+/* Note: the global mutex must not be held when calling this function. */
+U_CFUNC void U_EXPORT2 ucln_common_registerCleanup(ECleanupCommonType type,
+                                                   cleanupFunc *func);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/ucln_imp.h b/src/core/basetypes/icu-ucsdet/include/ucln_imp.h
new file mode 100644
index 00000000..d5d202ec
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucln_imp.h
@@ -0,0 +1,178 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2011, International Business Machines
+*                Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*   file name:  ucln_imp.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   This file contains the platform specific implementation of per-library cleanup.
+*
+*/
+
+
+#ifndef __UCLN_IMP_H__
+#define __UCLN_IMP_H__
+
+#include "ucln.h"
+#include <stdlib.h>
+
+/**
+ * Auto cleanup of ICU libraries
+ * There are several methods in per library cleanup of icu libraries:
+ * 1) Compiler/Platform based cleanup:
+ *   a) Windows MSVC uses DllMain()
+ *   b) GCC uses destructor function attribute
+ *   c) Sun Studio, AIX VA, and HP-UX aCC uses a linker option to set the exit function
+ * 2) Using atexit()
+ * 3) Implementing own automatic cleanup functions
+ *
+ * For option 1, ensure that UCLN_NO_AUTO_CLEANUP is set to 0 by using --enable-auto-cleanup
+ * configure option or by otherwise setting UCLN_NO_AUTO_CLEANUP to 0
+ * For option 2, follow option 1 and also define UCLN_AUTO_ATEXIT
+ * For option 3, follow option 1 and also define UCLN_AUTO_LOCAL (see below for more information)
+ */
+
+#if !UCLN_NO_AUTO_CLEANUP
+
+/*
+ * The following declarations are for when UCLN_AUTO_LOCAL or UCLN_AUTO_ATEXIT
+ * are defined. They are commented out because they are static and will be defined
+ * later. The information is still here to provide some guidance for the developer
+ * who chooses to use UCLN_AUTO_LOCAL.
+ */
+/**
+ * Give the library an opportunity to register an automatic cleanup.
+ * This may be called more than once.
+ */
+/*static void ucln_registerAutomaticCleanup();*/
+/**
+ * Unregister an automatic cleanup, if possible. Called from cleanup.
+ */
+/*static void ucln_unRegisterAutomaticCleanup();*/
+
+#ifdef UCLN_TYPE_IS_COMMON
+#   define UCLN_CLEAN_ME_UP u_cleanup()
+#else
+#   define UCLN_CLEAN_ME_UP ucln_cleanupOne(UCLN_TYPE)
+#endif
+
+/* ------------ automatic cleanup: registration. Choose ONE ------- */
+#if defined(UCLN_AUTO_LOCAL)
+/* To use:
+ *  1. define UCLN_AUTO_LOCAL,
+ *  2. create ucln_local_hook.c containing implementations of
+ *           static void ucln_registerAutomaticCleanup()
+ *           static void ucln_unRegisterAutomaticCleanup()
+ */
+#include "ucln_local_hook.c"
+
+#elif defined(UCLN_AUTO_ATEXIT)
+/*
+ * Use the ANSI C 'atexit' function. Note that this mechanism does not
+ * guarantee the order of cleanup relative to other users of ICU!
+ */
+static UBool gAutoCleanRegistered = FALSE;
+
+static void ucln_atexit_handler()
+{
+    UCLN_CLEAN_ME_UP;
+}
+
+static void ucln_registerAutomaticCleanup()
+{
+    if(!gAutoCleanRegistered) {
+        gAutoCleanRegistered = TRUE;
+        atexit(&ucln_atexit_handler);
+    }
+}
+
+static void ucln_unRegisterAutomaticCleanup () {
+}
+/* ------------end of automatic cleanup: registration. ------- */
+
+#elif defined (UCLN_FINI)
+/**
+ * If UCLN_FINI is defined, it is the (versioned, etc) name of a cleanup
+ * entrypoint. Add a stub to call ucln_cleanupOne
+ * Used on AIX, Solaris, and HP-UX
+ */
+U_CAPI void U_EXPORT2 UCLN_FINI (void);
+
+U_CAPI void U_EXPORT2 UCLN_FINI ()
+{
+    /* This function must be defined, if UCLN_FINI is defined, else link error. */
+     UCLN_CLEAN_ME_UP;
+}
+
+/* Windows: DllMain */
+#elif U_PLATFORM_HAS_WIN32_API
+/* 
+ * ICU's own DllMain.
+ */
+
+/* these are from putil.c */
+/* READ READ READ READ!    Are you getting compilation errors from windows.h?
+          Any source file which includes this (ucln_imp.h) header MUST 
+          be defined with language extensions ON. */
+#   define WIN32_LEAN_AND_MEAN
+#   define VC_EXTRALEAN
+#   define NOUSER
+#   define NOSERVICE
+#   define NOIME
+#   define NOMCX
+#   include <windows.h>
+/*
+ * This is a stub DllMain function with icu specific process handling code.
+ */
+BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
+{
+    BOOL status = TRUE;
+
+    switch(fdwReason) {
+        case DLL_PROCESS_ATTACH:
+             /* ICU does not trap process attach, but must pass these through properly. */
+            /* ICU specific process attach could go here */
+            break;
+
+        case DLL_PROCESS_DETACH:
+            /* Here is the one we actually care about. */
+
+            UCLN_CLEAN_ME_UP;
+
+            break;
+
+        case DLL_THREAD_ATTACH:
+            /* ICU does not trap thread attach, but must pass these through properly. */
+            /* ICU specific thread attach could go here */
+            break;
+
+        case DLL_THREAD_DETACH:
+            /* ICU does not trap thread detach, but must pass these through properly. */
+            /* ICU specific thread detach could go here */
+            break;
+
+    }
+    return status;
+}
+
+#elif defined(__GNUC__)
+/* GCC - use __attribute((destructor)) */
+static void ucln_destructor()   __attribute__((destructor)) ;
+
+static void ucln_destructor() 
+{
+    UCLN_CLEAN_ME_UP;
+}
+
+#endif
+
+#endif /* UCLN_NO_AUTO_CLEANUP */
+
+#else
+#error This file can only be included once.
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/ucln_in.h b/src/core/basetypes/icu-ucsdet/include/ucln_in.h
new file mode 100644
index 00000000..e9685161
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucln_in.h
@@ -0,0 +1,64 @@
+/*
+******************************************************************************
+* Copyright (C) 2001-2014, International Business Machines
+*                Corporation and others. All Rights Reserved.
+******************************************************************************
+*   file name:  ucln_cmn.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001July05
+*   created by: George Rhoten
+*/
+
+#ifndef __UCLN_IN_H__
+#define __UCLN_IN_H__
+
+#include "unicode/utypes.h"
+#include "ucln.h"
+
+/*
+Please keep the order of enums declared in same order
+as the functions are suppose to be called.
+It's usually best to have child dependencies called first. */
+typedef enum ECleanupI18NType {
+    UCLN_I18N_START = -1,
+    UCLN_I18N_IDENTIFIER_INFO,
+    UCLN_I18N_SPOOF,
+    UCLN_I18N_TRANSLITERATOR,
+    UCLN_I18N_REGEX,
+    UCLN_I18N_ISLAMIC_CALENDAR,
+    UCLN_I18N_CHINESE_CALENDAR,
+    UCLN_I18N_HEBREW_CALENDAR,
+    UCLN_I18N_ASTRO_CALENDAR,
+    UCLN_I18N_DANGI_CALENDAR,
+    UCLN_I18N_CALENDAR,
+    UCLN_I18N_TIMEZONEFORMAT,
+    UCLN_I18N_TZDBTIMEZONENAMES,
+    UCLN_I18N_TIMEZONEGENERICNAMES,
+    UCLN_I18N_TIMEZONENAMES,
+    UCLN_I18N_ZONEMETA,
+    UCLN_I18N_TIMEZONE,
+    UCLN_I18N_CURRENCY,
+    UCLN_I18N_DECFMT,
+    UCLN_I18N_NUMFMT,
+    UCLN_I18N_SMPDTFMT,
+    UCLN_I18N_USEARCH,
+    UCLN_I18N_COLLATOR,
+    UCLN_I18N_UCOL_RES,
+    UCLN_I18N_CSDET,
+    UCLN_I18N_COLLATION_ROOT,
+    UCLN_I18N_GENDERINFO,
+    UCLN_I18N_CDFINFO,
+    UCLN_I18N_REGION,
+    UCLN_I18N_COUNT /* This must be last */
+} ECleanupI18NType;
+
+/* Main library cleanup registration function. */
+/* See common/ucln.h for details on adding a cleanup function. */
+/* Note: the global mutex must not be held when calling this function. */
+U_CFUNC void U_EXPORT2 ucln_i18n_registerCleanup(ECleanupI18NType type,
+                                                 cleanupFunc *func);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/ucmndata.h b/src/core/basetypes/icu-ucsdet/include/ucmndata.h
new file mode 100644
index 00000000..36163c50
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ucmndata.h
@@ -0,0 +1,111 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************/
+
+
+/*----------------------------------------------------------------------------------
+ *
+ *   UCommonData   An abstract interface for dealing with ICU Common Data Files.
+ *                 ICU Common Data Files are a grouping of a number of individual
+ *                 data items (resources, converters, tables, anything) into a
+ *                 single file or dll.  The combined format includes a table of
+ *                 contents for locating the individual items by name.
+ *
+ *                 Two formats for the table of contents are supported, which is
+ *                 why there is an abstract inteface involved.
+ *
+ *                 These functions are part of the ICU internal implementation, and
+ *                 are not inteded to be used directly by applications.
+ */
+
+#ifndef __UCMNDATA_H__
+#define __UCMNDATA_H__
+
+#include "unicode/udata.h"
+#include "umapfile.h"
+
+
+#define COMMON_DATA_NAME U_ICUDATA_NAME
+
+typedef struct  {
+    uint16_t    headerSize;
+    uint8_t     magic1;
+    uint8_t     magic2;
+} MappedData;
+
+
+typedef struct  {
+    MappedData  dataHeader;
+    UDataInfo   info;
+} DataHeader;
+
+typedef struct {
+    uint32_t nameOffset;
+    uint32_t dataOffset;
+} UDataOffsetTOCEntry;
+
+typedef struct {
+    uint32_t count;
+    UDataOffsetTOCEntry entry[2];    /* Actual size of array is from count. */
+} UDataOffsetTOC;
+
+/**
+ * Get the header size from a const DataHeader *udh.
+ * Handles opposite-endian data.
+ *
+ * @internal
+ */
+U_CFUNC uint16_t
+udata_getHeaderSize(const DataHeader *udh);
+
+/**
+ * Get the UDataInfo.size from a const UDataInfo *info.
+ * Handles opposite-endian data.
+ *
+ * @internal
+ */
+U_CFUNC uint16_t
+udata_getInfoSize(const UDataInfo *info);
+
+U_CDECL_BEGIN
+/*
+ *  "Virtual" functions for data lookup.
+ *  To call one, given a UDataMemory *p, the code looks like this:
+ *     p->vFuncs.Lookup(p, tocEntryName, pErrorCode);
+ *          (I sure do wish this was written in C++, not C)
+ */
+
+typedef const DataHeader *
+(U_CALLCONV * LookupFn)(const UDataMemory *pData,
+                        const char *tocEntryName,
+                        int32_t *pLength,
+                        UErrorCode *pErrorCode);
+
+typedef uint32_t
+(U_CALLCONV * NumEntriesFn)(const UDataMemory *pData);
+
+U_CDECL_END
+
+typedef struct {
+    LookupFn      Lookup;
+    NumEntriesFn  NumEntries; 
+} commonDataFuncs;
+
+
+/*
+ *  Functions to check whether a UDataMemory refers to memory containing 
+ *     a recognizable header and table of contents a Common Data Format
+ *
+ *     If a valid header and TOC are found,
+ *         set the CommonDataFuncs function dispatch vector in the UDataMemory
+ *             to point to the right functions for the TOC type.
+ *     otherwise
+ *         set an errorcode.
+ */
+U_CFUNC void udata_checkCommonData(UDataMemory *pData, UErrorCode *pErrorCode);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/udataswp.h b/src/core/basetypes/icu-ucsdet/include/udataswp.h
new file mode 100644
index 00000000..66c84955
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/udataswp.h
@@ -0,0 +1,351 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  udataswp.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003jun05
+*   created by: Markus W. Scherer
+*
+*   Definitions for ICU data transformations for different platforms,
+*   changing between big- and little-endian data and/or between
+*   charset families (ASCII<->EBCDIC).
+*/
+
+#ifndef __UDATASWP_H__
+#define __UDATASWP_H__
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+
+/* forward declaration */
+
+U_CDECL_BEGIN
+
+struct UDataSwapper;
+typedef struct UDataSwapper UDataSwapper;
+
+/**
+ * Function type for data transformation.
+ * Transforms data, or just returns the length of the data if
+ * the input length is -1.
+ * Swap functions assume that their data pointers are aligned properly.
+ *
+ * Quick implementation outline:
+ * (best to copy and adapt and existing swapper implementation)
+ * check that the data looks like the expected format
+ * if(length<0) {
+ *   preflight:
+ *   never dereference outData
+ *   read inData and determine the data size
+ *   assume that inData is long enough for this
+ * } else {
+ *   outData can be NULL if length==0
+ *   inData==outData (in-place swapping) possible but not required!
+ *   verify that length>=(actual size)
+ *   if there is a chance that not every byte up to size is reached
+ *     due to padding etc.:
+ *   if(inData!=outData) {
+ *     memcpy(outData, inData, actual size);
+ *   }
+ *   swap contents
+ * }
+ * return actual size
+ *
+ * Further implementation notes:
+ * - read integers from inData before swapping them
+ *   because in-place swapping can make them unreadable
+ * - compareInvChars compares a local Unicode string with already-swapped
+ *   output charset strings
+ *
+ * @param ds Pointer to UDataSwapper containing global data about the
+ *           transformation and function pointers for handling primitive
+ *           types.
+ * @param inData Pointer to the input data to be transformed or examined.
+ * @param length Length of the data, counting bytes. May be -1 for preflighting.
+ *               If length>=0, then transform the data.
+ *               If length==-1, then only determine the length of the data.
+ *               The length cannot be determined from the data itself for all
+ *               types of data (e.g., not for simple arrays of integers).
+ * @param outData Pointer to the output data buffer.
+ *                If length>=0 (transformation), then the output buffer must
+ *                have a capacity of at least length.
+ *                If length==-1, then outData will not be used and can be NULL.
+ * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
+ *                   fulfill U_SUCCESS on input.
+ * @return The actual length of the data.
+ *
+ * @see UDataSwapper
+ * @internal ICU 2.8
+ */
+typedef int32_t U_CALLCONV
+UDataSwapFn(const UDataSwapper *ds,
+            const void *inData, int32_t length, void *outData,
+            UErrorCode *pErrorCode);
+
+/**
+ * Convert one uint16_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+typedef uint16_t U_CALLCONV
+UDataReadUInt16(uint16_t x);
+
+/**
+ * Convert one uint32_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+typedef uint32_t U_CALLCONV
+UDataReadUInt32(uint32_t x);
+
+/**
+ * Convert one uint16_t from platform to input endianness.
+ * @internal ICU 2.8
+ */
+typedef void U_CALLCONV
+UDataWriteUInt16(uint16_t *p, uint16_t x);
+
+/**
+ * Convert one uint32_t from platform to input endianness.
+ * @internal ICU 2.8
+ */
+typedef void U_CALLCONV
+UDataWriteUInt32(uint32_t *p, uint32_t x);
+
+/**
+ * Compare invariant-character strings, one in the output data and the
+ * other one caller-provided in Unicode.
+ * An output data string is compared because strings are usually swapped
+ * before the rest of the data, to allow for sorting of string tables
+ * according to the output charset.
+ * You can use -1 for the length parameters of NUL-terminated strings as usual.
+ * Returns Unicode code point order for invariant characters.
+ * @internal ICU 2.8
+ */
+typedef int32_t U_CALLCONV
+UDataCompareInvChars(const UDataSwapper *ds,
+                     const char *outString, int32_t outLength,
+                     const UChar *localString, int32_t localLength);
+
+/**
+ * Function for message output when an error occurs during data swapping.
+ * A format string and variable number of arguments are passed
+ * like for vprintf().
+ *
+ * @param context A function-specific context pointer.
+ * @param fmt The format string.
+ * @param args The arguments for format string inserts.
+ *
+ * @internal ICU 2.8
+ */
+typedef void U_CALLCONV
+UDataPrintError(void *context, const char *fmt, va_list args);
+
+struct UDataSwapper {
+    /** Input endianness. @internal ICU 2.8 */
+    UBool inIsBigEndian;
+    /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
+    uint8_t inCharset;
+    /** Output endianness. @internal ICU 2.8 */
+    UBool outIsBigEndian;
+    /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
+    uint8_t outCharset;
+
+    /* basic functions for reading data values */
+
+    /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */
+    UDataReadUInt16 *readUInt16;
+    /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */
+    UDataReadUInt32 *readUInt32;
+    /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */
+    UDataCompareInvChars *compareInvChars;
+
+    /* basic functions for writing data values */
+
+    /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */
+    UDataWriteUInt16 *writeUInt16;
+    /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */
+    UDataWriteUInt32 *writeUInt32;
+
+    /* basic functions for data transformations */
+
+    /** Transform an array of 16-bit integers. @internal ICU 2.8 */
+    UDataSwapFn *swapArray16;
+    /** Transform an array of 32-bit integers. @internal ICU 2.8 */
+    UDataSwapFn *swapArray32;
+    /** Transform an array of 64-bit integers. @internal ICU 53 */
+    UDataSwapFn *swapArray64;
+    /** Transform an invariant-character string. @internal ICU 2.8 */
+    UDataSwapFn *swapInvChars;
+
+    /**
+     * Function for message output when an error occurs during data swapping.
+     * Can be NULL.
+     * @internal ICU 2.8
+     */
+    UDataPrintError *printError;
+    /** Context pointer for printError. @internal ICU 2.8 */
+    void *printErrorContext;
+};
+
+U_CDECL_END
+
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
+                  UBool outIsBigEndian, uint8_t outCharset,
+                  UErrorCode *pErrorCode);
+
+/**
+ * Open a UDataSwapper for the given input data and the specified output
+ * characteristics.
+ * Values of -1 for any of the characteristics mean the local platform's
+ * characteristics.
+ *
+ * @see udata_swap
+ * @internal ICU 2.8
+ */
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapperForInputData(const void *data, int32_t length,
+                              UBool outIsBigEndian, uint8_t outCharset,
+                              UErrorCode *pErrorCode);
+
+U_CAPI void U_EXPORT2
+udata_closeSwapper(UDataSwapper *ds);
+
+/**
+ * Read the beginning of an ICU data piece, recognize magic bytes,
+ * swap the structure.
+ * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece.
+ *
+ * @return The size of the data header, in bytes.
+ *
+ * @internal ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swapDataHeader(const UDataSwapper *ds,
+                     const void *inData, int32_t length, void *outData,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Convert one int16_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+U_CAPI int16_t U_EXPORT2
+udata_readInt16(const UDataSwapper *ds, int16_t x);
+
+/**
+ * Convert one int32_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+udata_readInt32(const UDataSwapper *ds, int32_t x);
+
+/**
+ * Swap a block of invariant, NUL-terminated strings, but not padding
+ * bytes after the last string.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swapInvStringBlock(const UDataSwapper *ds,
+                         const void *inData, int32_t length, void *outData,
+                         UErrorCode *pErrorCode);
+
+U_CAPI void U_EXPORT2
+udata_printError(const UDataSwapper *ds,
+                 const char *fmt,
+                 ...);
+
+/* internal exports from putil.c -------------------------------------------- */
+
+/* declared here to keep them out of the public putil.h */
+
+/**
+ * Swap invariant char * strings ASCII->EBCDIC.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_ebcdicFromAscii(const UDataSwapper *ds,
+                     const void *inData, int32_t length, void *outData,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Copy invariant ASCII char * strings and verify they are invariant.
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_copyAscii(const UDataSwapper *ds,
+               const void *inData, int32_t length, void *outData,
+               UErrorCode *pErrorCode);
+
+/**
+ * Swap invariant char * strings EBCDIC->ASCII.
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_asciiFromEbcdic(const UDataSwapper *ds,
+                     const void *inData, int32_t length, void *outData,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Copy invariant EBCDIC char * strings and verify they are invariant.
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_copyEbcdic(const UDataSwapper *ds,
+                const void *inData, int32_t length, void *outData,
+                UErrorCode *pErrorCode);
+
+/**
+ * Compare ASCII invariant char * with Unicode invariant UChar *
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_compareInvAscii(const UDataSwapper *ds,
+                     const char *outString, int32_t outLength,
+                     const UChar *localString, int32_t localLength);
+
+/**
+ * Compare EBCDIC invariant char * with Unicode invariant UChar *
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_compareInvEbcdic(const UDataSwapper *ds,
+                      const char *outString, int32_t outLength,
+                      const UChar *localString, int32_t localLength);
+
+/* material... -------------------------------------------------------------- */
+
+#if 0
+
+/* udata.h */
+
+/**
+ * Public API function in udata.c
+ *
+ * Same as udata_openChoice() but automatically swaps the data.
+ * isAcceptable, if not NULL, may accept data with endianness and charset family
+ * different from the current platform's properties.
+ * If the data is acceptable and the platform properties do not match, then
+ * the swap function is called to swap an allocated version of the data.
+ * Preflighting may or may not be performed depending on whether the size of
+ * the loaded data item is known.
+ *
+ * @param isAcceptable Same as for udata_openChoice(). May be NULL.
+ *
+ * @internal ICU 2.8
+ */
+U_CAPI UDataMemory * U_EXPORT2
+udata_openSwap(const char *path, const char *type, const char *name,
+               UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext,
+               UDataSwapFn *swap,
+               UDataPrintError *printError, void *printErrorContext,
+               UErrorCode *pErrorCode);
+
+#endif
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/uelement.h b/src/core/basetypes/icu-ucsdet/include/uelement.h
new file mode 100644
index 00000000..4eaddd9d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uelement.h
@@ -0,0 +1,89 @@
+/*
+*******************************************************************************
+*   Copyright (C) 1997-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  uelement.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2011jul04
+*   created by: Markus W. Scherer
+*
+*   Common definitions for UHashTable and UVector.
+*   UHashTok moved here from uhash.h and renamed UElement.
+*   This allows users of UVector to avoid the confusing #include of uhash.h.
+*   uhash.h aliases UElement to UHashTok,
+*   so that we need not change all of its code and its users.
+*/
+
+#ifndef __UELEMENT_H__
+#define __UELEMENT_H__
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/**
+ * A UVector element, or a key or value within a UHashtable.
+ * It may be either a 32-bit integral value or an opaque void* pointer.
+ * The void* pointer may be smaller than 32 bits (e.g. 24 bits)
+ * or may be larger (e.g. 64 bits).
+ *
+ * Because a UElement is the size of a native pointer or a 32-bit
+ * integer, we pass it around by value.
+ */
+union UElement {
+    void*   pointer;
+    int32_t integer;
+};
+typedef union UElement UElement;
+
+/**
+ * An element-equality (boolean) comparison function.
+ * @param e1 An element (object or integer)
+ * @param e2 An element (object or integer)
+ * @return TRUE if the two elements are equal.
+ */
+typedef UBool U_CALLCONV UElementsAreEqual(const UElement e1, const UElement e2);
+
+/**
+ * An element sorting (three-way) comparison function.
+ * @param e1 An element (object or integer)
+ * @param e2 An element (object or integer)
+ * @return 0 if the two elements are equal, -1 if e1 is < e2, or +1 if e1 is > e2.
+ */
+typedef int8_t U_CALLCONV UElementComparator(UElement e1, UElement e2);
+
+/**
+ * An element assignment function.  It may copy an integer, copy
+ * a pointer, or clone a pointer, as appropriate.
+ * @param dst The element to be assigned to
+ * @param src The element to assign from
+ */
+typedef void U_CALLCONV UElementAssigner(UElement *dst, UElement *src);
+
+U_CDECL_END
+
+/**
+ * Comparator function for UnicodeString* keys. Implements UElementsAreEqual.
+ * @param key1 The string for comparison
+ * @param key2 The string for comparison
+ * @return true if key1 and key2 are equal, return false otherwise.
+ */
+U_CAPI UBool U_EXPORT2 
+uhash_compareUnicodeString(const UElement key1, const UElement key2);
+
+/**
+ * Comparator function for UnicodeString* keys (case insensitive).
+ * Make sure to use together with uhash_hashCaselessUnicodeString.
+ * Implements UElementsAreEqual.
+ * @param key1 The string for comparison
+ * @param key2 The string for comparison
+ * @return true if key1 and key2 are equal, return false otherwise.
+ */
+U_CAPI UBool U_EXPORT2 
+uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2);
+
+#endif  /* __UELEMENT_H__ */
diff --git a/src/core/basetypes/icu-ucsdet/include/uenumimp.h b/src/core/basetypes/icu-ucsdet/include/uenumimp.h
new file mode 100644
index 00000000..664bc686
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uenumimp.h
@@ -0,0 +1,153 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uenumimp.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:2
+*
+*   created on: 2002jul08
+*   created by: Vladimir Weinstein
+*/
+
+#ifndef __UENUMIMP_H
+#define __UENUMIMP_H
+
+#include "unicode/uenum.h"
+
+U_CDECL_BEGIN
+
+/** 
+ * following are the type declarations for 
+ * implementations of APIs. If any of these
+ * functions are NULL, U_UNSUPPORTED_ERROR
+ * is returned. If close is NULL, the enumeration
+ * object is going to be released.
+ * Initial error checking is done in the body
+ * of API function, so the implementations 
+ * need not to check the initial error condition.
+ */
+
+/**
+ * Function type declaration for uenum_close().
+ *
+ * This function should cleanup the enumerator object
+ *
+ * @param en enumeration to be closed
+ */
+typedef void U_CALLCONV
+UEnumClose(UEnumeration *en);
+
+/**
+ * Function type declaration for uenum_count().
+ *
+ * This function should count the number of elements
+ * in this enumeration
+ *
+ * @param en enumeration to be counted
+ * @param status pointer to UErrorCode variable
+ * @return number of elements in enumeration
+ */
+typedef int32_t U_CALLCONV
+UEnumCount(UEnumeration *en, UErrorCode *status);
+
+/**
+ * Function type declaration for uenum_unext().
+ *
+ * This function returns the next element as a UChar *,
+ * or NULL after all elements haven been enumerated.
+ *
+ * @param en enumeration 
+ * @param resultLength pointer to result length
+ * @param status pointer to UErrorCode variable
+ * @return next element as UChar *,
+ *         or NULL after all elements haven been enumerated
+ */
+typedef const UChar* U_CALLCONV 
+UEnumUNext(UEnumeration* en,
+            int32_t* resultLength,
+            UErrorCode* status);
+
+/**
+ * Function type declaration for uenum_next().
+ *
+ * This function returns the next element as a char *,
+ * or NULL after all elements haven been enumerated.
+ *
+ * @param en enumeration 
+ * @param resultLength pointer to result length
+ * @param status pointer to UErrorCode variable
+ * @return next element as char *,
+ *         or NULL after all elements haven been enumerated
+ */
+typedef const char* U_CALLCONV 
+UEnumNext(UEnumeration* en,
+           int32_t* resultLength,
+           UErrorCode* status);
+
+/**
+ * Function type declaration for uenum_reset().
+ *
+ * This function should reset the enumeration 
+ * object
+ *
+ * @param en enumeration 
+ * @param status pointer to UErrorCode variable
+ */
+typedef void U_CALLCONV 
+UEnumReset(UEnumeration* en, 
+            UErrorCode* status);
+
+
+struct UEnumeration {
+    /* baseContext. For the base class only. Don't touch! */
+    void *baseContext;
+
+    /* context. Use it for what you need */
+    void *context;
+
+    /** 
+     * these are functions that will 
+     * be used for APIs
+     */
+    /* called from uenum_close */
+    UEnumClose *close;
+    /* called from uenum_count */
+    UEnumCount *count;
+    /* called from uenum_unext */
+    UEnumUNext *uNext;
+    /* called from uenum_next */
+    UEnumNext  *next;
+    /* called from uenum_reset */
+    UEnumReset *reset;
+};
+
+U_CDECL_END
+
+/* This is the default implementation for uenum_unext().
+ * It automatically converts the char * string to UChar *.
+ * Don't call this directly.  This is called internally by uenum_unext
+ * when a UEnumeration is defined with 'uNext' pointing to this
+ * function.
+ */
+U_CAPI const UChar* U_EXPORT2
+uenum_unextDefault(UEnumeration* en,
+            int32_t* resultLength,
+            UErrorCode* status);
+
+/* This is the default implementation for uenum_next().
+ * It automatically converts the UChar * string to char *.
+ * Don't call this directly.  This is called internally by uenum_next
+ * when a UEnumeration is defined with 'next' pointing to this
+ * function.
+ */
+U_CAPI const char* U_EXPORT2
+uenum_nextDefault(UEnumeration* en,
+            int32_t* resultLength,
+            UErrorCode* status);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/uinvchar.h b/src/core/basetypes/icu-ucsdet/include/uinvchar.h
new file mode 100644
index 00000000..f307bd6a
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uinvchar.h
@@ -0,0 +1,125 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uinvchar.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:2
+*
+*   created on: 2004sep14
+*   created by: Markus W. Scherer
+*
+*   Definitions for handling invariant characters, moved here from putil.c
+*   for better modularization.
+*/
+
+#ifndef __UINVCHAR_H__
+#define __UINVCHAR_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * Check if a char string only contains invariant characters.
+ * See utypes.h for details.
+ *
+ * @param s Input string pointer.
+ * @param length Length of the string, can be -1 if NUL-terminated.
+ * @return TRUE if s contains only invariant characters.
+ *
+ * @internal (ICU 2.8)
+ */
+U_INTERNAL UBool U_EXPORT2
+uprv_isInvariantString(const char *s, int32_t length);
+
+/**
+ * Check if a Unicode string only contains invariant characters.
+ * See utypes.h for details.
+ *
+ * @param s Input string pointer.
+ * @param length Length of the string, can be -1 if NUL-terminated.
+ * @return TRUE if s contains only invariant characters.
+ *
+ * @internal (ICU 2.8)
+ */
+U_INTERNAL UBool U_EXPORT2
+uprv_isInvariantUString(const UChar *s, int32_t length);
+
+/**
+ * \def U_UPPER_ORDINAL
+ * Get the ordinal number of an uppercase invariant character
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define U_UPPER_ORDINAL(x) ((x)-'A')
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#   define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
+                              (((x) < 'S') ? ((x)-'J'+9) : \
+                               ((x)-'S'+18)))
+#else
+#   error Unknown charset family!
+#endif
+
+/**
+ * Compare two EBCDIC invariant-character strings in ASCII order.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2
+uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
+
+/**
+ * \def uprv_compareInvCharsAsAscii
+ * Compare two invariant-character strings in ASCII order.
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
+#else
+#   error Unknown charset family!
+#endif
+
+/**
+ * Converts an EBCDIC invariant character to lowercase ASCII.
+ * @internal
+ */
+U_INTERNAL char U_EXPORT2
+uprv_ebcdicToLowercaseAscii(char c);
+
+/**
+ * \def uprv_invCharToLowercaseAscii
+ * Converts an invariant character to lowercase ASCII.
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define uprv_invCharToLowercaseAscii uprv_asciitolower
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#   define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
+#else
+#   error Unknown charset family!
+#endif
+
+/**
+ * Copy EBCDIC to ASCII
+ * @internal
+ * @see uprv_strncpy
+ */
+U_INTERNAL uint8_t* U_EXPORT2
+uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
+
+
+/**
+ * Copy ASCII to EBCDIC
+ * @internal
+ * @see uprv_strncpy
+ */
+U_INTERNAL uint8_t* U_EXPORT2
+uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
+
+
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/umapfile.h b/src/core/basetypes/icu-ucsdet/include/umapfile.h
new file mode 100644
index 00000000..2995e381
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/umapfile.h
@@ -0,0 +1,55 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************/
+
+/*----------------------------------------------------------------------------------
+ *
+ *       Memory mapped file wrappers for use by the ICU Data Implementation
+ *
+ *           Porting note:  The implementation of these functions is very platform specific.
+ *             Not all platforms can do real memory mapping.  Those that can't
+ *             still must implement these functions, getting the data into memory using
+ *             whatever means are available.
+ *
+ *            These functions are part of the ICU internal implementation, and
+ *            are not inteded to be used directly by applications.
+ *
+ *----------------------------------------------------------------------------------*/
+
+#ifndef __UMAPFILE_H__
+#define __UMAPFILE_H__
+
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+#include "putilimp.h"
+
+U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path);
+U_CFUNC void  uprv_unmapFile(UDataMemory *pData);
+
+/* MAP_NONE: no memory mapping, no file access at all */
+#define MAP_NONE        0
+#define MAP_WIN32       1
+#define MAP_POSIX       2
+#define MAP_STDIO       3
+#define MAP_390DLL      4
+
+#if UCONFIG_NO_FILE_IO
+#   define MAP_IMPLEMENTATION MAP_NONE
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+#   define MAP_IMPLEMENTATION MAP_WIN32
+#elif U_HAVE_MMAP || U_PLATFORM == U_PF_OS390
+#   if U_PLATFORM == U_PF_OS390 && defined (OS390_STUBDATA)
+        /*   No memory mapping for 390 batch mode.  Fake it using dll loading.  */
+#       define MAP_IMPLEMENTATION MAP_390DLL
+#   else
+#       define MAP_IMPLEMENTATION MAP_POSIX
+#   endif
+#else /* unknown platform, no memory map implementation: use stdio.h and uprv_malloc() instead */
+#   define MAP_IMPLEMENTATION MAP_STDIO
+#endif
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/umutex.h b/src/core/basetypes/icu-ucsdet/include/umutex.h
new file mode 100644
index 00000000..e0ad0d3c
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/umutex.h
@@ -0,0 +1,424 @@
+/*
+**********************************************************************
+*   Copyright (C) 1997-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File UMUTEX.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   04/02/97  aliu        Creation.
+*   04/07/99  srl         rewrite - C interface, multiple mutices
+*   05/13/99  stephen     Changed to umutex (from cmutex)
+******************************************************************************
+*/
+
+#ifndef UMUTEX_H
+#define UMUTEX_H
+
+#include "unicode/utypes.h"
+#include "unicode/uclean.h"
+#include "putilimp.h"
+
+
+
+// Forward Declarations. UMutex is not in the ICU namespace (yet) because
+//                       there are some remaining references from plain C.
+struct UMutex;
+struct UConditionVar;
+
+U_NAMESPACE_BEGIN
+struct UInitOnce;
+U_NAMESPACE_END
+
+// Stringify macros, to allow #include of user supplied atomic & mutex files.
+#define U_MUTEX_STR(s) #s
+#define U_MUTEX_XSTR(s) U_MUTEX_STR(s)
+
+/****************************************************************************
+ *
+ *   Low Level Atomic Operations.
+ *      Compiler dependent. Not operating system dependent.
+ *
+ ****************************************************************************/
+#if defined (U_USER_ATOMICS_H)
+#include U_MUTEX_XSTR(U_USER_ATOMICS_H)
+
+#elif U_HAVE_STD_ATOMICS
+
+//  C++11 atomics are available.
+
+#include <atomic>
+
+U_NAMESPACE_BEGIN
+
+typedef std::atomic<int32_t> u_atomic_int32_t;
+#define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val)
+
+inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
+    return var.load(std::memory_order_acquire);
+}
+
+inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
+    var.store(val, std::memory_order_release);
+}
+
+inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) {
+    return var->fetch_add(1) + 1;
+}
+
+inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
+    return var->fetch_sub(1) - 1;
+}
+U_NAMESPACE_END
+
+#elif U_PLATFORM_HAS_WIN32_API
+
+// MSVC compiler. Reads and writes of volatile variables have
+//                acquire and release memory semantics, respectively.
+//                This is a Microsoft extension, not standard C++ behavior.
+//
+//   Update:      can't use this because of MinGW, built with gcc.
+//                Original plan was to use gcc atomics for MinGW, but they
+//                aren't supported, so we fold MinGW into this path.
+
+# define WIN32_LEAN_AND_MEAN
+# define VC_EXTRALEAN
+# define NOUSER
+# define NOSERVICE
+# define NOIME
+# define NOMCX
+# ifndef NOMINMAX
+# define NOMINMAX
+# endif
+# include <windows.h>
+
+U_NAMESPACE_BEGIN
+typedef volatile LONG u_atomic_int32_t;
+#define ATOMIC_INT32_T_INITIALIZER(val) val
+
+inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
+    return InterlockedCompareExchange(&var, 0, 0);
+}
+
+inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
+    InterlockedExchange(&var, val);
+}
+
+
+inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) {
+    return InterlockedIncrement(var);
+}
+
+inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
+    return InterlockedDecrement(var);
+}
+U_NAMESPACE_END
+
+
+#elif U_HAVE_GCC_ATOMICS
+/*
+ * gcc atomic ops. These are available on several other compilers as well.
+ */
+
+U_NAMESPACE_BEGIN
+typedef int32_t u_atomic_int32_t;
+#define ATOMIC_INT32_T_INITIALIZER(val) val
+
+inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
+    int32_t val = var;
+    __sync_synchronize();
+    return val;
+}
+
+inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
+    __sync_synchronize();
+    var = val;
+}
+
+inline int32_t umtx_atomic_inc(u_atomic_int32_t *p)  {
+   return __sync_add_and_fetch(p, 1);
+}
+
+inline int32_t umtx_atomic_dec(u_atomic_int32_t *p)  {
+   return __sync_sub_and_fetch(p, 1);
+}
+U_NAMESPACE_END
+
+#else
+
+/*
+ * Unknown Platform. Use out-of-line functions, which in turn use mutexes.
+ *                   Slow but correct.
+ */
+
+#define U_NO_PLATFORM_ATOMICS
+
+U_NAMESPACE_BEGIN
+typedef int32_t u_atomic_int32_t;
+#define ATOMIC_INT32_T_INITIALIZER(val) val
+
+U_COMMON_API int32_t U_EXPORT2 
+umtx_loadAcquire(u_atomic_int32_t &var);
+
+U_COMMON_API void U_EXPORT2 
+umtx_storeRelease(u_atomic_int32_t &var, int32_t val);
+
+U_COMMON_API int32_t U_EXPORT2 
+umtx_atomic_inc(u_atomic_int32_t *p);
+
+U_COMMON_API int32_t U_EXPORT2 
+umtx_atomic_dec(u_atomic_int32_t *p);
+
+U_NAMESPACE_END
+
+#endif  /* Low Level Atomic Ops Platfrom Chain */
+
+
+
+/*************************************************************************************************
+ *
+ *  UInitOnce Definitions.
+ *     These are platform neutral.
+ *
+ *************************************************************************************************/
+
+U_NAMESPACE_BEGIN
+
+struct UInitOnce {
+    u_atomic_int32_t   fState;
+    UErrorCode       fErrCode;
+    void reset() {fState = 0;};
+    UBool isReset() {return umtx_loadAcquire(fState) == 0;};
+// Note: isReset() is used by service registration code.
+//                 Thread safety of this usage needs review.
+};
+
+#define U_INITONCE_INITIALIZER {ATOMIC_INT32_T_INITIALIZER(0), U_ZERO_ERROR}
+
+
+U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &);
+U_COMMON_API void  U_EXPORT2 umtx_initImplPostInit(UInitOnce &);
+
+template<class T> void umtx_initOnce(UInitOnce &uio, T *obj, void (T::*fp)()) {
+    if (umtx_loadAcquire(uio.fState) == 2) {
+        return;
+    }
+    if (umtx_initImplPreInit(uio)) {
+        (obj->*fp)();
+        umtx_initImplPostInit(uio);
+    }
+}
+
+
+// umtx_initOnce variant for plain functions, or static class functions.
+//               No context parameter.
+inline void umtx_initOnce(UInitOnce &uio, void (*fp)()) {
+    if (umtx_loadAcquire(uio.fState) == 2) {
+        return;
+    }
+    if (umtx_initImplPreInit(uio)) {
+        (*fp)();
+        umtx_initImplPostInit(uio);
+    }
+}
+
+// umtx_initOnce variant for plain functions, or static class functions.
+//               With ErrorCode, No context parameter.
+inline void umtx_initOnce(UInitOnce &uio, void (*fp)(UErrorCode &), UErrorCode &errCode) {
+    if (U_FAILURE(errCode)) {
+        return;
+    }
+    if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
+        // We run the initialization.
+        (*fp)(errCode);
+        uio.fErrCode = errCode;
+        umtx_initImplPostInit(uio);
+    } else {
+        // Someone else already ran the initialization.
+        if (U_FAILURE(uio.fErrCode)) {
+            errCode = uio.fErrCode;
+        }
+    }
+}
+
+// umtx_initOnce variant for plain functions, or static class functions,
+//               with a context parameter.
+template<class T> void umtx_initOnce(UInitOnce &uio, void (*fp)(T), T context) {
+    if (umtx_loadAcquire(uio.fState) == 2) {
+        return;
+    }
+    if (umtx_initImplPreInit(uio)) {
+        (*fp)(context);
+        umtx_initImplPostInit(uio);
+    }
+}
+
+// umtx_initOnce variant for plain functions, or static class functions,
+//               with a context parameter and an error code.
+template<class T> void umtx_initOnce(UInitOnce &uio, void (*fp)(T, UErrorCode &), T context, UErrorCode &errCode) {
+    if (U_FAILURE(errCode)) {
+        return;
+    }
+    if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
+        // We run the initialization.
+        (*fp)(context, errCode);
+        uio.fErrCode = errCode;
+        umtx_initImplPostInit(uio);
+    } else {
+        // Someone else already ran the initialization.
+        if (U_FAILURE(uio.fErrCode)) {
+            errCode = uio.fErrCode;
+        }
+    }
+}
+
+U_NAMESPACE_END
+
+
+
+/*************************************************************************************************
+ *
+ *  Mutex Definitions. Platform Dependent, #if platform chain follows.
+ *         TODO:  Add a C++11 version.
+ *                Need to convert all mutex using files to C++ first.
+ *
+ *************************************************************************************************/
+
+#if defined(U_USER_MUTEX_H)
+// #inlcude "U_USER_MUTEX_H"
+#include U_MUTEX_XSTR(U_USER_MUTEX_H)
+
+#elif U_PLATFORM_HAS_WIN32_API
+
+/* Windows Definitions.
+ *    Windows comes first in the platform chain.
+ *    Cygwin (and possibly others) have both WIN32 and POSIX APIs. Prefer Win32 in this case.
+ */
+
+
+/* For CRITICAL_SECTION */
+
+/*
+ *   Note: there is an earlier include of windows.h in this file, but it is in
+ *         different conditionals.
+ *         This one is needed if we are using C++11 for atomic ops, but
+ *         win32 APIs for Critical Sections.
+ */
+
+# define WIN32_LEAN_AND_MEAN
+# define VC_EXTRALEAN
+# define NOUSER
+# define NOSERVICE
+# define NOIME
+# define NOMCX
+# ifndef NOMINMAX
+# define NOMINMAX
+# endif
+# include <windows.h>
+
+
+typedef struct UMutex {
+    icu::UInitOnce    fInitOnce;
+    CRITICAL_SECTION  fCS;
+} UMutex;
+
+/* Initializer for a static UMUTEX. Deliberately contains no value for the
+ *  CRITICAL_SECTION.
+ */
+#define U_MUTEX_INITIALIZER {U_INITONCE_INITIALIZER}
+
+struct UConditionVar {
+    HANDLE           fEntryGate;
+    HANDLE           fExitGate;
+    int32_t          fWaitCount;
+};
+
+#define U_CONDITION_INITIALIZER {NULL, NULL, 0}
+    
+
+
+#elif U_PLATFORM_IMPLEMENTS_POSIX
+
+/*
+ *  POSIX platform
+ */
+
+#include <pthread.h>
+
+struct UMutex {
+    pthread_mutex_t  fMutex;
+};
+typedef struct UMutex UMutex;
+#define U_MUTEX_INITIALIZER  {PTHREAD_MUTEX_INITIALIZER}
+
+struct UConditionVar {
+    pthread_cond_t   fCondition;
+};
+#define U_CONDITION_INITIALIZER {PTHREAD_COND_INITIALIZER}
+
+#else
+
+/*
+ *  Unknow platform type.
+ *      This is an error condition. ICU requires mutexes.
+ */
+
+#error Unknown Platform.
+
+#endif
+
+
+
+/**************************************************************************************
+ *
+ *  Mutex Implementation function declaratations.
+ *     Declarations are platform neutral.
+ *     Implementations, in umutex.cpp, are platform specific.
+ *
+ ************************************************************************************/
+
+/* Lock a mutex.
+ * @param mutex The given mutex to be locked.  Pass NULL to specify
+ *              the global ICU mutex.  Recursive locks are an error
+ *              and may cause a deadlock on some platforms.
+ */
+U_INTERNAL void U_EXPORT2 umtx_lock(UMutex* mutex);
+
+/* Unlock a mutex.
+ * @param mutex The given mutex to be unlocked.  Pass NULL to specify
+ *              the global ICU mutex.
+ */
+U_INTERNAL void U_EXPORT2 umtx_unlock (UMutex* mutex);
+
+/*
+ * Wait on a condition variable.
+ * The calling thread will unlock the mutex and wait on the condition variable.
+ * The mutex must be locked by the calling thread when invoking this function.
+ *
+ * @param cond the condition variable to wait on.
+ * @param mutex the associated mutex.
+ */
+
+U_INTERNAL void U_EXPORT2 umtx_condWait(UConditionVar *cond, UMutex *mutex);
+
+
+/*
+ * Broadcast wakeup of all threads waiting on a Condition.
+ * The associated mutex must be locked by the calling thread when calling
+ * this function; this is a temporary ICU restriction.
+ * 
+ * @param cond the condition variable.
+ */
+U_INTERNAL void U_EXPORT2 umtx_condBroadcast(UConditionVar *cond);
+
+/*
+ * Signal a condition variable, waking up one waiting thread.
+ * CAUTION: Do not use. Place holder only. Not implemented for Windows.
+ */
+U_INTERNAL void U_EXPORT2 umtx_condSignal(UConditionVar *cond);
+
+#endif /* UMUTEX_H */
+/*eof*/
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/appendable.h b/src/core/basetypes/icu-ucsdet/include/unicode/appendable.h
new file mode 100644
index 00000000..a6a83b15
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/appendable.h
@@ -0,0 +1,232 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2011-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  appendable.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010dec07
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __APPENDABLE_H__
+#define __APPENDABLE_H__
+
+/**
+ * \file
+ * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (UChars).
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * Base class for objects to which Unicode characters and strings can be appended.
+ * Combines elements of Java Appendable and ICU4C ByteSink.
+ *
+ * This class can be used in APIs where it does not matter whether the actual destination is
+ * a UnicodeString, a UChar[] array, a UnicodeSet, or any other object
+ * that receives and processes characters and/or strings.
+ *
+ * Implementation classes must implement at least appendCodeUnit(UChar).
+ * The base class provides default implementations for the other methods.
+ *
+ * The methods do not take UErrorCode parameters.
+ * If an error occurs (e.g., out-of-memory),
+ * in addition to returning FALSE from failing operations,
+ * the implementation must prevent unexpected behavior (e.g., crashes)
+ * from further calls and should make the error condition available separately
+ * (e.g., store a UErrorCode, make/keep a UnicodeString bogus).
+ * @stable ICU 4.8
+ */
+class U_COMMON_API Appendable : public UObject {
+public:
+    /**
+     * Destructor.
+     * @stable ICU 4.8
+     */
+    ~Appendable();
+
+    /**
+     * Appends a 16-bit code unit.
+     * @param c code unit
+     * @return TRUE if the operation succeeded
+     * @stable ICU 4.8
+     */
+    virtual UBool appendCodeUnit(UChar c) = 0;
+
+    /**
+     * Appends a code point.
+     * The default implementation calls appendCodeUnit(UChar) once or twice.
+     * @param c code point 0..0x10ffff
+     * @return TRUE if the operation succeeded
+     * @stable ICU 4.8
+     */
+    virtual UBool appendCodePoint(UChar32 c);
+
+    /**
+     * Appends a string.
+     * The default implementation calls appendCodeUnit(UChar) for each code unit.
+     * @param s string, must not be NULL if length!=0
+     * @param length string length, or -1 if NUL-terminated
+     * @return TRUE if the operation succeeded
+     * @stable ICU 4.8
+     */
+    virtual UBool appendString(const UChar *s, int32_t length);
+
+    /**
+     * Tells the object that the caller is going to append roughly
+     * appendCapacity UChars. A subclass might use this to pre-allocate
+     * a larger buffer if necessary.
+     * The default implementation does nothing. (It always returns TRUE.)
+     * @param appendCapacity estimated number of UChars that will be appended
+     * @return TRUE if the operation succeeded
+     * @stable ICU 4.8
+     */
+    virtual UBool reserveAppendCapacity(int32_t appendCapacity);
+
+    /**
+     * Returns a writable buffer for appending and writes the buffer's capacity to
+     * *resultCapacity. Guarantees *resultCapacity>=minCapacity.
+     * May return a pointer to the caller-owned scratch buffer which must have
+     * scratchCapacity>=minCapacity.
+     * The returned buffer is only valid until the next operation
+     * on this Appendable.
+     *
+     * After writing at most *resultCapacity UChars, call appendString() with the
+     * pointer returned from this function and the number of UChars written.
+     * Many appendString() implementations will avoid copying UChars if this function
+     * returned an internal buffer.
+     *
+     * Partial usage example:
+     * \code
+     *  int32_t capacity;
+     *  UChar* buffer = app.getAppendBuffer(..., &capacity);
+     *  ... Write n UChars into buffer, with n <= capacity.
+     *  app.appendString(buffer, n);
+     * \endcode
+     * In many implementations, that call to append will avoid copying UChars.
+     *
+     * If the Appendable allocates or reallocates an internal buffer, it should use
+     * the desiredCapacityHint if appropriate.
+     * If a caller cannot provide a reasonable guess at the desired capacity,
+     * it should pass desiredCapacityHint=0.
+     *
+     * If a non-scratch buffer is returned, the caller may only pass
+     * a prefix to it to appendString().
+     * That is, it is not correct to pass an interior pointer to appendString().
+     *
+     * The default implementation always returns the scratch buffer.
+     *
+     * @param minCapacity required minimum capacity of the returned buffer;
+     *                    must be non-negative
+     * @param desiredCapacityHint desired capacity of the returned buffer;
+     *                            must be non-negative
+     * @param scratch default caller-owned buffer
+     * @param scratchCapacity capacity of the scratch buffer
+     * @param resultCapacity pointer to an integer which will be set to the
+     *                       capacity of the returned buffer
+     * @return a buffer with *resultCapacity>=minCapacity
+     * @stable ICU 4.8
+     */
+    virtual UChar *getAppendBuffer(int32_t minCapacity,
+                                   int32_t desiredCapacityHint,
+                                   UChar *scratch, int32_t scratchCapacity,
+                                   int32_t *resultCapacity);
+};
+
+/**
+ * An Appendable implementation which writes to a UnicodeString.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API UnicodeStringAppendable : public Appendable {
+public:
+    /**
+     * Aliases the UnicodeString (keeps its reference) for writing.
+     * @param s The UnicodeString to which this Appendable will write.
+     * @stable ICU 4.8
+     */
+    explicit UnicodeStringAppendable(UnicodeString &s) : str(s) {}
+
+    /**
+     * Destructor.
+     * @stable ICU 4.8
+     */
+    ~UnicodeStringAppendable();
+
+    /**
+     * Appends a 16-bit code unit to the string.
+     * @param c code unit
+     * @return TRUE if the operation succeeded
+     * @stable ICU 4.8
+     */
+    virtual UBool appendCodeUnit(UChar c);
+
+    /**
+     * Appends a code point to the string.
+     * @param c code point 0..0x10ffff
+     * @return TRUE if the operation succeeded
+     * @stable ICU 4.8
+     */
+    virtual UBool appendCodePoint(UChar32 c);
+
+    /**
+     * Appends a string to the UnicodeString.
+     * @param s string, must not be NULL if length!=0
+     * @param length string length, or -1 if NUL-terminated
+     * @return TRUE if the operation succeeded
+     * @stable ICU 4.8
+     */
+    virtual UBool appendString(const UChar *s, int32_t length);
+
+    /**
+     * Tells the UnicodeString that the caller is going to append roughly
+     * appendCapacity UChars.
+     * @param appendCapacity estimated number of UChars that will be appended
+     * @return TRUE if the operation succeeded
+     * @stable ICU 4.8
+     */
+    virtual UBool reserveAppendCapacity(int32_t appendCapacity);
+
+    /**
+     * Returns a writable buffer for appending and writes the buffer's capacity to
+     * *resultCapacity. Guarantees *resultCapacity>=minCapacity.
+     * May return a pointer to the caller-owned scratch buffer which must have
+     * scratchCapacity>=minCapacity.
+     * The returned buffer is only valid until the next write operation
+     * on the UnicodeString.
+     *
+     * For details see Appendable::getAppendBuffer().
+     *
+     * @param minCapacity required minimum capacity of the returned buffer;
+     *                    must be non-negative
+     * @param desiredCapacityHint desired capacity of the returned buffer;
+     *                            must be non-negative
+     * @param scratch default caller-owned buffer
+     * @param scratchCapacity capacity of the scratch buffer
+     * @param resultCapacity pointer to an integer which will be set to the
+     *                       capacity of the returned buffer
+     * @return a buffer with *resultCapacity>=minCapacity
+     * @stable ICU 4.8
+     */
+    virtual UChar *getAppendBuffer(int32_t minCapacity,
+                                   int32_t desiredCapacityHint,
+                                   UChar *scratch, int32_t scratchCapacity,
+                                   int32_t *resultCapacity);
+
+private:
+    UnicodeString &str;
+};
+
+U_NAMESPACE_END
+
+#endif  // __APPENDABLE_H__
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/bytestream.h b/src/core/basetypes/icu-ucsdet/include/unicode/bytestream.h
new file mode 100644
index 00000000..174aa38a
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/bytestream.h
@@ -0,0 +1,257 @@
+// Copyright (C) 2009-2012, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2007 Google Inc. All Rights Reserved.
+// Author: sanjay@google.com (Sanjay Ghemawat)
+//
+// Abstract interface that consumes a sequence of bytes (ByteSink).
+//
+// Used so that we can write a single piece of code that can operate
+// on a variety of output string types.
+//
+// Various implementations of this interface are provided:
+//   ByteSink:
+//      CheckedArrayByteSink    Write to a flat array, with bounds checking
+//      StringByteSink          Write to an STL string
+
+// This code is a contribution of Google code, and the style used here is
+// a compromise between the original Google code and the ICU coding guidelines.
+// For example, data types are ICU-ified (size_t,int->int32_t),
+// and API comments doxygen-ified, but function names and behavior are
+// as in the original, if possible.
+// Assertion-style error handling, not available in ICU, was changed to
+// parameter "pinning" similar to UnicodeString.
+//
+// In addition, this is only a partial port of the original Google code,
+// limited to what was needed so far. The (nearly) complete original code
+// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
+// (see ICU ticket 6765, r25517).
+
+#ifndef __BYTESTREAM_H__
+#define __BYTESTREAM_H__
+
+/**
+ * \file
+ * \brief C++ API: Interface for writing bytes, and implementation classes.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/std_string.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A ByteSink can be filled with bytes.
+ * @stable ICU 4.2
+ */
+class U_COMMON_API ByteSink : public UMemory {
+public:
+  /**
+   * Default constructor.
+   * @stable ICU 4.2
+   */
+  ByteSink() { }
+  /**
+   * Virtual destructor.
+   * @stable ICU 4.2
+   */
+  virtual ~ByteSink();
+
+  /**
+   * Append "bytes[0,n-1]" to this.
+   * @param bytes the pointer to the bytes
+   * @param n the number of bytes; must be non-negative
+   * @stable ICU 4.2
+   */
+  virtual void Append(const char* bytes, int32_t n) = 0;
+
+  /**
+   * Returns a writable buffer for appending and writes the buffer's capacity to
+   * *result_capacity. Guarantees *result_capacity>=min_capacity.
+   * May return a pointer to the caller-owned scratch buffer which must have
+   * scratch_capacity>=min_capacity.
+   * The returned buffer is only valid until the next operation
+   * on this ByteSink.
+   *
+   * After writing at most *result_capacity bytes, call Append() with the
+   * pointer returned from this function and the number of bytes written.
+   * Many Append() implementations will avoid copying bytes if this function
+   * returned an internal buffer.
+   *
+   * Partial usage example:
+   *  int32_t capacity;
+   *  char* buffer = sink->GetAppendBuffer(..., &capacity);
+   *  ... Write n bytes into buffer, with n <= capacity.
+   *  sink->Append(buffer, n);
+   * In many implementations, that call to Append will avoid copying bytes.
+   *
+   * If the ByteSink allocates or reallocates an internal buffer, it should use
+   * the desired_capacity_hint if appropriate.
+   * If a caller cannot provide a reasonable guess at the desired capacity,
+   * it should pass desired_capacity_hint=0.
+   *
+   * If a non-scratch buffer is returned, the caller may only pass
+   * a prefix to it to Append().
+   * That is, it is not correct to pass an interior pointer to Append().
+   *
+   * The default implementation always returns the scratch buffer.
+   *
+   * @param min_capacity required minimum capacity of the returned buffer;
+   *                     must be non-negative
+   * @param desired_capacity_hint desired capacity of the returned buffer;
+   *                              must be non-negative
+   * @param scratch default caller-owned buffer
+   * @param scratch_capacity capacity of the scratch buffer
+   * @param result_capacity pointer to an integer which will be set to the
+   *                        capacity of the returned buffer
+   * @return a buffer with *result_capacity>=min_capacity
+   * @stable ICU 4.2
+   */
+  virtual char* GetAppendBuffer(int32_t min_capacity,
+                                int32_t desired_capacity_hint,
+                                char* scratch, int32_t scratch_capacity,
+                                int32_t* result_capacity);
+
+  /**
+   * Flush internal buffers.
+   * Some byte sinks use internal buffers or provide buffering
+   * and require calling Flush() at the end of the stream.
+   * The ByteSink should be ready for further Append() calls after Flush().
+   * The default implementation of Flush() does nothing.
+   * @stable ICU 4.2
+   */
+  virtual void Flush();
+
+private:
+  ByteSink(const ByteSink &); // copy constructor not implemented
+  ByteSink &operator=(const ByteSink &); // assignment operator not implemented
+};
+
+// -------------------------------------------------------------
+// Some standard implementations
+
+/** 
+ * Implementation of ByteSink that writes to a flat byte array,
+ * with bounds-checking:
+ * This sink will not write more than capacity bytes to outbuf.
+ * If more than capacity bytes are Append()ed, then excess bytes are ignored,
+ * and Overflowed() will return true.
+ * Overflow does not cause a runtime error.
+ * @stable ICU 4.2
+ */
+class U_COMMON_API CheckedArrayByteSink : public ByteSink {
+public:
+  /**
+   * Constructs a ByteSink that will write to outbuf[0..capacity-1].
+   * @param outbuf buffer to write to
+   * @param capacity size of the buffer
+   * @stable ICU 4.2
+   */
+  CheckedArrayByteSink(char* outbuf, int32_t capacity);
+  /**
+   * Destructor.
+   * @stable ICU 4.2
+   */
+  virtual ~CheckedArrayByteSink();
+  /**
+   * Returns the sink to its original state, without modifying the buffer.
+   * Useful for reusing both the buffer and the sink for multiple streams.
+   * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
+   * and Overflowed()=FALSE.
+   * @return *this
+   * @stable ICU 4.6
+   */
+  virtual CheckedArrayByteSink& Reset();
+  /**
+   * Append "bytes[0,n-1]" to this.
+   * @param bytes the pointer to the bytes
+   * @param n the number of bytes; must be non-negative
+   * @stable ICU 4.2
+   */
+  virtual void Append(const char* bytes, int32_t n);
+  /**
+   * Returns a writable buffer for appending and writes the buffer's capacity to
+   * *result_capacity. For details see the base class documentation.
+   * @param min_capacity required minimum capacity of the returned buffer;
+   *                     must be non-negative
+   * @param desired_capacity_hint desired capacity of the returned buffer;
+   *                              must be non-negative
+   * @param scratch default caller-owned buffer
+   * @param scratch_capacity capacity of the scratch buffer
+   * @param result_capacity pointer to an integer which will be set to the
+   *                        capacity of the returned buffer
+   * @return a buffer with *result_capacity>=min_capacity
+   * @stable ICU 4.2
+   */
+  virtual char* GetAppendBuffer(int32_t min_capacity,
+                                int32_t desired_capacity_hint,
+                                char* scratch, int32_t scratch_capacity,
+                                int32_t* result_capacity);
+  /**
+   * Returns the number of bytes actually written to the sink.
+   * @return number of bytes written to the buffer
+   * @stable ICU 4.2
+   */
+  int32_t NumberOfBytesWritten() const { return size_; }
+  /**
+   * Returns true if any bytes were discarded, i.e., if there was an
+   * attempt to write more than 'capacity' bytes.
+   * @return TRUE if more than 'capacity' bytes were Append()ed
+   * @stable ICU 4.2
+   */
+  UBool Overflowed() const { return overflowed_; }
+  /**
+   * Returns the number of bytes appended to the sink.
+   * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
+   * else they return the same number.
+   * @return number of bytes written to the buffer
+   * @stable ICU 4.6
+   */
+  int32_t NumberOfBytesAppended() const { return appended_; }
+private:
+  char* outbuf_;
+  const int32_t capacity_;
+  int32_t size_;
+  int32_t appended_;
+  UBool overflowed_;
+  CheckedArrayByteSink(); ///< default constructor not implemented 
+  CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented
+  CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented
+};
+
+#if U_HAVE_STD_STRING
+
+/** 
+ * Implementation of ByteSink that writes to a "string".
+ * The StringClass is usually instantiated with a std::string.
+ * @stable ICU 4.2
+ */
+template<typename StringClass>
+class StringByteSink : public ByteSink {
+ public:
+  /**
+   * Constructs a ByteSink that will append bytes to the dest string.
+   * @param dest pointer to string object to append to
+   * @stable ICU 4.2
+   */
+  StringByteSink(StringClass* dest) : dest_(dest) { }
+  /**
+   * Append "bytes[0,n-1]" to this.
+   * @param data the pointer to the bytes
+   * @param n the number of bytes; must be non-negative
+   * @stable ICU 4.2
+   */
+  virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
+ private:
+  StringClass* dest_;
+  StringByteSink(); ///< default constructor not implemented 
+  StringByteSink(const StringByteSink &); ///< copy constructor not implemented
+  StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented
+};
+
+#endif
+
+U_NAMESPACE_END
+
+#endif  // __BYTESTREAM_H__
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/localpointer.h b/src/core/basetypes/icu-ucsdet/include/unicode/localpointer.h
new file mode 100644
index 00000000..e3ccb258
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/localpointer.h
@@ -0,0 +1,304 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  localpointer.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009nov13
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __LOCALPOINTER_H__
+#define __LOCALPOINTER_H__
+
+/**
+ * \file 
+ * \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code.
+ *
+ * These classes are inspired by
+ * - std::auto_ptr
+ * - boost::scoped_ptr & boost::scoped_array
+ * - Taligent Safe Pointers (TOnlyPointerTo)
+ *
+ * but none of those provide for all of the goals for ICU smart pointers:
+ * - Smart pointer owns the object and releases it when it goes out of scope.
+ * - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust.
+ * - ICU-compatible: No exceptions.
+ * - Need to be able to orphan/release the pointer and its ownership.
+ * - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects.
+ *
+ * For details see http://site.icu-project.org/design/cpp/scoped_ptr
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * "Smart pointer" base class; do not use directly: use LocalPointer etc.
+ *
+ * Base class for smart pointer classes that do not throw exceptions.
+ *
+ * Do not use this base class directly, since it does not delete its pointer.
+ * A subclass must implement methods that delete the pointer:
+ * Destructor and adoptInstead().
+ *
+ * There is no operator T *() provided because the programmer must decide
+ * whether to use getAlias() (without transfer of ownership) or orpan()
+ * (with transfer of ownership and NULLing of the pointer).
+ *
+ * @see LocalPointer
+ * @see LocalArray
+ * @see U_DEFINE_LOCAL_OPEN_POINTER
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalPointerBase {
+public:
+    /**
+     * Constructor takes ownership.
+     * @param p simple pointer to an object that is adopted
+     * @stable ICU 4.4
+     */
+    explicit LocalPointerBase(T *p=NULL) : ptr(p) {}
+    /**
+     * Destructor deletes the object it owns.
+     * Subclass must override: Base class does nothing.
+     * @stable ICU 4.4
+     */
+    ~LocalPointerBase() { /* delete ptr; */ }
+    /**
+     * NULL check.
+     * @return TRUE if ==NULL
+     * @stable ICU 4.4
+     */
+    UBool isNull() const { return ptr==NULL; }
+    /**
+     * NULL check.
+     * @return TRUE if !=NULL
+     * @stable ICU 4.4
+     */
+    UBool isValid() const { return ptr!=NULL; }
+    /**
+     * Comparison with a simple pointer, so that existing code
+     * with ==NULL need not be changed.
+     * @param other simple pointer for comparison
+     * @return true if this pointer value equals other
+     * @stable ICU 4.4
+     */
+    bool operator==(const T *other) const { return ptr==other; }
+    /**
+     * Comparison with a simple pointer, so that existing code
+     * with !=NULL need not be changed.
+     * @param other simple pointer for comparison
+     * @return true if this pointer value differs from other
+     * @stable ICU 4.4
+     */
+    bool operator!=(const T *other) const { return ptr!=other; }
+    /**
+     * Access without ownership change.
+     * @return the pointer value
+     * @stable ICU 4.4
+     */
+    T *getAlias() const { return ptr; }
+    /**
+     * Access without ownership change.
+     * @return the pointer value as a reference
+     * @stable ICU 4.4
+     */
+    T &operator*() const { return *ptr; }
+    /**
+     * Access without ownership change.
+     * @return the pointer value
+     * @stable ICU 4.4
+     */
+    T *operator->() const { return ptr; }
+    /**
+     * Gives up ownership; the internal pointer becomes NULL.
+     * @return the pointer value;
+     *         caller becomes responsible for deleting the object
+     * @stable ICU 4.4
+     */
+    T *orphan() {
+        T *p=ptr;
+        ptr=NULL;
+        return p;
+    }
+    /**
+     * Deletes the object it owns,
+     * and adopts (takes ownership of) the one passed in.
+     * Subclass must override: Base class does not delete the object.
+     * @param p simple pointer to an object that is adopted
+     * @stable ICU 4.4
+     */
+    void adoptInstead(T *p) {
+        // delete ptr;
+        ptr=p;
+    }
+protected:
+    /**
+     * Actual pointer.
+     * @internal
+     */
+    T *ptr;
+private:
+    // No comparison operators with other LocalPointerBases.
+    bool operator==(const LocalPointerBase &other);
+    bool operator!=(const LocalPointerBase &other);
+    // No ownership transfer: No copy constructor, no assignment operator.
+    LocalPointerBase(const LocalPointerBase &other);
+    void operator=(const LocalPointerBase &other);
+    // No heap allocation. Use only on the stack.
+    static void * U_EXPORT2 operator new(size_t size);
+    static void * U_EXPORT2 operator new[](size_t size);
+#if U_HAVE_PLACEMENT_NEW
+    static void * U_EXPORT2 operator new(size_t, void *ptr);
+#endif
+};
+
+/**
+ * "Smart pointer" class, deletes objects via the standard C++ delete operator.
+ * For most methods see the LocalPointerBase base class.
+ *
+ * Usage example:
+ * \code
+ * LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
+ * int32_t length=s->length();  // 2
+ * UChar lead=s->charAt(0);  // 0xd900
+ * if(some condition) { return; }  // no need to explicitly delete the pointer
+ * s.adoptInstead(new UnicodeString((UChar)0xfffc));
+ * length=s->length();  // 1
+ * // no need to explicitly delete the pointer
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalPointer : public LocalPointerBase<T> {
+public:
+    /**
+     * Constructor takes ownership.
+     * @param p simple pointer to an object that is adopted
+     * @stable ICU 4.4
+     */
+    explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {}
+    /**
+     * Destructor deletes the object it owns.
+     * @stable ICU 4.4
+     */
+    ~LocalPointer() {
+        delete LocalPointerBase<T>::ptr;
+    }
+    /**
+     * Deletes the object it owns,
+     * and adopts (takes ownership of) the one passed in.
+     * @param p simple pointer to an object that is adopted
+     * @stable ICU 4.4
+     */
+    void adoptInstead(T *p) {
+        delete LocalPointerBase<T>::ptr;
+        LocalPointerBase<T>::ptr=p;
+    }
+};
+
+/**
+ * "Smart pointer" class, deletes objects via the C++ array delete[] operator.
+ * For most methods see the LocalPointerBase base class.
+ * Adds operator[] for array item access.
+ *
+ * Usage example:
+ * \code
+ * LocalArray<UnicodeString> a(new UnicodeString[2]);
+ * a[0].append((UChar)0x61);
+ * if(some condition) { return; }  // no need to explicitly delete the array
+ * a.adoptInstead(new UnicodeString[4]);
+ * a[3].append((UChar)0x62).append((UChar)0x63).reverse();
+ * // no need to explicitly delete the array
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalArray : public LocalPointerBase<T> {
+public:
+    /**
+     * Constructor takes ownership.
+     * @param p simple pointer to an array of T objects that is adopted
+     * @stable ICU 4.4
+     */
+    explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {}
+    /**
+     * Destructor deletes the array it owns.
+     * @stable ICU 4.4
+     */
+    ~LocalArray() {
+        delete[] LocalPointerBase<T>::ptr;
+    }
+    /**
+     * Deletes the array it owns,
+     * and adopts (takes ownership of) the one passed in.
+     * @param p simple pointer to an array of T objects that is adopted
+     * @stable ICU 4.4
+     */
+    void adoptInstead(T *p) {
+        delete[] LocalPointerBase<T>::ptr;
+        LocalPointerBase<T>::ptr=p;
+    }
+    /**
+     * Array item access (writable).
+     * No index bounds check.
+     * @param i array index
+     * @return reference to the array item
+     * @stable ICU 4.4
+     */
+    T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
+};
+
+/**
+ * \def U_DEFINE_LOCAL_OPEN_POINTER
+ * "Smart pointer" definition macro, deletes objects via the closeFunction.
+ * Defines a subclass of LocalPointerBase which works just
+ * like LocalPointer<Type> except that this subclass will use the closeFunction
+ * rather than the C++ delete operator.
+ *
+ * Requirement: The closeFunction must tolerate a NULL pointer.
+ * (We could add a NULL check here but it is normally redundant.)
+ *
+ * Usage example:
+ * \code
+ * LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode));
+ * utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
+ *     utf8Out, (int32_t)sizeof(utf8Out),
+ *     utf8In, utf8InLength, &errorCode);
+ * if(U_FAILURE(errorCode)) { return; }  // no need to explicitly delete the UCaseMap
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
+    class LocalPointerClassName : public LocalPointerBase<Type> { \
+    public: \
+        explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
+        ~LocalPointerClassName() { closeFunction(ptr); } \
+        void adoptInstead(Type *p) { \
+            closeFunction(ptr); \
+            ptr=p; \
+        } \
+    }
+
+U_NAMESPACE_END
+
+#endif  /* U_SHOW_CPLUSPLUS_API */
+#endif  /* __LOCALPOINTER_H__ */
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/platform.h b/src/core/basetypes/icu-ucsdet/include/unicode/platform.h
new file mode 100644
index 00000000..07dce8ad
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/platform.h
@@ -0,0 +1,751 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+*  FILE NAME : platform.h
+*
+*   Date        Name        Description
+*   05/13/98    nos         Creation (content moved here from ptypes.h).
+*   03/02/99    stephen     Added AS400 support.
+*   03/30/99    stephen     Added Linux support.
+*   04/13/99    stephen     Reworked for autoconf.
+******************************************************************************
+*/
+
+#ifndef _PLATFORM_H
+#define _PLATFORM_H
+
+#include "unicode/uconfig.h"
+#include "unicode/uvernum.h"
+
+/**
+ * \file
+ * \brief Basic types for the platform.
+ *
+ * This file used to be generated by autoconf/configure.
+ * Starting with ICU 49, platform.h is a normal source file,
+ * to simplify cross-compiling and working with non-autoconf/make build systems.
+ *
+ * When a value in this file does not work on a platform, then please
+ * try to derive it from the U_PLATFORM value
+ * (for which we might need a new value constant in rare cases)
+ * and/or from other macros that are predefined by the compiler
+ * or defined in standard (POSIX or platform or compiler) headers.
+ *
+ * As a temporary workaround, you can add an explicit <code>#define</code> for some macros
+ * before it is first tested, or add an equivalent -D macro definition
+ * to the compiler's command line.
+ *
+ * Note: Some compilers provide ways to show the predefined macros.
+ * For example, with gcc you can compile an empty .c file and have the compiler
+ * print the predefined macros with
+ * \code
+ * gcc -E -dM -x c /dev/null | sort
+ * \endcode
+ * (You can provide an actual empty .c file rather than /dev/null.
+ * <code>-x c++</code> is for C++.)
+ */
+
+/**
+ * Define some things so that they can be documented.
+ * @internal
+ */
+#ifdef U_IN_DOXYGEN
+/*
+ * Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
+ * Solution: #define any defines for non @internal API here, so that they are visible in the docs.  If you just set PREDEFINED in Doxyfile.in,  they won't be documented.
+ */
+
+/* None for now. */
+#endif
+
+/**
+ * \def U_PLATFORM
+ * The U_PLATFORM macro defines the platform we're on.
+ *
+ * We used to define one different, value-less macro per platform.
+ * That made it hard to know the set of relevant platforms and macros,
+ * and hard to deal with variants of platforms.
+ *
+ * Starting with ICU 49, we define platforms as numeric macros,
+ * with ranges of values for related platforms and their variants.
+ * The U_PLATFORM macro is set to one of these values.
+ *
+ * Historical note from the Solaris Wikipedia article:
+ * AT&T and Sun collaborated on a project to merge the most popular Unix variants
+ * on the market at that time: BSD, System V, and Xenix.
+ * This became Unix System V Release 4 (SVR4).
+ *
+ * @internal
+ */
+
+/** Unknown platform. @internal */
+#define U_PF_UNKNOWN 0
+/** Windows @internal */
+#define U_PF_WINDOWS 1000
+/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */
+#define U_PF_MINGW 1800
+/**
+ * Cygwin. Windows, calls to cygwin1.dll for Posix functions,
+ * using MSVC or GNU gcc and binutils.
+ * @internal
+ */
+#define U_PF_CYGWIN 1900
+/* Reserve 2000 for U_PF_UNIX? */
+/** HP-UX is based on UNIX System V. @internal */
+#define U_PF_HPUX 2100
+/** Solaris is a Unix operating system based on SVR4. @internal */
+#define U_PF_SOLARIS 2600
+/** BSD is a UNIX operating system derivative. @internal */
+#define U_PF_BSD 3000
+/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */
+#define U_PF_AIX 3100
+/** IRIX is based on UNIX System V with BSD extensions. @internal */
+#define U_PF_IRIX 3200
+/**
+ * Darwin is a POSIX-compliant operating system, composed of code developed by Apple,
+ * as well as code derived from NeXTSTEP, BSD, and other projects,
+ * built around the Mach kernel.
+ * Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based.
+ * (Original description modified from WikiPedia.)
+ * @internal
+ */
+#define U_PF_DARWIN 3500
+/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */
+#define U_PF_IPHONE 3550
+/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */
+#define U_PF_QNX 3700
+/** Linux is a Unix-like operating system. @internal */
+#define U_PF_LINUX 4000
+/** Android is based on Linux. @internal */
+#define U_PF_ANDROID 4050
+/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
+#define U_PF_OS390 9000
+/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */
+#define U_PF_OS400 9400
+
+#ifdef U_PLATFORM
+    /* Use the predefined value. */
+#elif defined(__MINGW32__)
+#   define U_PLATFORM U_PF_MINGW
+#elif defined(__CYGWIN__)
+#   define U_PLATFORM U_PF_CYGWIN
+#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+#   define U_PLATFORM U_PF_WINDOWS
+#elif defined(__ANDROID__)
+#   define U_PLATFORM U_PF_ANDROID
+    /* Android wchar_t support depends on the API level. */
+#   include <android/api-level.h>
+#elif defined(linux) || defined(__linux__) || defined(__linux)
+#   define U_PLATFORM U_PF_LINUX
+#elif defined(__APPLE__) && defined(__MACH__)
+#   include <TargetConditionals.h>
+#   if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE  /* variant of TARGET_OS_MAC */
+#       define U_PLATFORM U_PF_IPHONE
+#   else
+#       define U_PLATFORM U_PF_DARWIN
+#   endif
+#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__)
+#   define U_PLATFORM U_PF_BSD
+#elif defined(sun) || defined(__sun)
+    /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */
+#   define U_PLATFORM U_PF_SOLARIS
+#   if defined(__GNUC__)
+        /* Solaris/GCC needs this header file to get the proper endianness. Normally, this
+         * header file is included with stddef.h but on Solairs/GCC, the GCC version of stddef.h
+         *  is included which does not include this header file.
+         */
+#       include <sys/isa_defs.h>
+#   endif
+#elif defined(_AIX) || defined(__TOS_AIX__)
+#   define U_PLATFORM U_PF_AIX
+#elif defined(_hpux) || defined(hpux) || defined(__hpux)
+#   define U_PLATFORM U_PF_HPUX
+#elif defined(sgi) || defined(__sgi)
+#   define U_PLATFORM U_PF_IRIX
+#elif defined(__QNX__) || defined(__QNXNTO__)
+#   define U_PLATFORM U_PF_QNX
+#elif defined(__TOS_MVS__)
+#   define U_PLATFORM U_PF_OS390
+#elif defined(__OS400__) || defined(__TOS_OS400__)
+#   define U_PLATFORM U_PF_OS400
+#else
+#   define U_PLATFORM U_PF_UNKNOWN
+#endif
+
+/**
+ * \def CYGWINMSVC
+ * Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
+ * Otherwise undefined.
+ * @internal
+ */
+/* Commented out because this is already set in mh-cygwin-msvc
+#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER)
+#   define CYGWINMSVC
+#endif
+*/
+
+/**
+ * \def U_PLATFORM_USES_ONLY_WIN32_API
+ * Defines whether the platform uses only the Win32 API.
+ * Set to 1 for Windows/MSVC and MinGW but not Cygwin.
+ * @internal
+ */
+#ifdef U_PLATFORM_USES_ONLY_WIN32_API
+    /* Use the predefined value. */
+#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC)
+#   define U_PLATFORM_USES_ONLY_WIN32_API 1
+#else
+    /* Cygwin implements POSIX. */
+#   define U_PLATFORM_USES_ONLY_WIN32_API 0
+#endif
+
+/**
+ * \def U_PLATFORM_HAS_WIN32_API
+ * Defines whether the Win32 API is available on the platform.
+ * Set to 1 for Windows/MSVC, MinGW and Cygwin.
+ * @internal
+ */
+#ifdef U_PLATFORM_HAS_WIN32_API
+    /* Use the predefined value. */
+#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
+#   define U_PLATFORM_HAS_WIN32_API 1
+#else
+#   define U_PLATFORM_HAS_WIN32_API 0
+#endif
+
+/**
+ * \def U_PLATFORM_IMPLEMENTS_POSIX
+ * Defines whether the platform implements (most of) the POSIX API.
+ * Set to 1 for Cygwin and most other platforms.
+ * @internal
+ */
+#ifdef U_PLATFORM_IMPLEMENTS_POSIX
+    /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+#   define U_PLATFORM_IMPLEMENTS_POSIX 0
+#else
+#   define U_PLATFORM_IMPLEMENTS_POSIX 1
+#endif
+
+/**
+ * \def U_PLATFORM_IS_LINUX_BASED
+ * Defines whether the platform is Linux or one of its derivatives.
+ * @internal
+ */
+#ifdef U_PLATFORM_IS_LINUX_BASED
+    /* Use the predefined value. */
+#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= U_PF_ANDROID
+#   define U_PLATFORM_IS_LINUX_BASED 1
+#else
+#   define U_PLATFORM_IS_LINUX_BASED 0
+#endif
+
+/**
+ * \def U_PLATFORM_IS_DARWIN_BASED
+ * Defines whether the platform is Darwin or one of its derivatives.
+ * @internal
+ */
+#ifdef U_PLATFORM_IS_DARWIN_BASED
+    /* Use the predefined value. */
+#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE
+#   define U_PLATFORM_IS_DARWIN_BASED 1
+#else
+#   define U_PLATFORM_IS_DARWIN_BASED 0
+#endif
+
+/**
+ * \def U_HAVE_STDINT_H
+ * Defines whether stdint.h is available. It is a C99 standard header.
+ * We used to include inttypes.h which includes stdint.h but we usually do not need
+ * the additional definitions from inttypes.h.
+ * @internal
+ */
+#ifdef U_HAVE_STDINT_H
+    /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+#   if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600)
+        /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */
+#       define U_HAVE_STDINT_H 1
+#   else
+#       define U_HAVE_STDINT_H 0
+#   endif
+#elif U_PLATFORM == U_PF_SOLARIS
+    /* Solaris has inttypes.h but not stdint.h. */
+#   define U_HAVE_STDINT_H 0
+#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
+    /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
+#   define U_HAVE_STDINT_H 0
+#else
+#   define U_HAVE_STDINT_H 1
+#endif
+
+/**
+ * \def U_HAVE_INTTYPES_H
+ * Defines whether inttypes.h is available. It is a C99 standard header.
+ * We include inttypes.h where it is available but stdint.h is not.
+ * @internal
+ */
+#ifdef U_HAVE_INTTYPES_H
+    /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_SOLARIS
+    /* Solaris has inttypes.h but not stdint.h. */
+#   define U_HAVE_INTTYPES_H 1
+#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
+    /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
+#   define U_HAVE_INTTYPES_H 1
+#else
+    /* Most platforms have both inttypes.h and stdint.h, or neither. */
+#   define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
+#endif
+
+/**
+ * \def U_IOSTREAM_SOURCE
+ * Defines what support for C++ streams is available.
+ *
+ * If U_IOSTREAM_SOURCE is set to 199711, then &lt;iostream&gt; is available
+ * (the ISO/IEC C++ FDIS was published in November 1997), and then
+ * one should qualify streams using the std namespace in ICU header
+ * files.
+ * Starting with ICU 49, this is the only supported version.
+ *
+ * If U_IOSTREAM_SOURCE is set to 198506, then &lt;iostream.h&gt; is
+ * available instead (in June 1985 Stroustrup published
+ * "An Extensible I/O Facility for C++" at the summer USENIX conference).
+ * Starting with ICU 49, this version is not supported any more.
+ *
+ * If U_IOSTREAM_SOURCE is 0 (or any value less than 199711),
+ * then C++ streams are not available and
+ * support for them will be silently suppressed in ICU.
+ *
+ * @internal
+ */
+#ifndef U_IOSTREAM_SOURCE
+#define U_IOSTREAM_SOURCE 199711
+#endif
+
+/**
+ * \def U_HAVE_STD_STRING
+ * Defines whether the standard C++ (STL) &lt;string&gt; header is available.
+ * @internal
+ */
+#ifdef U_HAVE_STD_STRING
+    /* Use the predefined value. */
+#else
+#   define U_HAVE_STD_STRING 1
+#endif
+
+/*===========================================================================*/
+/** @{ Compiler and environment features                                     */
+/*===========================================================================*/
+
+/**
+ * \def U_GCC_MAJOR_MINOR
+ * Indicates whether the compiler is gcc (test for != 0),
+ * and if so, contains its major (times 100) and minor version numbers.
+ * If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0.
+ *
+ * For example, for testing for whether we have gcc, and whether it's 4.6 or higher,
+ * use "#if U_GCC_MAJOR_MINOR >= 406".
+ * @internal
+ */
+#ifdef __GNUC__
+#   define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+#   define U_GCC_MAJOR_MINOR 0
+#endif
+
+/**
+ * \def U_IS_BIG_ENDIAN
+ * Determines the endianness of the platform.
+ * @internal
+ */
+#ifdef U_IS_BIG_ENDIAN
+    /* Use the predefined value. */
+#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN)
+#   define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+    /* gcc */
+#   define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
+#   define U_IS_BIG_ENDIAN 1
+#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN)
+#   define U_IS_BIG_ENDIAN 0
+#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__)
+    /* These platforms do not appear to predefine any endianness macros. */
+#   define U_IS_BIG_ENDIAN 1
+#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0)
+    /* HPPA do not appear to predefine any endianness macros. */
+#   define U_IS_BIG_ENDIAN 1
+#elif defined(sparc) || defined(__sparc) || defined(__sparc__)
+    /* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */
+#   define U_IS_BIG_ENDIAN 1
+#else
+#   define U_IS_BIG_ENDIAN 0
+#endif
+
+/**
+ * \def U_HAVE_PLACEMENT_NEW
+ * Determines whether to override placement new and delete for STL.
+ * @stable ICU 2.6
+ */
+#ifdef U_HAVE_PLACEMENT_NEW
+    /* Use the predefined value. */
+#elif defined(__BORLANDC__)
+#   define U_HAVE_PLACEMENT_NEW 0
+#else
+#   define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+/**
+ * \def U_HAVE_DEBUG_LOCATION_NEW 
+ * Define this to define the MFC debug version of the operator new.
+ *
+ * @stable ICU 3.4
+ */
+#ifdef U_HAVE_DEBUG_LOCATION_NEW
+    /* Use the predefined value. */
+#elif defined(_MSC_VER)
+#   define U_HAVE_DEBUG_LOCATION_NEW 1
+#else
+#   define U_HAVE_DEBUG_LOCATION_NEW 0
+#endif
+
+/* Compatibility with non clang compilers */
+#ifndef __has_attribute
+#    define __has_attribute(x) 0
+#endif
+
+/**
+ * \def U_MALLOC_ATTR
+ * Attribute to mark functions as malloc-like
+ * @internal
+ */
+#if defined(__GNUC__) && __GNUC__>=3
+#    define U_MALLOC_ATTR __attribute__ ((__malloc__))
+#else
+#    define U_MALLOC_ATTR
+#endif
+
+/**
+ * \def U_ALLOC_SIZE_ATTR
+ * Attribute to specify the size of the allocated buffer for malloc-like functions
+ * @internal
+ */
+#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_attribute(alloc_size)
+#   define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X)))
+#   define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y)))
+#else
+#   define U_ALLOC_SIZE_ATTR(X)
+#   define U_ALLOC_SIZE_ATTR2(X,Y)
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Character data types                                                  */
+/*===========================================================================*/
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
+ * @stable ICU 2.0
+ */
+#define U_ASCII_FAMILY 0
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
+ * @stable ICU 2.0
+ */
+#define U_EBCDIC_FAMILY 1
+
+/**
+ * \def U_CHARSET_FAMILY
+ *
+ * <p>These definitions allow to specify the encoding of text
+ * in the char data type as defined by the platform and the compiler.
+ * It is enough to determine the code point values of "invariant characters",
+ * which are the ones shared by all encodings that are in use
+ * on a given platform.</p>
+ *
+ * <p>Those "invariant characters" should be all the uppercase and lowercase
+ * latin letters, the digits, the space, and "basic punctuation".
+ * Also, '\\n', '\\r', '\\t' should be available.</p>
+ *
+ * <p>The list of "invariant characters" is:<br>
+ * \code
+ *    A-Z  a-z  0-9  SPACE  "  %  &amp;  '  (  )  *  +  ,  -  .  /  :  ;  <  =  >  ?  _
+ * \endcode
+ * <br>
+ * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
+ *
+ * <p>This matches the IBM Syntactic Character Set (CS 640).</p>
+ *
+ * <p>In other words, all the graphic characters in 7-bit ASCII should
+ * be safely accessible except the following:</p>
+ *
+ * \code
+ *    '\' <backslash>
+ *    '[' <left bracket>
+ *    ']' <right bracket>
+ *    '{' <left brace>
+ *    '}' <right brace>
+ *    '^' <circumflex>
+ *    '~' <tilde>
+ *    '!' <exclamation mark>
+ *    '#' <number sign>
+ *    '|' <vertical line>
+ *    '$' <dollar sign>
+ *    '@' <commercial at>
+ *    '`' <grave accent>
+ * \endcode
+ * @stable ICU 2.0
+ */
+#ifdef U_CHARSET_FAMILY
+    /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB)
+#   define U_CHARSET_FAMILY U_EBCDIC_FAMILY
+#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__)
+#   define U_CHARSET_FAMILY U_EBCDIC_FAMILY
+#else
+#   define U_CHARSET_FAMILY U_ASCII_FAMILY
+#endif
+
+/**
+ * \def U_CHARSET_IS_UTF8
+ *
+ * Hardcode the default charset to UTF-8.
+ *
+ * If this is set to 1, then
+ * - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
+ *   contain UTF-8 text, regardless of what the system API uses
+ * - some ICU code will use fast functions like u_strFromUTF8()
+ *   rather than the more general and more heavy-weight conversion API (ucnv.h)
+ * - ucnv_getDefaultName() always returns "UTF-8"
+ * - ucnv_setDefaultName() is disabled and will not change the default charset
+ * - static builds of ICU are smaller
+ * - more functionality is available with the UCONFIG_NO_CONVERSION build-time
+ *   configuration option (see unicode/uconfig.h)
+ * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
+ *
+ * @stable ICU 4.2
+ * @see UCONFIG_NO_CONVERSION
+ */
+#ifdef U_CHARSET_IS_UTF8
+    /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED
+#   define U_CHARSET_IS_UTF8 1
+#else
+#   define U_CHARSET_IS_UTF8 0
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Information about wchar support                                       */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_WCHAR_H
+ * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
+ *
+ * @stable ICU 2.0
+ */
+#ifdef U_HAVE_WCHAR_H
+    /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9
+    /*
+     * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t.
+     * The type and header existed, but the library functions did not work as expected.
+     * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway.
+     */
+#   define U_HAVE_WCHAR_H 0
+#else
+#   define U_HAVE_WCHAR_H 1
+#endif
+
+/**
+ * \def U_SIZEOF_WCHAR_T
+ * U_SIZEOF_WCHAR_T==sizeof(wchar_t)
+ *
+ * @stable ICU 2.0
+ */
+#ifdef U_SIZEOF_WCHAR_T
+    /* Use the predefined value. */
+#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9)
+    /*
+     * Classic Mac OS and Mac OS X before 10.3 (Panther) did not support wchar_t or wstring.
+     * Newer Mac OS X has size 4.
+     */
+#   define U_SIZEOF_WCHAR_T 1
+#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN
+#   define U_SIZEOF_WCHAR_T 2
+#elif U_PLATFORM == U_PF_AIX
+    /*
+     * AIX 6.1 information, section "Wide character data representation":
+     * "... the wchar_t datatype is 32-bit in the 64-bit environment and
+     * 16-bit in the 32-bit environment."
+     * and
+     * "All locales use Unicode for their wide character code values (process code),
+     * except the IBM-eucTW codeset."
+     */
+#   ifdef __64BIT__
+#       define U_SIZEOF_WCHAR_T 4
+#   else
+#       define U_SIZEOF_WCHAR_T 2
+#   endif
+#elif U_PLATFORM == U_PF_OS390
+    /*
+     * z/OS V1R11 information center, section "LP64 | ILP32":
+     * "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes.
+     * Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes."
+     */
+#   ifdef _LP64
+#       define U_SIZEOF_WCHAR_T 4
+#   else
+#       define U_SIZEOF_WCHAR_T 2
+#   endif
+#elif U_PLATFORM == U_PF_OS400
+#   if defined(__UTF32__)
+        /*
+         * LOCALETYPE(*LOCALEUTF) is specified.
+         * Wide-character strings are in UTF-32,
+         * narrow-character strings are in UTF-8.
+         */
+#       define U_SIZEOF_WCHAR_T 4
+#   elif defined(__UCS2__)
+        /*
+         * LOCALETYPE(*LOCALEUCS2) is specified.
+         * Wide-character strings are in UCS-2,
+         * narrow-character strings are in EBCDIC.
+         */
+#       define U_SIZEOF_WCHAR_T 2
+#else
+        /*
+         * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified.
+         * Wide-character strings are in 16-bit EBCDIC,
+         * narrow-character strings are in EBCDIC.
+         */
+#       define U_SIZEOF_WCHAR_T 2
+#   endif
+#else
+#   define U_SIZEOF_WCHAR_T 4
+#endif
+
+#ifndef U_HAVE_WCSCPY
+#define U_HAVE_WCSCPY U_HAVE_WCHAR_H
+#endif
+
+/** @} */
+
+/**
+ * \def U_HAVE_CHAR16_T
+ * Defines whether the char16_t type is available for UTF-16
+ * and u"abc" UTF-16 string literals are supported.
+ * This is a new standard type and standard string literal syntax in C++0x
+ * but has been available in some compilers before.
+ * @internal
+ */
+#ifdef U_HAVE_CHAR16_T
+    /* Use the predefined value. */
+#else
+    /*
+     * Notes:
+     * Visual Studio 10 (_MSC_VER>=1600) defines char16_t but
+     * does not support u"abc" string literals.
+     * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
+     * does not support u"abc" string literals.
+     * C++11 and C11 require support for UTF-16 literals
+     */
+#   if (defined(__cplusplus) && __cplusplus >= 201103L) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
+#       define U_HAVE_CHAR16_T 1
+#   else
+#       define U_HAVE_CHAR16_T 0
+#   endif
+#endif
+
+/**
+ * @{
+ * \def U_DECLARE_UTF16
+ * Do not use this macro because it is not defined on all platforms.
+ * Use the UNICODE_STRING or U_STRING_DECL macros instead.
+ * @internal
+ */
+#ifdef U_DECLARE_UTF16
+    /* Use the predefined value. */
+#elif U_HAVE_CHAR16_T \
+    || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
+    || (defined(__HP_aCC) && __HP_aCC >= 035000) \
+    || (defined(__HP_cc) && __HP_cc >= 111106)
+#   define U_DECLARE_UTF16(string) u ## string
+#elif U_SIZEOF_WCHAR_T == 2 \
+    && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
+#   define U_DECLARE_UTF16(string) L ## string
+#else
+    /* Leave U_DECLARE_UTF16 undefined. See unistr.h. */
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Symbol import-export control                                          */
+/*===========================================================================*/
+
+#ifdef U_EXPORT
+    /* Use the predefined value. */
+#elif defined(U_STATIC_IMPLEMENTATION)
+#   define U_EXPORT
+#elif defined(__GNUC__)
+#   define U_EXPORT __attribute__((visibility("default")))
+#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
+   || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550) 
+#   define U_EXPORT __global
+/*#elif defined(__HP_aCC) || defined(__HP_cc)
+#   define U_EXPORT __declspec(dllexport)*/
+#elif defined(_MSC_VER)
+#   define U_EXPORT __declspec(dllexport)
+#else
+#   define U_EXPORT
+#endif
+
+/* U_CALLCONV is releated to U_EXPORT2 */
+#ifdef U_EXPORT2
+    /* Use the predefined value. */
+#elif defined(_MSC_VER)
+#   define U_EXPORT2 __cdecl
+#else
+#   define U_EXPORT2
+#endif
+
+#ifdef U_IMPORT
+    /* Use the predefined value. */
+#elif defined(_MSC_VER)
+    /* Windows needs to export/import data. */
+#   define U_IMPORT __declspec(dllimport)
+#else
+#   define U_IMPORT 
+#endif
+
+/**
+ * \def U_CALLCONV
+ * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
+ * in callback function typedefs to make sure that the calling convention
+ * is compatible.
+ *
+ * This is only used for non-ICU-API functions.
+ * When a function is a public ICU API,
+ * you must use the U_CAPI and U_EXPORT2 qualifiers.
+ * @stable ICU 2.0
+ */
+#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
+#    define U_CALLCONV __cdecl
+#else
+#    define U_CALLCONV U_EXPORT2
+#endif
+
+/* @} */
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ptypes.h b/src/core/basetypes/icu-ucsdet/include/unicode/ptypes.h
new file mode 100644
index 00000000..b7f71160
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ptypes.h
@@ -0,0 +1,126 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+*  FILE NAME : ptypes.h
+*
+*   Date        Name        Description
+*   05/13/98    nos         Creation (content moved here from ptypes.h).
+*   03/02/99    stephen     Added AS400 support.
+*   03/30/99    stephen     Added Linux support.
+*   04/13/99    stephen     Reworked for autoconf.
+*   09/18/08    srl         Moved basic types back to ptypes.h from platform.h
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief C API: Definitions of integer types of various widths
+ */
+
+#ifndef _PTYPES_H
+#define _PTYPES_H
+
+/**
+ * \def __STDC_LIMIT_MACROS
+ * According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations
+ * macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested.
+ * We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code
+ * that uses such limit macros.
+ * @internal
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+/* NULL, size_t, wchar_t */
+#include <stddef.h>
+
+/*
+ * If all compilers provided all of the C99 headers and types,
+ * we would just unconditionally #include <stdint.h> here
+ * and not need any of the stuff after including platform.h.
+ */
+
+/* Find out if we have stdint.h etc. */
+#include "unicode/platform.h"
+
+/*===========================================================================*/
+/* Generic data types                                                        */
+/*===========================================================================*/
+
+/* If your platform does not have the <stdint.h> header, you may
+   need to edit the typedefs in the #else section below.
+   Use #if...#else...#endif with predefined compiler macros if possible. */
+#if U_HAVE_STDINT_H
+
+/*
+ * We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>.
+ * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc.
+ * which we almost never use, plus stuff like imaxabs() which we never use.
+ */
+#include <stdint.h>
+
+#if U_PLATFORM == U_PF_OS390
+/* The features header is needed to get (u)int64_t sometimes. */
+#include <features.h>
+/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */
+#if !defined(__uint8_t)
+#define __uint8_t 1
+typedef unsigned char uint8_t;
+#endif
+#endif /* U_PLATFORM == U_PF_OS390 */
+
+#elif U_HAVE_INTTYPES_H
+
+#   include <inttypes.h>
+
+#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
+
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+
+#if ! U_HAVE_UINT8_T
+typedef unsigned char uint8_t;
+#endif
+
+#if ! U_HAVE_INT16_T
+typedef signed short int16_t;
+#endif
+
+#if ! U_HAVE_UINT16_T
+typedef unsigned short uint16_t;
+#endif
+
+#if ! U_HAVE_INT32_T
+typedef signed int int32_t;
+#endif
+
+#if ! U_HAVE_UINT32_T
+typedef unsigned int uint32_t;
+#endif
+
+#if ! U_HAVE_INT64_T
+#ifdef _MSC_VER
+    typedef signed __int64 int64_t;
+#else
+    typedef signed long long int64_t;
+#endif
+#endif
+
+#if ! U_HAVE_UINT64_T
+#ifdef _MSC_VER
+    typedef unsigned __int64 uint64_t;
+#else
+    typedef unsigned long long uint64_t;
+#endif
+#endif
+
+#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
+
+#endif /* _PTYPES_H */
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/putil.h b/src/core/basetypes/icu-ucsdet/include/unicode/putil.h
new file mode 100644
index 00000000..df1b17ba
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/putil.h
@@ -0,0 +1,181 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+*  FILE NAME : putil.h
+*
+*   Date        Name        Description
+*   05/14/98    nos         Creation (content moved here from utypes.h).
+*   06/17/99    erm         Added IEEE_754
+*   07/22/98    stephen     Added IEEEremainder, max, min, trunc
+*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
+*   08/24/98    stephen     Added longBitsFromDouble
+*   03/02/99    stephen     Removed openFile().  Added AS400 support.
+*   04/15/99    stephen     Converted to C
+*   11/15/99    helena      Integrated S/390 changes for IEEE support.
+*   01/11/00    helena      Added u_getVersion.
+******************************************************************************
+*/
+
+#ifndef PUTIL_H
+#define PUTIL_H
+
+#include "unicode/utypes.h"
+ /**
+  * \file
+  * \brief C API: Platform Utilities
+  */
+
+/*==========================================================================*/
+/* Platform utilities                                                       */
+/*==========================================================================*/
+
+/**
+ * Platform utilities isolates the platform dependencies of the
+ * libarary.  For each platform which this code is ported to, these
+ * functions may have to be re-implemented.
+ */
+
+/**
+ * Return the ICU data directory. 
+ * The data directory is where common format ICU data files (.dat files)
+ *   are loaded from.  Note that normal use of the built-in ICU
+ *   facilities does not require loading of an external data file;
+ *   unless you are adding custom data to ICU, the data directory
+ *   does not need to be set.
+ *
+ * The data directory is determined as follows:
+ *    If u_setDataDirectory() has been called, that is it, otherwise
+ *    if the ICU_DATA environment variable is set, use that, otherwise
+ *    If a data directory was specifed at ICU build time
+ *      <code>
+ * \code
+ *        #define ICU_DATA_DIR "path" 
+ * \endcode
+ * </code> use that,
+ *    otherwise no data directory is available.
+ *
+ * @return the data directory, or an empty string ("") if no data directory has
+ *         been specified.
+ *   
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2 u_getDataDirectory(void);
+
+
+/** 
+ * Set the ICU data directory. 
+ * The data directory is where common format ICU data files (.dat files)
+ *   are loaded from.  Note that normal use of the built-in ICU
+ *   facilities does not require loading of an external data file;
+ *   unless you are adding custom data to ICU, the data directory
+ *   does not need to be set.
+ *
+ * This function should be called at most once in a process, before the
+ * first ICU operation (e.g., u_init()) that will require the loading of an
+ * ICU data file.
+ * This function is not thread-safe. Use it before calling ICU APIs from
+ * multiple threads.
+ *
+ * @param directory The directory to be set.
+ *
+ * @see u_init
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+  * Return the time zone files override directory, or an empty string if
+  * no directory was specified. Certain time zone resources will be preferrentially
+  * loaded from individual files in this directory.
+  *
+  * @return the time zone data override directory.
+  * @internal
+  */ 
+U_INTERNAL const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status);
+
+/**
+  * Set the time zone files override directory.
+  * This function is not thread safe; it must not be called concurrently with
+  *   u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions.
+  * This function should only be called before using any ICU service that
+  *   will access the time zone data.
+  * @internal
+  */
+U_INTERNAL void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status);
+#endif  /* U_HIDE_INTERNAL_API */
+
+
+/**
+ * @{
+ * Filesystem file and path separator characters.
+ * Example: '/' and ':' on Unix, '\\' and ';' on Windows.
+ * @stable ICU 2.0
+ */
+#if U_PLATFORM_USES_ONLY_WIN32_API
+#   define U_FILE_SEP_CHAR '\\'
+#   define U_FILE_ALT_SEP_CHAR '/'
+#   define U_PATH_SEP_CHAR ';'
+#   define U_FILE_SEP_STRING "\\"
+#   define U_FILE_ALT_SEP_STRING "/"
+#   define U_PATH_SEP_STRING ";"
+#else
+#   define U_FILE_SEP_CHAR '/'
+#   define U_FILE_ALT_SEP_CHAR '/'
+#   define U_PATH_SEP_CHAR ':'
+#   define U_FILE_SEP_STRING "/"
+#   define U_FILE_ALT_SEP_STRING "/"
+#   define U_PATH_SEP_STRING ":"
+#endif
+
+/** @} */
+
+/**
+ * Convert char characters to UChar characters.
+ * This utility function is useful only for "invariant characters"
+ * that are encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param cs Input string, points to <code>length</code>
+ *           character bytes from a subset of the platform encoding.
+ * @param us Output string, points to memory for <code>length</code>
+ *           Unicode characters.
+ * @param length The number of characters to convert; this may
+ *               include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, int32_t length);
+
+/**
+ * Convert UChar characters to char characters.
+ * This utility function is useful only for "invariant characters"
+ * that can be encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param us Input string, points to <code>length</code>
+ *           Unicode characters that can be encoded with the
+ *           codepage-invariant subset of the platform encoding.
+ * @param cs Output string, points to memory for <code>length</code>
+ *           character bytes.
+ * @param length The number of characters to convert; this may
+ *               include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, int32_t length);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/rep.h b/src/core/basetypes/icu-ucsdet/include/unicode/rep.h
new file mode 100644
index 00000000..4c7eae14
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/rep.h
@@ -0,0 +1,261 @@
+/*
+**************************************************************************
+* Copyright (C) 1999-2012, International Business Machines Corporation and
+* others. All Rights Reserved.
+**************************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.  Ported from java.  Modified to
+*                           match current UnicodeString API.  Forced
+*                           to use name "handleReplaceBetween" because
+*                           of existing methods in UnicodeString.
+**************************************************************************
+*/
+
+#ifndef REP_H
+#define REP_H
+
+#include "unicode/uobject.h"
+
+/**
+ * \file 
+ * \brief C++ API: Replaceable String
+ */
+ 
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * <code>Replaceable</code> is an abstract base class representing a
+ * string of characters that supports the replacement of a range of
+ * itself with a new string of characters.  It is used by APIs that
+ * change a piece of text while retaining metadata.  Metadata is data
+ * other than the Unicode characters returned by char32At().  One
+ * example of metadata is style attributes; another is an edit
+ * history, marking each character with an author and revision number.
+ *
+ * <p>An implicit aspect of the <code>Replaceable</code> API is that
+ * during a replace operation, new characters take on the metadata of
+ * the old characters.  For example, if the string "the <b>bold</b>
+ * font" has range (4, 8) replaced with "strong", then it becomes "the
+ * <b>strong</b> font".
+ *
+ * <p><code>Replaceable</code> specifies ranges using a start
+ * offset and a limit offset.  The range of characters thus specified
+ * includes the characters at offset start..limit-1.  That is, the
+ * start offset is inclusive, and the limit offset is exclusive.
+ *
+ * <p><code>Replaceable</code> also includes API to access characters
+ * in the string: <code>length()</code>, <code>charAt()</code>,
+ * <code>char32At()</code>, and <code>extractBetween()</code>.
+ *
+ * <p>For a subclass to support metadata, typical behavior of
+ * <code>replace()</code> is the following:
+ * <ul>
+ *   <li>Set the metadata of the new text to the metadata of the first
+ *   character replaced</li>
+ *   <li>If no characters are replaced, use the metadata of the
+ *   previous character</li>
+ *   <li>If there is no previous character (i.e. start == 0), use the
+ *   following character</li>
+ *   <li>If there is no following character (i.e. the replaceable was
+ *   empty), use default metadata.<br>
+ *   <li>If the code point U+FFFF is seen, it should be interpreted as
+ *   a special marker having no metadata<li>
+ *   </li>
+ * </ul>
+ * If this is not the behavior, the subclass should document any differences.
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API Replaceable : public UObject {
+
+public:
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~Replaceable();
+
+    /**
+     * Returns the number of 16-bit code units in the text.
+     * @return number of 16-bit code units in text
+     * @stable ICU 1.8
+     */ 
+    inline int32_t length() const;
+
+    /**
+     * Returns the 16-bit code unit at the given offset into the text.
+     * @param offset an integer between 0 and <code>length()</code>-1
+     * inclusive
+     * @return 16-bit code unit of text at given offset
+     * @stable ICU 1.8
+     */
+    inline UChar charAt(int32_t offset) const;
+
+    /**
+     * Returns the 32-bit code point at the given 16-bit offset into
+     * the text.  This assumes the text is stored as 16-bit code units
+     * with surrogate pairs intermixed.  If the offset of a leading or
+     * trailing code unit of a surrogate pair is given, return the
+     * code point of the surrogate pair.
+     *
+     * @param offset an integer between 0 and <code>length()</code>-1
+     * inclusive
+     * @return 32-bit code point of text at given offset
+     * @stable ICU 1.8
+     */
+    inline UChar32 char32At(int32_t offset) const;
+
+    /**
+     * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>) 
+     * into the UnicodeString <tt>target</tt>.
+     * @param start offset of first character which will be copied
+     * @param limit offset immediately following the last character to
+     * be copied
+     * @param target UnicodeString into which to copy characters.
+     * @return A reference to <TT>target</TT>
+     * @stable ICU 2.1
+     */
+    virtual void extractBetween(int32_t start,
+                                int32_t limit,
+                                UnicodeString& target) const = 0;
+
+    /**
+     * Replaces a substring of this object with the given text.  If the
+     * characters being replaced have metadata, the new characters
+     * that replace them should be given the same metadata.
+     *
+     * <p>Subclasses must ensure that if the text between start and
+     * limit is equal to the replacement text, that replace has no
+     * effect. That is, any metadata
+     * should be unaffected. In addition, subclasses are encouraged to
+     * check for initial and trailing identical characters, and make a
+     * smaller replacement if possible. This will preserve as much
+     * metadata as possible.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= length()</code>.
+     * @param text the text to replace characters <code>start</code>
+     * to <code>limit - 1</code> 
+     * @stable ICU 2.0
+     */
+    virtual void handleReplaceBetween(int32_t start,
+                                      int32_t limit,
+                                      const UnicodeString& text) = 0;
+    // Note: All other methods in this class take the names of
+    // existing UnicodeString methods.  This method is the exception.
+    // It is named differently because all replace methods of
+    // UnicodeString return a UnicodeString&.  The 'between' is
+    // required in order to conform to the UnicodeString naming
+    // convention; API taking start/length are named <operation>, and
+    // those taking start/limit are named <operationBetween>.  The
+    // 'handle' is added because 'replaceBetween' and
+    // 'doReplaceBetween' are already taken.
+
+    /**
+     * Copies a substring of this object, retaining metadata.
+     * This method is used to duplicate or reorder substrings.
+     * The destination index must not overlap the source range.
+     * 
+     * @param start the beginning index, inclusive; <code>0 <= start <=
+     * limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit <=
+     * length()</code>.
+     * @param dest the destination index.  The characters from
+     * <code>start..limit-1</code> will be copied to <code>dest</code>.
+     * Implementations of this method may assume that <code>dest <= start ||
+     * dest >= limit</code>.
+     * @stable ICU 2.0
+     */
+    virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
+
+    /**
+     * Returns true if this object contains metadata.  If a
+     * Replaceable object has metadata, calls to the Replaceable API
+     * must be made so as to preserve metadata.  If it does not, calls
+     * to the Replaceable API may be optimized to improve performance.
+     * The default implementation returns true.
+     * @return true if this object contains metadata
+     * @stable ICU 2.2
+     */
+    virtual UBool hasMetaData() const;
+
+    /**
+     * Clone this object, an instance of a subclass of Replaceable.
+     * Clones can be used concurrently in multiple threads.
+     * If a subclass does not implement clone(), or if an error occurs,
+     * then NULL is returned.
+     * The clone functions in all subclasses return a pointer to a Replaceable
+     * because some compilers do not support covariant (same-as-this)
+     * return types; cast to the appropriate subclass if necessary.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.6
+     */
+    virtual Replaceable *clone() const;
+
+protected:
+
+    /**
+     * Default constructor.
+     * @stable ICU 2.4
+     */
+    inline Replaceable();
+
+    /*
+     * Assignment operator not declared. The compiler will provide one
+     * which does nothing since this class does not contain any data members.
+     * API/code coverage may show the assignment operator as present and
+     * untested - ignore.
+     * Subclasses need this assignment operator if they use compiler-provided
+     * assignment operators of their own. An alternative to not declaring one
+     * here would be to declare and empty-implement a protected or public one.
+    Replaceable &Replaceable::operator=(const Replaceable &);
+     */
+
+    /**
+     * Virtual version of length().
+     * @stable ICU 2.4
+     */ 
+    virtual int32_t getLength() const = 0;
+
+    /**
+     * Virtual version of charAt().
+     * @stable ICU 2.4
+     */
+    virtual UChar getCharAt(int32_t offset) const = 0;
+
+    /**
+     * Virtual version of char32At().
+     * @stable ICU 2.4
+     */
+    virtual UChar32 getChar32At(int32_t offset) const = 0;
+};
+
+inline Replaceable::Replaceable() {}
+
+inline int32_t
+Replaceable::length() const {
+    return getLength();
+}
+
+inline UChar
+Replaceable::charAt(int32_t offset) const {
+    return getCharAt(offset);
+}
+
+inline UChar32
+Replaceable::char32At(int32_t offset) const {
+    return getChar32At(offset);
+}
+
+// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/std_string.h b/src/core/basetypes/icu-ucsdet/include/unicode/std_string.h
new file mode 100644
index 00000000..05955c5d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/std_string.h
@@ -0,0 +1,37 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  std_string.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009feb19
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __STD_STRING_H__
+#define __STD_STRING_H__
+
+/**
+ * \file
+ * \brief C++ API: Central ICU header for including the C++ standard &lt;string&gt;
+ *                 header and for related definitions.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_HAVE_STD_STRING
+
+#if !defined(_MSC_VER)
+namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364
+#endif
+#include <string>
+
+#endif  // U_HAVE_STD_STRING
+
+#endif  // __STD_STRING_H__
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/strenum.h b/src/core/basetypes/icu-ucsdet/include/unicode/strenum.h
new file mode 100644
index 00000000..3dbe21c6
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/strenum.h
@@ -0,0 +1,276 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*/
+
+#ifndef STRENUM_H
+#define STRENUM_H
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file 
+ * \brief C++ API: String Enumeration
+ */
+ 
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for 'pure' C++ implementations of uenum api.  Adds a
+ * method that returns the next UnicodeString since in C++ this can
+ * be a common storage format for strings.
+ *
+ * <p>The model is that the enumeration is over strings maintained by
+ * a 'service.'  At any point, the service might change, invalidating
+ * the enumerator (though this is expected to be rare).  The iterator
+ * returns an error if this has occurred.  Lack of the error is no
+ * guarantee that the service didn't change immediately after the
+ * call, so the returned string still might not be 'valid' on
+ * subsequent use.</p>
+ *
+ * <p>Strings may take the form of const char*, const UChar*, or const
+ * UnicodeString*.  The type you get is determine by the variant of
+ * 'next' that you call.  In general the StringEnumeration is
+ * optimized for one of these types, but all StringEnumerations can
+ * return all types.  Returned strings are each terminated with a NUL.
+ * Depending on the service data, they might also include embedded NUL
+ * characters, so API is provided to optionally return the true
+ * length, counting the embedded NULs but not counting the terminating
+ * NUL.</p>
+ *
+ * <p>The pointers returned by next, unext, and snext become invalid
+ * upon any subsequent call to the enumeration's destructor, next,
+ * unext, snext, or reset.</p>
+ *
+ * ICU 2.8 adds some default implementations and helper functions
+ * for subclasses.
+ *
+ * @stable ICU 2.4 
+ */
+class U_COMMON_API StringEnumeration : public UObject { 
+public:
+    /**
+     * Destructor.
+     * @stable ICU 2.4
+     */
+    virtual ~StringEnumeration();
+
+    /**
+     * Clone this object, an instance of a subclass of StringEnumeration.
+     * Clones can be used concurrently in multiple threads.
+     * If a subclass does not implement clone(), or if an error occurs,
+     * then NULL is returned.
+     * The clone functions in all subclasses return a base class pointer
+     * because some compilers do not support covariant (same-as-this)
+     * return types; cast to the appropriate subclass if necessary.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.8
+     */
+    virtual StringEnumeration *clone() const;
+
+    /**
+     * <p>Return the number of elements that the iterator traverses.  If
+     * the iterator is out of sync with its service, status is set to
+     * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
+     *
+     * <p>The return value will not change except possibly as a result of
+     * a subsequent call to reset, or if the iterator becomes out of sync.</p>
+     *
+     * <p>This is a convenience function. It can end up being very
+     * expensive as all the items might have to be pre-fetched
+     * (depending on the storage format of the data being
+     * traversed).</p>
+     *
+     * @param status the error code.
+     * @return number of elements in the iterator.
+     *
+     * @stable ICU 2.4 */
+    virtual int32_t count(UErrorCode& status) const = 0;
+
+    /**
+     * <p>Returns the next element as a NUL-terminated char*.  If there
+     * are no more elements, returns NULL.  If the resultLength pointer
+     * is not NULL, the length of the string (not counting the
+     * terminating NUL) is returned at that address.  If an error
+     * status is returned, the value at resultLength is undefined.</p>
+     *
+     * <p>The returned pointer is owned by this iterator and must not be
+     * deleted by the caller.  The pointer is valid until the next call
+     * to next, unext, snext, reset, or the enumerator's destructor.</p>
+     *
+     * <p>If the iterator is out of sync with its service, status is set
+     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+     *
+     * <p>If the native service string is a UChar* string, it is
+     * converted to char* with the invariant converter.  If the
+     * conversion fails (because a character cannot be converted) then
+     * status is set to U_INVARIANT_CONVERSION_ERROR and the return
+     * value is undefined (though not NULL).</p>
+     *
+     * Starting with ICU 2.8, the default implementation calls snext()
+     * and handles the conversion.
+     * Either next() or snext() must be implemented differently by a subclass.
+     *
+     * @param status the error code.
+     * @param resultLength a pointer to receive the length, can be NULL.
+     * @return a pointer to the string, or NULL.
+     *
+     * @stable ICU 2.4 
+     */
+    virtual const char* next(int32_t *resultLength, UErrorCode& status);
+
+    /**
+     * <p>Returns the next element as a NUL-terminated UChar*.  If there
+     * are no more elements, returns NULL.  If the resultLength pointer
+     * is not NULL, the length of the string (not counting the
+     * terminating NUL) is returned at that address.  If an error
+     * status is returned, the value at resultLength is undefined.</p>
+     *
+     * <p>The returned pointer is owned by this iterator and must not be
+     * deleted by the caller.  The pointer is valid until the next call
+     * to next, unext, snext, reset, or the enumerator's destructor.</p>
+     *
+     * <p>If the iterator is out of sync with its service, status is set
+     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+     *
+     * Starting with ICU 2.8, the default implementation calls snext()
+     * and handles the conversion.
+     *
+     * @param status the error code.
+     * @param resultLength a ponter to receive the length, can be NULL.
+     * @return a pointer to the string, or NULL.
+     *
+     * @stable ICU 2.4 
+     */
+    virtual const UChar* unext(int32_t *resultLength, UErrorCode& status);
+
+    /**
+     * <p>Returns the next element a UnicodeString*.  If there are no
+     * more elements, returns NULL.</p>
+     *
+     * <p>The returned pointer is owned by this iterator and must not be
+     * deleted by the caller.  The pointer is valid until the next call
+     * to next, unext, snext, reset, or the enumerator's destructor.</p>
+     *
+     * <p>If the iterator is out of sync with its service, status is set
+     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+     *
+     * Starting with ICU 2.8, the default implementation calls next()
+     * and handles the conversion.
+     * Either next() or snext() must be implemented differently by a subclass.
+     *
+     * @param status the error code.
+     * @return a pointer to the string, or NULL.
+     *
+     * @stable ICU 2.4 
+     */
+    virtual const UnicodeString* snext(UErrorCode& status);
+
+    /**
+     * <p>Resets the iterator.  This re-establishes sync with the
+     * service and rewinds the iterator to start at the first
+     * element.</p>
+     *
+     * <p>Previous pointers returned by next, unext, or snext become
+     * invalid, and the value returned by count might change.</p>
+     *
+     * @param status the error code.
+     *
+     * @stable ICU 2.4 
+     */
+    virtual void reset(UErrorCode& status) = 0;
+
+    /**
+     * Compares this enumeration to other to check if both are equal
+     *
+     * @param that The other string enumeration to compare this object to
+     * @return TRUE if the enumerations are equal. FALSE if not.
+     * @stable ICU 3.6 
+     */
+    virtual UBool operator==(const StringEnumeration& that)const;
+    /**
+     * Compares this enumeration to other to check if both are not equal
+     *
+     * @param that The other string enumeration to compare this object to
+     * @return TRUE if the enumerations are equal. FALSE if not.
+     * @stable ICU 3.6 
+     */
+    virtual UBool operator!=(const StringEnumeration& that)const;
+
+protected:
+    /**
+     * UnicodeString field for use with default implementations and subclasses.
+     * @stable ICU 2.8
+     */
+    UnicodeString unistr;
+    /**
+     * char * default buffer for use with default implementations and subclasses.
+     * @stable ICU 2.8
+     */
+    char charsBuffer[32];
+    /**
+     * char * buffer for use with default implementations and subclasses.
+     * Allocated in constructor and in ensureCharsCapacity().
+     * @stable ICU 2.8
+     */
+    char *chars;
+    /**
+     * Capacity of chars, for use with default implementations and subclasses.
+     * @stable ICU 2.8
+     */
+    int32_t charsCapacity;
+
+    /**
+     * Default constructor for use with default implementations and subclasses.
+     * @stable ICU 2.8
+     */
+    StringEnumeration();
+
+    /**
+     * Ensures that chars is at least as large as the requested capacity.
+     * For use with default implementations and subclasses.
+     *
+     * @param capacity Requested capacity.
+     * @param status ICU in/out error code.
+     * @stable ICU 2.8
+     */
+    void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
+
+    /**
+     * Converts s to Unicode and sets unistr to the result.
+     * For use with default implementations and subclasses,
+     * especially for implementations of snext() in terms of next().
+     * This is provided with a helper function instead of a default implementation
+     * of snext() to avoid potential infinite loops between next() and snext().
+     *
+     * For example:
+     * \code
+     * const UnicodeString* snext(UErrorCode& status) {
+     *   int32_t resultLength=0;
+     *   const char *s=next(&resultLength, status);
+     *   return setChars(s, resultLength, status);
+     * }
+     * \endcode
+     *
+     * @param s String to be converted to Unicode.
+     * @param length Length of the string.
+     * @param status ICU in/out error code.
+     * @return A pointer to unistr.
+     * @stable ICU 2.8
+     */
+    UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
+};
+
+U_NAMESPACE_END
+
+/* STRENUM_H */
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/stringpiece.h b/src/core/basetypes/icu-ucsdet/include/unicode/stringpiece.h
new file mode 100644
index 00000000..b29571d4
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/stringpiece.h
@@ -0,0 +1,224 @@
+// Copyright (C) 2009-2013, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2001 and onwards Google Inc.
+// Author: Sanjay Ghemawat
+
+// This code is a contribution of Google code, and the style used here is
+// a compromise between the original Google code and the ICU coding guidelines.
+// For example, data types are ICU-ified (size_t,int->int32_t),
+// and API comments doxygen-ified, but function names and behavior are
+// as in the original, if possible.
+// Assertion-style error handling, not available in ICU, was changed to
+// parameter "pinning" similar to UnicodeString.
+//
+// In addition, this is only a partial port of the original Google code,
+// limited to what was needed so far. The (nearly) complete original code
+// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
+// (see ICU ticket 6765, r25517).
+
+#ifndef __STRINGPIECE_H__
+#define __STRINGPIECE_H__
+
+/**
+ * \file 
+ * \brief C++ API: StringPiece: Read-only byte string wrapper class.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/std_string.h"
+
+// Arghh!  I wish C++ literals were "string".
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A string-like object that points to a sized piece of memory.
+ *
+ * We provide non-explicit singleton constructors so users can pass
+ * in a "const char*" or a "string" wherever a "StringPiece" is
+ * expected.
+ *
+ * Functions or methods may use const StringPiece& parameters to accept either
+ * a "const char*" or a "string" value that will be implicitly converted to
+ * a StringPiece.
+ *
+ * Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+ * conversions from "const char*" to "string" and back again.
+ *
+ * @stable ICU 4.2
+ */
+class U_COMMON_API StringPiece : public UMemory {
+ private:
+  const char*   ptr_;
+  int32_t       length_;
+
+ public:
+  /**
+   * Default constructor, creates an empty StringPiece.
+   * @stable ICU 4.2
+   */
+  StringPiece() : ptr_(NULL), length_(0) { }
+  /**
+   * Constructs from a NUL-terminated const char * pointer.
+   * @param str a NUL-terminated const char * pointer
+   * @stable ICU 4.2
+   */
+  StringPiece(const char* str);
+#if U_HAVE_STD_STRING
+  /**
+   * Constructs from a std::string.
+   * @stable ICU 4.2
+   */
+  StringPiece(const std::string& str)
+    : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
+#endif
+  /**
+   * Constructs from a const char * pointer and a specified length.
+   * @param offset a const char * pointer (need not be terminated)
+   * @param len the length of the string; must be non-negative
+   * @stable ICU 4.2
+   */
+  StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
+  /**
+   * Substring of another StringPiece.
+   * @param x the other StringPiece
+   * @param pos start position in x; must be non-negative and <= x.length().
+   * @stable ICU 4.2
+   */
+  StringPiece(const StringPiece& x, int32_t pos);
+  /**
+   * Substring of another StringPiece.
+   * @param x the other StringPiece
+   * @param pos start position in x; must be non-negative and <= x.length().
+   * @param len length of the substring;
+   *            must be non-negative and will be pinned to at most x.length() - pos.
+   * @stable ICU 4.2
+   */
+  StringPiece(const StringPiece& x, int32_t pos, int32_t len);
+
+  /**
+   * Returns the string pointer. May be NULL if it is empty.
+   *
+   * data() may return a pointer to a buffer with embedded NULs, and the
+   * returned buffer may or may not be null terminated.  Therefore it is
+   * typically a mistake to pass data() to a routine that expects a NUL
+   * terminated string.
+   * @return the string pointer
+   * @stable ICU 4.2
+   */
+  const char* data() const { return ptr_; }
+  /**
+   * Returns the string length. Same as length().
+   * @return the string length
+   * @stable ICU 4.2
+   */
+  int32_t size() const { return length_; }
+  /**
+   * Returns the string length. Same as size().
+   * @return the string length
+   * @stable ICU 4.2
+   */
+  int32_t length() const { return length_; }
+  /**
+   * Returns whether the string is empty.
+   * @return TRUE if the string is empty
+   * @stable ICU 4.2
+   */
+  UBool empty() const { return length_ == 0; }
+
+  /**
+   * Sets to an empty string.
+   * @stable ICU 4.2
+   */
+  void clear() { ptr_ = NULL; length_ = 0; }
+
+  /**
+   * Reset the stringpiece to refer to new data.
+   * @param xdata pointer the new string data.  Need not be nul terminated.
+   * @param len the length of the new data
+   * @stable ICU 4.8
+   */
+  void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
+
+  /**
+   * Reset the stringpiece to refer to new data.
+   * @param str a pointer to a NUL-terminated string. 
+   * @stable ICU 4.8
+   */
+  void set(const char* str);
+
+  /**
+   * Removes the first n string units.
+   * @param n prefix length, must be non-negative and <=length()
+   * @stable ICU 4.2
+   */
+  void remove_prefix(int32_t n) {
+    if (n >= 0) {
+      if (n > length_) {
+        n = length_;
+      }
+      ptr_ += n;
+      length_ -= n;
+    }
+  }
+
+  /**
+   * Removes the last n string units.
+   * @param n suffix length, must be non-negative and <=length()
+   * @stable ICU 4.2
+   */
+  void remove_suffix(int32_t n) {
+    if (n >= 0) {
+      if (n <= length_) {
+        length_ -= n;
+      } else {
+        length_ = 0;
+      }
+    }
+  }
+
+  /**
+   * Maximum integer, used as a default value for substring methods.
+   * @stable ICU 4.2
+   */
+  static const int32_t npos; // = 0x7fffffff;
+
+  /**
+   * Returns a substring of this StringPiece.
+   * @param pos start position; must be non-negative and <= length().
+   * @param len length of the substring;
+   *            must be non-negative and will be pinned to at most length() - pos.
+   * @return the substring StringPiece
+   * @stable ICU 4.2
+   */
+  StringPiece substr(int32_t pos, int32_t len = npos) const {
+    return StringPiece(*this, pos, len);
+  }
+};
+
+/**
+ * Global operator == for StringPiece
+ * @param x The first StringPiece to compare.
+ * @param y The second StringPiece to compare.
+ * @return TRUE if the string data is equal
+ * @stable ICU 4.8
+ */
+U_EXPORT UBool U_EXPORT2 
+operator==(const StringPiece& x, const StringPiece& y);
+
+/**
+ * Global operator != for StringPiece
+ * @param x The first StringPiece to compare.
+ * @param y The second StringPiece to compare.
+ * @return TRUE if the string data is not equal
+ * @stable ICU 4.8
+ */
+inline UBool operator!=(const StringPiece& x, const StringPiece& y) {
+  return !(x == y);
+}
+
+U_NAMESPACE_END
+
+#endif  // __STRINGPIECE_H__
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ucasemap.h b/src/core/basetypes/icu-ucsdet/include/unicode/ucasemap.h
new file mode 100644
index 00000000..b37e1658
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ucasemap.h
@@ -0,0 +1,423 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2005-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  ucasemap.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005may06
+*   created by: Markus W. Scherer
+*
+*   Case mapping service object and functions using it.
+*/
+
+#ifndef __UCASEMAP_H__
+#define __UCASEMAP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "unicode/localpointer.h"
+
+/**
+ * \file
+ * \brief C API: Unicode case mapping functions using a UCaseMap service object.
+ *
+ * The service object takes care of memory allocations, data loading, and setup
+ * for the attributes, as usual.
+ *
+ * Currently, the functionality provided here does not overlap with uchar.h
+ * and ustring.h, except for ucasemap_toTitle().
+ *
+ * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
+ */
+
+/**
+ * UCaseMap is an opaque service object for newer ICU case mapping functions.
+ * Older functions did not use a service object.
+ * @stable ICU 3.4
+ */
+struct UCaseMap;
+typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
+
+/**
+ * Open a UCaseMap service object for a locale and a set of options.
+ * The locale ID and options are preprocessed so that functions using the
+ * service object need not process them in each call.
+ *
+ * @param locale ICU locale ID, used for language-dependent
+ *               upper-/lower-/title-casing according to the Unicode standard.
+ *               Usual semantics: ""=root, NULL=default locale, etc.
+ * @param options Options bit set, used for case folding and string comparisons.
+ *                Same flags as for u_foldCase(), u_strFoldCase(),
+ *                u_strCaseCompare(), etc.
+ *                Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ * @return Pointer to a UCaseMap service object, if successful.
+ *
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 3.4
+ */
+U_STABLE UCaseMap * U_EXPORT2
+ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Close a UCaseMap service object.
+ * @param csm Object to be closed.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_close(UCaseMap *csm);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCaseMapPointer
+ * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Get the locale ID that is used for language-dependent case mappings.
+ * @param csm UCaseMap service object.
+ * @return locale ID
+ * @stable ICU 3.4
+ */
+U_STABLE const char * U_EXPORT2
+ucasemap_getLocale(const UCaseMap *csm);
+
+/**
+ * Get the options bit set that is used for case folding and string comparisons.
+ * @param csm UCaseMap service object.
+ * @return options bit set
+ * @stable ICU 3.4
+ */
+U_STABLE uint32_t U_EXPORT2
+ucasemap_getOptions(const UCaseMap *csm);
+
+/**
+ * Set the locale ID that is used for language-dependent case mappings.
+ *
+ * @param csm UCaseMap service object.
+ * @param locale Locale ID, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
+
+/**
+ * Set the options bit set that is used for case folding and string comparisons.
+ *
+ * @param csm UCaseMap service object.
+ * @param options Options bit set, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Do not lowercase non-initial parts of words when titlecasing.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will titlecase the first cased character
+ * of a word and lowercase all other characters.
+ * With this option, the other characters will not be modified.
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_LOWERCASE 0x100
+
+/**
+ * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
+ * titlecase exactly the characters at breaks from the iterator.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will take each break iterator index,
+ * adjust it by looking for the next cased character, and titlecase that one.
+ * Other characters are lowercased.
+ *
+ * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
+ *
+ * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Get the break iterator that is used for titlecasing.
+ * Do not modify the returned break iterator.
+ * @param csm UCaseMap service object.
+ * @return titlecasing break iterator
+ * @stable ICU 3.8
+ */
+U_STABLE const UBreakIterator * U_EXPORT2
+ucasemap_getBreakIterator(const UCaseMap *csm);
+
+/**
+ * Set the break iterator that is used for titlecasing.
+ * The UCaseMap service object releases a previously set break iterator
+ * and "adopts" this new one, taking ownership of it.
+ * It will be released in a subsequent call to ucasemap_setBreakIterator()
+ * or ucasemap_close().
+ *
+ * Break iterator operations are not thread-safe. Therefore, titlecasing
+ * functions use non-const UCaseMap objects. It is not possible to titlecase
+ * strings concurrently using the same UCaseMap.
+ *
+ * @param csm UCaseMap service object.
+ * @param iterToAdopt Break iterator to be adopted for titlecasing.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+U_STABLE void U_EXPORT2
+ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
+
+/**
+ * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
+ * except that it takes ucasemap_setOptions() into account and has performance
+ * advantages from being able to use a UCaseMap object for multiple case mapping
+ * operations, saving setup time.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * Note: This function takes a non-const UCaseMap pointer because it will
+ * open a default break iterator if no break iterator was set yet,
+ * and effectively call ucasemap_setBreakIterator();
+ * also because the break iterator is stateful and will be modified during
+ * the iteration.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setUText(), first(), next() and close() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object. This pointer is non-const!
+ *                  See the note above for details.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @stable ICU 3.8
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_toTitle(UCaseMap *csm,
+                 UChar *dest, int32_t destCapacity,
+                 const UChar *src, int32_t srcLength,
+                 UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Lowercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strToLower
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8ToLower(const UCaseMap *csm,
+                     char *dest, int32_t destCapacity,
+                     const char *src, int32_t srcLength,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Uppercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strToUpper
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8ToUpper(const UCaseMap *csm,
+                     char *dest, int32_t destCapacity,
+                     const char *src, int32_t srcLength,
+                     UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * Note: This function takes a non-const UCaseMap pointer because it will
+ * open a default break iterator if no break iterator was set yet,
+ * and effectively call ucasemap_setBreakIterator();
+ * also because the break iterator is stateful and will be modified during
+ * the iteration.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setUText(), first(), next() and close() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object. This pointer is non-const!
+ *                  See the note above for details.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 3.8
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8ToTitle(UCaseMap *csm,
+                    char *dest, int32_t destCapacity,
+                    const char *src, int32_t srcLength,
+                    UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-folds the characters in a UTF-8 string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strFoldCase
+ * @see ucasemap_setOptions
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @stable ICU 3.8
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8FoldCase(const UCaseMap *csm,
+                      char *dest, int32_t destCapacity,
+                      const char *src, int32_t srcLength,
+                      UErrorCode *pErrorCode);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uchar.h b/src/core/basetypes/icu-ucsdet/include/unicode/uchar.h
new file mode 100644
index 00000000..1d3ae0dd
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uchar.h
@@ -0,0 +1,3426 @@
+/*
+**********************************************************************
+*   Copyright (C) 1997-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File UCHAR.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   04/02/97    aliu        Creation.
+*   03/29/99    helena      Updated for C APIs.
+*   4/15/99     Madhu       Updated for C Implementation and Javadoc
+*   5/20/99     Madhu       Added the function u_getVersion()
+*   8/19/1999   srl         Upgraded scripts to Unicode 3.0
+*   8/27/1999   schererm    UCharDirection constants: U_...
+*   11/11/1999  weiv        added u_isalnum(), cleaned comments
+*   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
+******************************************************************************
+*/
+
+#ifndef UCHAR_H
+#define UCHAR_H
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/*==========================================================================*/
+/* Unicode version number                                                   */
+/*==========================================================================*/
+/**
+ * Unicode version number, default for the current ICU version.
+ * The actual Unicode Character Database (UCD) data is stored in uprops.dat
+ * and may be generated from UCD files from a different Unicode version.
+ * Call u_getUnicodeVersion to get the actual Unicode version of the data.
+ *
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.0
+ */
+#define U_UNICODE_VERSION "7.0"
+
+/**
+ * \file
+ * \brief C API: Unicode Properties
+ *
+ * This C API provides low-level access to the Unicode Character Database.
+ * In addition to raw property values, some convenience functions calculate
+ * derived properties, for example for Java-style programming.
+ *
+ * Unicode assigns each code point (not just assigned character) values for
+ * many properties.
+ * Most of them are simple boolean flags, or constants from a small enumerated list.
+ * For some properties, values are strings or other relatively more complex types.
+ *
+ * For more information see
+ * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
+ * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
+ *
+ * Many functions are designed to match java.lang.Character functions.
+ * See the individual function documentation,
+ * and see the JDK 1.4 java.lang.Character documentation
+ * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
+ *
+ * There are also functions that provide easy migration from C/POSIX functions
+ * like isblank(). Their use is generally discouraged because the C/POSIX
+ * standards do not define their semantics beyond the ASCII range, which means
+ * that different implementations exhibit very different behavior.
+ * Instead, Unicode properties should be used directly.
+ *
+ * There are also only a few, broad C/POSIX character classes, and they tend
+ * to be used for conflicting purposes. For example, the "isalpha()" class
+ * is sometimes used to determine word boundaries, while a more sophisticated
+ * approach would at least distinguish initial letters from continuation
+ * characters (the latter including combining marks).
+ * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
+ * Another example: There is no "istitle()" class for titlecase characters.
+ *
+ * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
+ * ICU implements them according to the Standard Recommendations in
+ * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
+ * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
+ *
+ * API access for C/POSIX character classes is as follows:
+ * - alpha:     u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
+ * - lower:     u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
+ * - upper:     u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
+ * - punct:     u_ispunct(c)
+ * - digit:     u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
+ * - xdigit:    u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
+ * - alnum:     u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
+ * - space:     u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
+ * - blank:     u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
+ * - cntrl:     u_charType(c)==U_CONTROL_CHAR
+ * - graph:     u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
+ * - print:     u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
+ *
+ * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
+ * the Standard Recommendations in UTS #18. Instead, they match Java
+ * functions according to their API documentation.
+ *
+ * \htmlonly
+ * The C/POSIX character classes are also available in UnicodeSet patterns,
+ * using patterns like [:graph:] or \p{graph}.
+ * \endhtmlonly
+ *
+ * Note: There are several ICU whitespace functions.
+ * Comparison:
+ * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
+ *       most of general categories "Z" (separators) + most whitespace ISO controls
+ *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
+ * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
+ * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
+ * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
+ * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
+ */
+
+/**
+ * Constants.
+ */
+
+/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
+#define UCHAR_MIN_VALUE 0
+
+/**
+ * The highest Unicode code point value (scalar value) according to
+ * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
+ * For a single character, UChar32 is a simple type that can hold any code point value.
+ *
+ * @see UChar32
+ * @stable ICU 2.0
+ */
+#define UCHAR_MAX_VALUE 0x10ffff
+
+/**
+ * Get a single-bit bit set (a flag) from a bit number 0..31.
+ * @stable ICU 2.1
+ */
+#define U_MASK(x) ((uint32_t)1<<(x))
+
+/**
+ * Selection constants for Unicode properties.
+ * These constants are used in functions like u_hasBinaryProperty to select
+ * one of the Unicode properties.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ * Check u_getUnicodeVersion to be sure.
+ *
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+typedef enum UProperty {
+    /*
+     * Note: UProperty constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     UCHAR_<Unicode property name>=<integer>,
+     */
+
+    /*  Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
+    debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
+    rather than UCHAR_BINARY_START.  Likewise for other *_START
+    identifiers. */
+
+    /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
+        Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
+    UCHAR_ALPHABETIC=0,
+    /** First constant for binary Unicode properties. @stable ICU 2.1 */
+    UCHAR_BINARY_START=UCHAR_ALPHABETIC,
+    /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
+    UCHAR_ASCII_HEX_DIGIT=1,
+    /** Binary property Bidi_Control.
+        Format controls which have specific functions
+        in the Bidi Algorithm. @stable ICU 2.1 */
+    UCHAR_BIDI_CONTROL=2,
+    /** Binary property Bidi_Mirrored.
+        Characters that may change display in RTL text.
+        Same as u_isMirrored.
+        See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
+    UCHAR_BIDI_MIRRORED=3,
+    /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
+    UCHAR_DASH=4,
+    /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
+        Ignorable in most processing.
+        <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
+    UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,
+    /** Binary property Deprecated (new in Unicode 3.2).
+        The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
+    UCHAR_DEPRECATED=6,
+    /** Binary property Diacritic. Characters that linguistically modify
+        the meaning of another character to which they apply. @stable ICU 2.1 */
+    UCHAR_DIACRITIC=7,
+    /** Binary property Extender.
+        Extend the value or shape of a preceding alphabetic character,
+        e.g., length and iteration marks. @stable ICU 2.1 */
+    UCHAR_EXTENDER=8,
+    /** Binary property Full_Composition_Exclusion.
+        CompositionExclusions.txt+Singleton Decompositions+
+        Non-Starter Decompositions. @stable ICU 2.1 */
+    UCHAR_FULL_COMPOSITION_EXCLUSION=9,
+    /** Binary property Grapheme_Base (new in Unicode 3.2).
+        For programmatic determination of grapheme cluster boundaries.
+        [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
+    UCHAR_GRAPHEME_BASE=10,
+    /** Binary property Grapheme_Extend (new in Unicode 3.2).
+        For programmatic determination of grapheme cluster boundaries.
+        Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
+    UCHAR_GRAPHEME_EXTEND=11,
+    /** Binary property Grapheme_Link (new in Unicode 3.2).
+        For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
+    UCHAR_GRAPHEME_LINK=12,
+    /** Binary property Hex_Digit.
+        Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
+    UCHAR_HEX_DIGIT=13,
+    /** Binary property Hyphen. Dashes used to mark connections
+        between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
+    UCHAR_HYPHEN=14,
+    /** Binary property ID_Continue.
+        Characters that can continue an identifier.
+        DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
+        ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
+    UCHAR_ID_CONTINUE=15,
+    /** Binary property ID_Start.
+        Characters that can start an identifier.
+        Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
+    UCHAR_ID_START=16,
+    /** Binary property Ideographic.
+        CJKV ideographs. @stable ICU 2.1 */
+    UCHAR_IDEOGRAPHIC=17,
+    /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
+        For programmatic determination of
+        Ideographic Description Sequences. @stable ICU 2.1 */
+    UCHAR_IDS_BINARY_OPERATOR=18,
+    /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
+        For programmatic determination of
+        Ideographic Description Sequences. @stable ICU 2.1 */
+    UCHAR_IDS_TRINARY_OPERATOR=19,
+    /** Binary property Join_Control.
+        Format controls for cursive joining and ligation. @stable ICU 2.1 */
+    UCHAR_JOIN_CONTROL=20,
+    /** Binary property Logical_Order_Exception (new in Unicode 3.2).
+        Characters that do not use logical order and
+        require special handling in most processing. @stable ICU 2.1 */
+    UCHAR_LOGICAL_ORDER_EXCEPTION=21,
+    /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
+        Ll+Other_Lowercase @stable ICU 2.1 */
+    UCHAR_LOWERCASE=22,
+    /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
+    UCHAR_MATH=23,
+    /** Binary property Noncharacter_Code_Point.
+        Code points that are explicitly defined as illegal
+        for the encoding of characters. @stable ICU 2.1 */
+    UCHAR_NONCHARACTER_CODE_POINT=24,
+    /** Binary property Quotation_Mark. @stable ICU 2.1 */
+    UCHAR_QUOTATION_MARK=25,
+    /** Binary property Radical (new in Unicode 3.2).
+        For programmatic determination of
+        Ideographic Description Sequences. @stable ICU 2.1 */
+    UCHAR_RADICAL=26,
+    /** Binary property Soft_Dotted (new in Unicode 3.2).
+        Characters with a "soft dot", like i or j.
+        An accent placed on these characters causes
+        the dot to disappear. @stable ICU 2.1 */
+    UCHAR_SOFT_DOTTED=27,
+    /** Binary property Terminal_Punctuation.
+        Punctuation characters that generally mark
+        the end of textual units. @stable ICU 2.1 */
+    UCHAR_TERMINAL_PUNCTUATION=28,
+    /** Binary property Unified_Ideograph (new in Unicode 3.2).
+        For programmatic determination of
+        Ideographic Description Sequences. @stable ICU 2.1 */
+    UCHAR_UNIFIED_IDEOGRAPH=29,
+    /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
+        Lu+Other_Uppercase @stable ICU 2.1 */
+    UCHAR_UPPERCASE=30,
+    /** Binary property White_Space.
+        Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
+        Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
+    UCHAR_WHITE_SPACE=31,
+    /** Binary property XID_Continue.
+        ID_Continue modified to allow closure under
+        normalization forms NFKC and NFKD. @stable ICU 2.1 */
+    UCHAR_XID_CONTINUE=32,
+    /** Binary property XID_Start. ID_Start modified to allow
+        closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
+    UCHAR_XID_START=33,
+    /** Binary property Case_Sensitive. Either the source of a case
+        mapping or _in_ the target of a case mapping. Not the same as
+        the general category Cased_Letter. @stable ICU 2.6 */
+   UCHAR_CASE_SENSITIVE=34,
+    /** Binary property STerm (new in Unicode 4.0.1).
+        Sentence Terminal. Used in UAX #29: Text Boundaries
+        (http://www.unicode.org/reports/tr29/)
+        @stable ICU 3.0 */
+    UCHAR_S_TERM=35,
+    /** Binary property Variation_Selector (new in Unicode 4.0.1).
+        Indicates all those characters that qualify as Variation Selectors.
+        For details on the behavior of these characters,
+        see StandardizedVariants.html and 15.6 Variation Selectors.
+        @stable ICU 3.0 */
+    UCHAR_VARIATION_SELECTOR=36,
+    /** Binary property NFD_Inert.
+        ICU-specific property for characters that are inert under NFD,
+        i.e., they do not interact with adjacent characters.
+        See the documentation for the Normalizer2 class and the
+        Normalizer2::isInert() method.
+        @stable ICU 3.0 */
+    UCHAR_NFD_INERT=37,
+    /** Binary property NFKD_Inert.
+        ICU-specific property for characters that are inert under NFKD,
+        i.e., they do not interact with adjacent characters.
+        See the documentation for the Normalizer2 class and the
+        Normalizer2::isInert() method.
+        @stable ICU 3.0 */
+    UCHAR_NFKD_INERT=38,
+    /** Binary property NFC_Inert.
+        ICU-specific property for characters that are inert under NFC,
+        i.e., they do not interact with adjacent characters.
+        See the documentation for the Normalizer2 class and the
+        Normalizer2::isInert() method.
+        @stable ICU 3.0 */
+    UCHAR_NFC_INERT=39,
+    /** Binary property NFKC_Inert.
+        ICU-specific property for characters that are inert under NFKC,
+        i.e., they do not interact with adjacent characters.
+        See the documentation for the Normalizer2 class and the
+        Normalizer2::isInert() method.
+        @stable ICU 3.0 */
+    UCHAR_NFKC_INERT=40,
+    /** Binary Property Segment_Starter.
+        ICU-specific property for characters that are starters in terms of
+        Unicode normalization and combining character sequences.
+        They have ccc=0 and do not occur in non-initial position of the
+        canonical decomposition of any character
+        (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)).
+        ICU uses this property for segmenting a string for generating a set of
+        canonically equivalent strings, e.g. for canonical closure while
+        processing collation tailoring rules.
+        @stable ICU 3.0 */
+    UCHAR_SEGMENT_STARTER=41,
+    /** Binary property Pattern_Syntax (new in Unicode 4.1).
+        See UAX #31 Identifier and Pattern Syntax
+        (http://www.unicode.org/reports/tr31/)
+        @stable ICU 3.4 */
+    UCHAR_PATTERN_SYNTAX=42,
+    /** Binary property Pattern_White_Space (new in Unicode 4.1).
+        See UAX #31 Identifier and Pattern Syntax
+        (http://www.unicode.org/reports/tr31/)
+        @stable ICU 3.4 */
+    UCHAR_PATTERN_WHITE_SPACE=43,
+    /** Binary property alnum (a C/POSIX character class).
+        Implemented according to the UTS #18 Annex C Standard Recommendation.
+        See the uchar.h file documentation.
+        @stable ICU 3.4 */
+    UCHAR_POSIX_ALNUM=44,
+    /** Binary property blank (a C/POSIX character class).
+        Implemented according to the UTS #18 Annex C Standard Recommendation.
+        See the uchar.h file documentation.
+        @stable ICU 3.4 */
+    UCHAR_POSIX_BLANK=45,
+    /** Binary property graph (a C/POSIX character class).
+        Implemented according to the UTS #18 Annex C Standard Recommendation.
+        See the uchar.h file documentation.
+        @stable ICU 3.4 */
+    UCHAR_POSIX_GRAPH=46,
+    /** Binary property print (a C/POSIX character class).
+        Implemented according to the UTS #18 Annex C Standard Recommendation.
+        See the uchar.h file documentation.
+        @stable ICU 3.4 */
+    UCHAR_POSIX_PRINT=47,
+    /** Binary property xdigit (a C/POSIX character class).
+        Implemented according to the UTS #18 Annex C Standard Recommendation.
+        See the uchar.h file documentation.
+        @stable ICU 3.4 */
+    UCHAR_POSIX_XDIGIT=48,
+    /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */
+    UCHAR_CASED=49,
+    /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */
+    UCHAR_CASE_IGNORABLE=50,
+    /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */
+    UCHAR_CHANGES_WHEN_LOWERCASED=51,
+    /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */
+    UCHAR_CHANGES_WHEN_UPPERCASED=52,
+    /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */
+    UCHAR_CHANGES_WHEN_TITLECASED=53,
+    /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */
+    UCHAR_CHANGES_WHEN_CASEFOLDED=54,
+    /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */
+    UCHAR_CHANGES_WHEN_CASEMAPPED=55,
+    /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
+    UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
+    /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
+    UCHAR_BINARY_LIMIT=57,
+
+    /** Enumerated property Bidi_Class.
+        Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
+    UCHAR_BIDI_CLASS=0x1000,
+    /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+    UCHAR_INT_START=UCHAR_BIDI_CLASS,
+    /** Enumerated property Block.
+        Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
+    UCHAR_BLOCK=0x1001,
+    /** Enumerated property Canonical_Combining_Class.
+        Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
+    UCHAR_CANONICAL_COMBINING_CLASS=0x1002,
+    /** Enumerated property Decomposition_Type.
+        Returns UDecompositionType values. @stable ICU 2.2 */
+    UCHAR_DECOMPOSITION_TYPE=0x1003,
+    /** Enumerated property East_Asian_Width.
+        See http://www.unicode.org/reports/tr11/
+        Returns UEastAsianWidth values. @stable ICU 2.2 */
+    UCHAR_EAST_ASIAN_WIDTH=0x1004,
+    /** Enumerated property General_Category.
+        Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
+    UCHAR_GENERAL_CATEGORY=0x1005,
+    /** Enumerated property Joining_Group.
+        Returns UJoiningGroup values. @stable ICU 2.2 */
+    UCHAR_JOINING_GROUP=0x1006,
+    /** Enumerated property Joining_Type.
+        Returns UJoiningType values. @stable ICU 2.2 */
+    UCHAR_JOINING_TYPE=0x1007,
+    /** Enumerated property Line_Break.
+        Returns ULineBreak values. @stable ICU 2.2 */
+    UCHAR_LINE_BREAK=0x1008,
+    /** Enumerated property Numeric_Type.
+        Returns UNumericType values. @stable ICU 2.2 */
+    UCHAR_NUMERIC_TYPE=0x1009,
+    /** Enumerated property Script.
+        Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
+    UCHAR_SCRIPT=0x100A,
+    /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
+        Returns UHangulSyllableType values. @stable ICU 2.6 */
+    UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,
+    /** Enumerated property NFD_Quick_Check.
+        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+    UCHAR_NFD_QUICK_CHECK=0x100C,
+    /** Enumerated property NFKD_Quick_Check.
+        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+    UCHAR_NFKD_QUICK_CHECK=0x100D,
+    /** Enumerated property NFC_Quick_Check.
+        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+    UCHAR_NFC_QUICK_CHECK=0x100E,
+    /** Enumerated property NFKC_Quick_Check.
+        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+    UCHAR_NFKC_QUICK_CHECK=0x100F,
+    /** Enumerated property Lead_Canonical_Combining_Class.
+        ICU-specific property for the ccc of the first code point
+        of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
+        Useful for checking for canonically ordered text;
+        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+    UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,
+    /** Enumerated property Trail_Canonical_Combining_Class.
+        ICU-specific property for the ccc of the last code point
+        of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
+        Useful for checking for canonically ordered text;
+        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+    UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,
+    /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
+        Used in UAX #29: Text Boundaries
+        (http://www.unicode.org/reports/tr29/)
+        Returns UGraphemeClusterBreak values. @stable ICU 3.4 */
+    UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,
+    /** Enumerated property Sentence_Break (new in Unicode 4.1).
+        Used in UAX #29: Text Boundaries
+        (http://www.unicode.org/reports/tr29/)
+        Returns USentenceBreak values. @stable ICU 3.4 */
+    UCHAR_SENTENCE_BREAK=0x1013,
+    /** Enumerated property Word_Break (new in Unicode 4.1).
+        Used in UAX #29: Text Boundaries
+        (http://www.unicode.org/reports/tr29/)
+        Returns UWordBreakValues values. @stable ICU 3.4 */
+    UCHAR_WORD_BREAK=0x1014,
+    /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
+        Used in UAX #9: Unicode Bidirectional Algorithm
+        (http://www.unicode.org/reports/tr9/)
+        Returns UBidiPairedBracketType values. @stable ICU 52 */
+    UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
+    /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+    UCHAR_INT_LIMIT=0x1016,
+
+    /** Bitmask property General_Category_Mask.
+        This is the General_Category property returned as a bit mask.
+        When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
+        returns bit masks for UCharCategory values where exactly one bit is set.
+        When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
+        a multi-bit mask is used for sets of categories like "Letters".
+        Mask values should be cast to uint32_t.
+        @stable ICU 2.4 */
+    UCHAR_GENERAL_CATEGORY_MASK=0x2000,
+    /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
+    UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
+    /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */
+    UCHAR_MASK_LIMIT=0x2001,
+
+    /** Double property Numeric_Value.
+        Corresponds to u_getNumericValue. @stable ICU 2.4 */
+    UCHAR_NUMERIC_VALUE=0x3000,
+    /** First constant for double Unicode properties. @stable ICU 2.4 */
+    UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
+    /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */
+    UCHAR_DOUBLE_LIMIT=0x3001,
+
+    /** String property Age.
+        Corresponds to u_charAge. @stable ICU 2.4 */
+    UCHAR_AGE=0x4000,
+    /** First constant for string Unicode properties. @stable ICU 2.4 */
+    UCHAR_STRING_START=UCHAR_AGE,
+    /** String property Bidi_Mirroring_Glyph.
+        Corresponds to u_charMirror. @stable ICU 2.4 */
+    UCHAR_BIDI_MIRRORING_GLYPH=0x4001,
+    /** String property Case_Folding.
+        Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
+    UCHAR_CASE_FOLDING=0x4002,
+#ifndef U_HIDE_DEPRECATED_API
+    /** Deprecated string property ISO_Comment.
+        Corresponds to u_getISOComment. @deprecated ICU 49 */
+    UCHAR_ISO_COMMENT=0x4003,
+#endif  /* U_HIDE_DEPRECATED_API */
+    /** String property Lowercase_Mapping.
+        Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
+    UCHAR_LOWERCASE_MAPPING=0x4004,
+    /** String property Name.
+        Corresponds to u_charName. @stable ICU 2.4 */
+    UCHAR_NAME=0x4005,
+    /** String property Simple_Case_Folding.
+        Corresponds to u_foldCase. @stable ICU 2.4 */
+    UCHAR_SIMPLE_CASE_FOLDING=0x4006,
+    /** String property Simple_Lowercase_Mapping.
+        Corresponds to u_tolower. @stable ICU 2.4 */
+    UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,
+    /** String property Simple_Titlecase_Mapping.
+        Corresponds to u_totitle. @stable ICU 2.4 */
+    UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,
+    /** String property Simple_Uppercase_Mapping.
+        Corresponds to u_toupper. @stable ICU 2.4 */
+    UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,
+    /** String property Titlecase_Mapping.
+        Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
+    UCHAR_TITLECASE_MAPPING=0x400A,
+#ifndef U_HIDE_DEPRECATED_API
+    /** String property Unicode_1_Name.
+        This property is of little practical value.
+        Beginning with ICU 49, ICU APIs return an empty string for this property.
+        Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
+    UCHAR_UNICODE_1_NAME=0x400B,
+#endif  /* U_HIDE_DEPRECATED_API */
+    /** String property Uppercase_Mapping.
+        Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
+    UCHAR_UPPERCASE_MAPPING=0x400C,
+    /** String property Bidi_Paired_Bracket (new in Unicode 6.3).
+        Corresponds to u_getBidiPairedBracket. @stable ICU 52 */
+    UCHAR_BIDI_PAIRED_BRACKET=0x400D,
+    /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
+    UCHAR_STRING_LIMIT=0x400E,
+
+    /** Miscellaneous property Script_Extensions (new in Unicode 6.0).
+        Some characters are commonly used in multiple scripts.
+        For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+        Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
+        @stable ICU 4.6 */
+    UCHAR_SCRIPT_EXTENSIONS=0x7000,
+    /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
+    UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
+    /** One more than the last constant for Unicode properties with unusual value types.
+     * @stable ICU 4.6 */
+    UCHAR_OTHER_PROPERTY_LIMIT=0x7001,
+    /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
+    UCHAR_INVALID_CODE = -1
+} UProperty;
+
+/**
+ * Data for enumerated Unicode general category types.
+ * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
+ * @stable ICU 2.0
+ */
+typedef enum UCharCategory
+{
+    /*
+     * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
+     * It matches pairs of lines like
+     *     / ** <Unicode 2-letter General_Category value> comment... * /
+     *     U_<[A-Z_]+> = <integer>,
+     */
+
+    /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
+    U_UNASSIGNED              = 0,
+    /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
+    U_GENERAL_OTHER_TYPES     = 0,
+    /** Lu @stable ICU 2.0 */
+    U_UPPERCASE_LETTER        = 1,
+    /** Ll @stable ICU 2.0 */
+    U_LOWERCASE_LETTER        = 2,
+    /** Lt @stable ICU 2.0 */
+    U_TITLECASE_LETTER        = 3,
+    /** Lm @stable ICU 2.0 */
+    U_MODIFIER_LETTER         = 4,
+    /** Lo @stable ICU 2.0 */
+    U_OTHER_LETTER            = 5,
+    /** Mn @stable ICU 2.0 */
+    U_NON_SPACING_MARK        = 6,
+    /** Me @stable ICU 2.0 */
+    U_ENCLOSING_MARK          = 7,
+    /** Mc @stable ICU 2.0 */
+    U_COMBINING_SPACING_MARK  = 8,
+    /** Nd @stable ICU 2.0 */
+    U_DECIMAL_DIGIT_NUMBER    = 9,
+    /** Nl @stable ICU 2.0 */
+    U_LETTER_NUMBER           = 10,
+    /** No @stable ICU 2.0 */
+    U_OTHER_NUMBER            = 11,
+    /** Zs @stable ICU 2.0 */
+    U_SPACE_SEPARATOR         = 12,
+    /** Zl @stable ICU 2.0 */
+    U_LINE_SEPARATOR          = 13,
+    /** Zp @stable ICU 2.0 */
+    U_PARAGRAPH_SEPARATOR     = 14,
+    /** Cc @stable ICU 2.0 */
+    U_CONTROL_CHAR            = 15,
+    /** Cf @stable ICU 2.0 */
+    U_FORMAT_CHAR             = 16,
+    /** Co @stable ICU 2.0 */
+    U_PRIVATE_USE_CHAR        = 17,
+    /** Cs @stable ICU 2.0 */
+    U_SURROGATE               = 18,
+    /** Pd @stable ICU 2.0 */
+    U_DASH_PUNCTUATION        = 19,
+    /** Ps @stable ICU 2.0 */
+    U_START_PUNCTUATION       = 20,
+    /** Pe @stable ICU 2.0 */
+    U_END_PUNCTUATION         = 21,
+    /** Pc @stable ICU 2.0 */
+    U_CONNECTOR_PUNCTUATION   = 22,
+    /** Po @stable ICU 2.0 */
+    U_OTHER_PUNCTUATION       = 23,
+    /** Sm @stable ICU 2.0 */
+    U_MATH_SYMBOL             = 24,
+    /** Sc @stable ICU 2.0 */
+    U_CURRENCY_SYMBOL         = 25,
+    /** Sk @stable ICU 2.0 */
+    U_MODIFIER_SYMBOL         = 26,
+    /** So @stable ICU 2.0 */
+    U_OTHER_SYMBOL            = 27,
+    /** Pi @stable ICU 2.0 */
+    U_INITIAL_PUNCTUATION     = 28,
+    /** Pf @stable ICU 2.0 */
+    U_FINAL_PUNCTUATION       = 29,
+    /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */
+    U_CHAR_CATEGORY_COUNT
+} UCharCategory;
+
+/**
+ * U_GC_XX_MASK constants are bit flags corresponding to Unicode
+ * general category values.
+ * For each category, the nth bit is set if the numeric value of the
+ * corresponding UCharCategory constant is n.
+ *
+ * There are also some U_GC_Y_MASK constants for groups of general categories
+ * like L for all letter categories.
+ *
+ * @see u_charType
+ * @see U_GET_GC_MASK
+ * @see UCharCategory
+ * @stable ICU 2.1
+ */
+#define U_GC_CN_MASK    U_MASK(U_GENERAL_OTHER_TYPES)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LU_MASK    U_MASK(U_UPPERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LL_MASK    U_MASK(U_LOWERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LT_MASK    U_MASK(U_TITLECASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LM_MASK    U_MASK(U_MODIFIER_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LO_MASK    U_MASK(U_OTHER_LETTER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MN_MASK    U_MASK(U_NON_SPACING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ME_MASK    U_MASK(U_ENCLOSING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MC_MASK    U_MASK(U_COMBINING_SPACING_MARK)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ND_MASK    U_MASK(U_DECIMAL_DIGIT_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NL_MASK    U_MASK(U_LETTER_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NO_MASK    U_MASK(U_OTHER_NUMBER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZS_MASK    U_MASK(U_SPACE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZL_MASK    U_MASK(U_LINE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZP_MASK    U_MASK(U_PARAGRAPH_SEPARATOR)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CC_MASK    U_MASK(U_CONTROL_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CF_MASK    U_MASK(U_FORMAT_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CO_MASK    U_MASK(U_PRIVATE_USE_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CS_MASK    U_MASK(U_SURROGATE)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PD_MASK    U_MASK(U_DASH_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PS_MASK    U_MASK(U_START_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PE_MASK    U_MASK(U_END_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PC_MASK    U_MASK(U_CONNECTOR_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PO_MASK    U_MASK(U_OTHER_PUNCTUATION)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SM_MASK    U_MASK(U_MATH_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SC_MASK    U_MASK(U_CURRENCY_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SK_MASK    U_MASK(U_MODIFIER_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SO_MASK    U_MASK(U_OTHER_SYMBOL)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PI_MASK    U_MASK(U_INITIAL_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PF_MASK    U_MASK(U_FINAL_PUNCTUATION)
+
+
+/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
+#define U_GC_L_MASK \
+            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
+#define U_GC_LC_MASK \
+            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
+
+/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
+#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
+
+/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
+#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
+#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
+
+/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */
+#define U_GC_C_MASK \
+            (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
+
+/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */
+#define U_GC_P_MASK \
+            (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
+             U_GC_PI_MASK|U_GC_PF_MASK)
+
+/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */
+#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
+
+/**
+ * This specifies the language directional property of a character set.
+ * @stable ICU 2.0
+ */
+typedef enum UCharDirection {
+    /*
+     * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
+     * It matches pairs of lines like
+     *     / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
+     *     U_<[A-Z_]+> = <integer>,
+     */
+
+    /** L @stable ICU 2.0 */
+    U_LEFT_TO_RIGHT               = 0,
+    /** R @stable ICU 2.0 */
+    U_RIGHT_TO_LEFT               = 1,
+    /** EN @stable ICU 2.0 */
+    U_EUROPEAN_NUMBER             = 2,
+    /** ES @stable ICU 2.0 */
+    U_EUROPEAN_NUMBER_SEPARATOR   = 3,
+    /** ET @stable ICU 2.0 */
+    U_EUROPEAN_NUMBER_TERMINATOR  = 4,
+    /** AN @stable ICU 2.0 */
+    U_ARABIC_NUMBER               = 5,
+    /** CS @stable ICU 2.0 */
+    U_COMMON_NUMBER_SEPARATOR     = 6,
+    /** B @stable ICU 2.0 */
+    U_BLOCK_SEPARATOR             = 7,
+    /** S @stable ICU 2.0 */
+    U_SEGMENT_SEPARATOR           = 8,
+    /** WS @stable ICU 2.0 */
+    U_WHITE_SPACE_NEUTRAL         = 9,
+    /** ON @stable ICU 2.0 */
+    U_OTHER_NEUTRAL               = 10,
+    /** LRE @stable ICU 2.0 */
+    U_LEFT_TO_RIGHT_EMBEDDING     = 11,
+    /** LRO @stable ICU 2.0 */
+    U_LEFT_TO_RIGHT_OVERRIDE      = 12,
+    /** AL @stable ICU 2.0 */
+    U_RIGHT_TO_LEFT_ARABIC        = 13,
+    /** RLE @stable ICU 2.0 */
+    U_RIGHT_TO_LEFT_EMBEDDING     = 14,
+    /** RLO @stable ICU 2.0 */
+    U_RIGHT_TO_LEFT_OVERRIDE      = 15,
+    /** PDF @stable ICU 2.0 */
+    U_POP_DIRECTIONAL_FORMAT      = 16,
+    /** NSM @stable ICU 2.0 */
+    U_DIR_NON_SPACING_MARK        = 17,
+    /** BN @stable ICU 2.0 */
+    U_BOUNDARY_NEUTRAL            = 18,
+    /** FSI @stable ICU 52 */
+    U_FIRST_STRONG_ISOLATE        = 19,
+    /** LRI @stable ICU 52 */
+    U_LEFT_TO_RIGHT_ISOLATE       = 20,
+    /** RLI @stable ICU 52 */
+    U_RIGHT_TO_LEFT_ISOLATE       = 21,
+    /** PDI @stable ICU 52 */
+    U_POP_DIRECTIONAL_ISOLATE     = 22,
+    /** @stable ICU 2.0 */
+    U_CHAR_DIRECTION_COUNT
+} UCharDirection;
+
+/**
+ * Bidi Paired Bracket Type constants.
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @stable ICU 52
+ */
+typedef enum UBidiPairedBracketType {
+    /*
+     * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
+     */
+
+    /** Not a paired bracket. @stable ICU 52 */
+    U_BPT_NONE,
+    /** Open paired bracket. @stable ICU 52 */
+    U_BPT_OPEN,
+    /** Close paired bracket. @stable ICU 52 */
+    U_BPT_CLOSE,
+    /** @stable ICU 52 */
+    U_BPT_COUNT /* 3 */
+} UBidiPairedBracketType;
+
+/**
+ * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
+ * @stable ICU 2.0
+ */
+enum UBlockCode {
+    /*
+     * Note: UBlockCode constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     UBLOCK_<Unicode Block value name> = <integer>,
+     */
+
+    /** New No_Block value in Unicode 4. @stable ICU 2.6 */
+    UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BASIC_LATIN = 1, /*[0000]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/
+
+    /**
+     * Unicode 3.2 renames this block to "Greek and Coptic".
+     * @stable ICU 2.0
+     */
+    UBLOCK_GREEK =8, /*[0370]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CYRILLIC =9, /*[0400]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ARMENIAN =10, /*[0530]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HEBREW =11, /*[0590]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ARABIC =12, /*[0600]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SYRIAC =13, /*[0700]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_THAANA =14, /*[0780]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_DEVANAGARI =15, /*[0900]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BENGALI =16, /*[0980]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GURMUKHI =17, /*[0A00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GUJARATI =18, /*[0A80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ORIYA =19, /*[0B00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_TAMIL =20, /*[0B80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_TELUGU =21, /*[0C00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_KANNADA =22, /*[0C80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MALAYALAM =23, /*[0D00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SINHALA =24, /*[0D80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_THAI =25, /*[0E00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LAO =26, /*[0E80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_TIBETAN =27, /*[0F00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MYANMAR =28, /*[1000]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GEORGIAN =29, /*[10A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HANGUL_JAMO =30, /*[1100]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ETHIOPIC =31, /*[1200]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CHEROKEE =32, /*[13A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_OGHAM =34, /*[1680]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_RUNIC =35, /*[16A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_KHMER =36, /*[1780]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MONGOLIAN =37, /*[1800]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
+
+    /**
+     * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
+     * @stable ICU 2.0
+     */
+    UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_NUMBER_FORMS =45, /*[2150]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ARROWS =46, /*[2190]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BOX_DRAWING =52, /*[2500]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_DINGBATS =56, /*[2700]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HIRAGANA =62, /*[3040]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_KATAKANA =63, /*[30A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BOPOMOFO =64, /*[3100]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_KANBUN =66, /*[3190]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_YI_SYLLABLES =72, /*[A000]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_YI_RADICALS =73, /*[A490]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
+
+    /**
+     * Same as UBLOCK_PRIVATE_USE.
+     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+     * and multiple code point ranges had this block.
+     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+     * adds separate blocks for the supplementary PUAs.
+     *
+     * @stable ICU 2.0
+     */
+    UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
+    /**
+     * Same as UBLOCK_PRIVATE_USE_AREA.
+     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+     * and multiple code point ranges had this block.
+     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+     * adds separate blocks for the supplementary PUAs.
+     *
+     * @stable ICU 2.0
+     */
+    UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SPECIALS =86, /*[FFF0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/
+
+    /* New blocks in Unicode 3.1 */
+
+    /** @stable ICU 2.0 */
+    UBLOCK_OLD_ITALIC = 88, /*[10300]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_GOTHIC = 89, /*[10330]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_DESERET = 90, /*[10400]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B  = 94, /*[20000]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_TAGS = 96, /*[E0000]*/
+
+    /* New blocks in Unicode 3.2 */
+
+    /** @stable ICU 3.0  */
+    UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/
+    /**
+     * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
+     * @stable ICU 2.2
+     */
+    UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, 
+    /** @stable ICU 2.2 */
+    UBLOCK_TAGALOG = 98, /*[1700]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_HANUNOO = 99, /*[1720]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_BUHID = 100, /*[1740]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_TAGBANWA = 101, /*[1760]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
+
+    /* New blocks in Unicode 4 */
+
+    /** @stable ICU 2.6 */
+    UBLOCK_LIMBU = 111, /*[1900]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_TAI_LE = 112, /*[1950]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_UGARITIC = 120, /*[10380]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_SHAVIAN = 121, /*[10450]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_OSMANYA = 122, /*[10480]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/
+
+    /* New blocks in Unicode 4.1 */
+
+    /** @stable ICU 3.4 */
+    UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_BUGINESE = 129, /*[1A00]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_CJK_STROKES = 130, /*[31C0]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_COPTIC = 132, /*[2C80]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_TIFINAGH = 144, /*[2D30]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
+
+    /* New blocks in Unicode 5.0 */
+
+    /** @stable ICU 3.6 */
+    UBLOCK_NKO = 146, /*[07C0]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_BALINESE = 147, /*[1B00]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_PHAGS_PA = 150, /*[A840]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_PHOENICIAN = 151, /*[10900]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_CUNEIFORM = 152, /*[12000]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
+
+    /* New blocks in Unicode 5.1 */
+
+    /** @stable ICU 4.0 */
+    UBLOCK_SUNDANESE = 155, /*[1B80]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_LEPCHA = 156, /*[1C00]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_OL_CHIKI = 157, /*[1C50]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_VAI = 159, /*[A500]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_SAURASHTRA = 161, /*[A880]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_KAYAH_LI = 162, /*[A900]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_REJANG = 163, /*[A930]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_CHAM = 164, /*[AA00]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_LYCIAN = 167, /*[10280]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_CARIAN = 168, /*[102A0]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_LYDIAN = 169, /*[10920]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
+    /** @stable ICU 4.0 */
+    UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
+
+    /* New blocks in Unicode 5.2 */
+
+    /** @stable ICU 4.4 */
+    UBLOCK_SAMARITAN = 172, /*[0800]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_TAI_THAM = 174, /*[1A20]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_LISU = 176, /*[A4D0]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_BAMUM = 177, /*[A6A0]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_JAVANESE = 181, /*[A980]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_TAI_VIET = 183, /*[AA80]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_AVESTAN = 188, /*[10B00]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_KAITHI = 193, /*[11080]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/
+    /** @stable ICU 4.4 */
+    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/
+
+    /* New blocks in Unicode 6.0 */
+
+    /** @stable ICU 4.6 */
+    UBLOCK_MANDAIC = 198, /*[0840]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_BATAK = 199, /*[1BC0]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_BRAHMI = 201, /*[11000]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_EMOTICONS = 206, /*[1F600]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
+    /** @stable ICU 4.6 */
+    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/
+
+    /* New blocks in Unicode 6.1 */
+
+    /** @stable ICU 49 */
+    UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
+    /** @stable ICU 49 */
+    UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/
+    /** @stable ICU 49 */
+    UBLOCK_CHAKMA = 212, /*[11100]*/
+    /** @stable ICU 49 */
+    UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/
+    /** @stable ICU 49 */
+    UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
+    /** @stable ICU 49 */
+    UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
+    /** @stable ICU 49 */
+    UBLOCK_MIAO = 216, /*[16F00]*/
+    /** @stable ICU 49 */
+    UBLOCK_SHARADA = 217, /*[11180]*/
+    /** @stable ICU 49 */
+    UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
+    /** @stable ICU 49 */
+    UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/
+    /** @stable ICU 49 */
+    UBLOCK_TAKRI = 220, /*[11680]*/
+
+    /* New blocks in Unicode 7.0 */
+
+    /** @stable ICU 54 */
+    UBLOCK_BASSA_VAH = 221, /*[16AD0]*/
+    /** @stable ICU 54 */
+    UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/
+    /** @stable ICU 54 */
+    UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/
+    /** @stable ICU 54 */
+    UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/
+    /** @stable ICU 54 */
+    UBLOCK_DUPLOYAN = 225, /*[1BC00]*/
+    /** @stable ICU 54 */
+    UBLOCK_ELBASAN = 226, /*[10500]*/
+    /** @stable ICU 54 */
+    UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/
+    /** @stable ICU 54 */
+    UBLOCK_GRANTHA = 228, /*[11300]*/
+    /** @stable ICU 54 */
+    UBLOCK_KHOJKI = 229, /*[11200]*/
+    /** @stable ICU 54 */
+    UBLOCK_KHUDAWADI = 230, /*[112B0]*/
+    /** @stable ICU 54 */
+    UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/
+    /** @stable ICU 54 */
+    UBLOCK_LINEAR_A = 232, /*[10600]*/
+    /** @stable ICU 54 */
+    UBLOCK_MAHAJANI = 233, /*[11150]*/
+    /** @stable ICU 54 */
+    UBLOCK_MANICHAEAN = 234, /*[10AC0]*/
+    /** @stable ICU 54 */
+    UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/
+    /** @stable ICU 54 */
+    UBLOCK_MODI = 236, /*[11600]*/
+    /** @stable ICU 54 */
+    UBLOCK_MRO = 237, /*[16A40]*/
+    /** @stable ICU 54 */
+    UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/
+    /** @stable ICU 54 */
+    UBLOCK_NABATAEAN = 239, /*[10880]*/
+    /** @stable ICU 54 */
+    UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/
+    /** @stable ICU 54 */
+    UBLOCK_OLD_PERMIC = 241, /*[10350]*/
+    /** @stable ICU 54 */
+    UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/
+    /** @stable ICU 54 */
+    UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/
+    /** @stable ICU 54 */
+    UBLOCK_PALMYRENE = 244, /*[10860]*/
+    /** @stable ICU 54 */
+    UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/
+    /** @stable ICU 54 */
+    UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/
+    /** @stable ICU 54 */
+    UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/
+    /** @stable ICU 54 */
+    UBLOCK_SIDDHAM = 248, /*[11580]*/
+    /** @stable ICU 54 */
+    UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/
+    /** @stable ICU 54 */
+    UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/
+    /** @stable ICU 54 */
+    UBLOCK_TIRHUTA = 251, /*[11480]*/
+    /** @stable ICU 54 */
+    UBLOCK_WARANG_CITI = 252, /*[118A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_COUNT = 253,
+
+    /** @stable ICU 2.0 */
+    UBLOCK_INVALID_CODE=-1
+};
+
+/** @stable ICU 2.0 */
+typedef enum UBlockCode UBlockCode;
+
+/**
+ * East Asian Width constants.
+ *
+ * @see UCHAR_EAST_ASIAN_WIDTH
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+typedef enum UEastAsianWidth {
+    /*
+     * Note: UEastAsianWidth constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_EA_<Unicode East_Asian_Width value name>
+     */
+
+    U_EA_NEUTRAL,   /*[N]*/
+    U_EA_AMBIGUOUS, /*[A]*/
+    U_EA_HALFWIDTH, /*[H]*/
+    U_EA_FULLWIDTH, /*[F]*/
+    U_EA_NARROW,    /*[Na]*/
+    U_EA_WIDE,      /*[W]*/
+    U_EA_COUNT
+} UEastAsianWidth;
+
+/**
+ * Selector constants for u_charName().
+ * u_charName() returns the "modern" name of a
+ * Unicode character; or the name that was defined in
+ * Unicode version 1.0, before the Unicode standard merged
+ * with ISO-10646; or an "extended" name that gives each
+ * Unicode code point a unique name.
+ *
+ * @see u_charName
+ * @stable ICU 2.0
+ */
+typedef enum UCharNameChoice {
+    /** Unicode character name (Name property). @stable ICU 2.0 */
+    U_UNICODE_CHAR_NAME,
+#ifndef U_HIDE_DEPRECATED_API 
+    /**
+     * The Unicode_1_Name property value which is of little practical value.
+     * Beginning with ICU 49, ICU APIs return an empty string for this name choice.
+     * @deprecated ICU 49
+     */
+    U_UNICODE_10_CHAR_NAME,
+#endif  /* U_HIDE_DEPRECATED_API */
+    /** Standard or synthetic character name. @stable ICU 2.0 */
+    U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
+    /** Corrected name from NameAliases.txt. @stable ICU 4.4 */
+    U_CHAR_NAME_ALIAS,
+    /** @stable ICU 2.0 */
+    U_CHAR_NAME_CHOICE_COUNT
+} UCharNameChoice;
+
+/**
+ * Selector constants for u_getPropertyName() and
+ * u_getPropertyValueName().  These selectors are used to choose which
+ * name is returned for a given property or value.  All properties and
+ * values have a long name.  Most have a short name, but some do not.
+ * Unicode allows for additional names, beyond the long and short
+ * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
+ * i=1, 2,...
+ *
+ * @see u_getPropertyName()
+ * @see u_getPropertyValueName()
+ * @stable ICU 2.4
+ */
+typedef enum UPropertyNameChoice {
+    U_SHORT_PROPERTY_NAME,
+    U_LONG_PROPERTY_NAME,
+    U_PROPERTY_NAME_CHOICE_COUNT
+} UPropertyNameChoice;
+
+/**
+ * Decomposition Type constants.
+ *
+ * @see UCHAR_DECOMPOSITION_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UDecompositionType {
+    /*
+     * Note: UDecompositionType constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_DT_<Unicode Decomposition_Type value name>
+     */
+
+    U_DT_NONE,              /*[none]*/
+    U_DT_CANONICAL,         /*[can]*/
+    U_DT_COMPAT,            /*[com]*/
+    U_DT_CIRCLE,            /*[enc]*/
+    U_DT_FINAL,             /*[fin]*/
+    U_DT_FONT,              /*[font]*/
+    U_DT_FRACTION,          /*[fra]*/
+    U_DT_INITIAL,           /*[init]*/
+    U_DT_ISOLATED,          /*[iso]*/
+    U_DT_MEDIAL,            /*[med]*/
+    U_DT_NARROW,            /*[nar]*/
+    U_DT_NOBREAK,           /*[nb]*/
+    U_DT_SMALL,             /*[sml]*/
+    U_DT_SQUARE,            /*[sqr]*/
+    U_DT_SUB,               /*[sub]*/
+    U_DT_SUPER,             /*[sup]*/
+    U_DT_VERTICAL,          /*[vert]*/
+    U_DT_WIDE,              /*[wide]*/
+    U_DT_COUNT /* 18 */
+} UDecompositionType;
+
+/**
+ * Joining Type constants.
+ *
+ * @see UCHAR_JOINING_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningType {
+    /*
+     * Note: UJoiningType constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_JT_<Unicode Joining_Type value name>
+     */
+
+    U_JT_NON_JOINING,       /*[U]*/
+    U_JT_JOIN_CAUSING,      /*[C]*/
+    U_JT_DUAL_JOINING,      /*[D]*/
+    U_JT_LEFT_JOINING,      /*[L]*/
+    U_JT_RIGHT_JOINING,     /*[R]*/
+    U_JT_TRANSPARENT,       /*[T]*/
+    U_JT_COUNT /* 6 */
+} UJoiningType;
+
+/**
+ * Joining Group constants.
+ *
+ * @see UCHAR_JOINING_GROUP
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningGroup {
+    /*
+     * Note: UJoiningGroup constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_JG_<Unicode Joining_Group value name>
+     */
+
+    U_JG_NO_JOINING_GROUP,
+    U_JG_AIN,
+    U_JG_ALAPH,
+    U_JG_ALEF,
+    U_JG_BEH,
+    U_JG_BETH,
+    U_JG_DAL,
+    U_JG_DALATH_RISH,
+    U_JG_E,
+    U_JG_FEH,
+    U_JG_FINAL_SEMKATH,
+    U_JG_GAF,
+    U_JG_GAMAL,
+    U_JG_HAH,
+    U_JG_TEH_MARBUTA_GOAL,  /**< @stable ICU 4.6 */
+    U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
+    U_JG_HE,
+    U_JG_HEH,
+    U_JG_HEH_GOAL,
+    U_JG_HETH,
+    U_JG_KAF,
+    U_JG_KAPH,
+    U_JG_KNOTTED_HEH,
+    U_JG_LAM,
+    U_JG_LAMADH,
+    U_JG_MEEM,
+    U_JG_MIM,
+    U_JG_NOON,
+    U_JG_NUN,
+    U_JG_PE,
+    U_JG_QAF,
+    U_JG_QAPH,
+    U_JG_REH,
+    U_JG_REVERSED_PE,
+    U_JG_SAD,
+    U_JG_SADHE,
+    U_JG_SEEN,
+    U_JG_SEMKATH,
+    U_JG_SHIN,
+    U_JG_SWASH_KAF,
+    U_JG_SYRIAC_WAW,
+    U_JG_TAH,
+    U_JG_TAW,
+    U_JG_TEH_MARBUTA,
+    U_JG_TETH,
+    U_JG_WAW,
+    U_JG_YEH,
+    U_JG_YEH_BARREE,
+    U_JG_YEH_WITH_TAIL,
+    U_JG_YUDH,
+    U_JG_YUDH_HE,
+    U_JG_ZAIN,
+    U_JG_FE,        /**< @stable ICU 2.6 */
+    U_JG_KHAPH,     /**< @stable ICU 2.6 */
+    U_JG_ZHAIN,     /**< @stable ICU 2.6 */
+    U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */
+    U_JG_FARSI_YEH, /**< @stable ICU 4.4 */
+    U_JG_NYA,       /**< @stable ICU 4.4 */
+    U_JG_ROHINGYA_YEH,  /**< @stable ICU 49 */
+    U_JG_MANICHAEAN_ALEPH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_AYIN,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_BETH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_DALETH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_DHAMEDH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_FIVE,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_GIMEL,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_HETH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_HUNDRED,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_KAPH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_LAMEDH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_MEM,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_NUN,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_ONE,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_PE,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_QOPH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_RESH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_SADHE,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_SAMEKH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_TAW,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_TEN,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_TETH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_THAMEDH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_TWENTY,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_WAW,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_YODH,  /**< @stable ICU 54 */
+    U_JG_MANICHAEAN_ZAYIN,  /**< @stable ICU 54 */
+    U_JG_STRAIGHT_WAW,  /**< @stable ICU 54 */
+    U_JG_COUNT
+} UJoiningGroup;
+
+/**
+ * Grapheme Cluster Break constants.
+ *
+ * @see UCHAR_GRAPHEME_CLUSTER_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UGraphemeClusterBreak {
+    /*
+     * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_GCB_<Unicode Grapheme_Cluster_Break value name>
+     */
+
+    U_GCB_OTHER = 0,            /*[XX]*/
+    U_GCB_CONTROL = 1,          /*[CN]*/
+    U_GCB_CR = 2,               /*[CR]*/
+    U_GCB_EXTEND = 3,           /*[EX]*/
+    U_GCB_L = 4,                /*[L]*/
+    U_GCB_LF = 5,               /*[LF]*/
+    U_GCB_LV = 6,               /*[LV]*/
+    U_GCB_LVT = 7,              /*[LVT]*/
+    U_GCB_T = 8,                /*[T]*/
+    U_GCB_V = 9,                /*[V]*/
+    U_GCB_SPACING_MARK = 10,    /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+    U_GCB_PREPEND = 11,         /*[PP]*/
+    U_GCB_REGIONAL_INDICATOR = 12,  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+    U_GCB_COUNT = 13
+} UGraphemeClusterBreak;
+
+/**
+ * Word Break constants.
+ * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
+ *
+ * @see UCHAR_WORD_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UWordBreakValues {
+    /*
+     * Note: UWordBreakValues constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_WB_<Unicode Word_Break value name>
+     */
+
+    U_WB_OTHER = 0,             /*[XX]*/
+    U_WB_ALETTER = 1,           /*[LE]*/
+    U_WB_FORMAT = 2,            /*[FO]*/
+    U_WB_KATAKANA = 3,          /*[KA]*/
+    U_WB_MIDLETTER = 4,         /*[ML]*/
+    U_WB_MIDNUM = 5,            /*[MN]*/
+    U_WB_NUMERIC = 6,           /*[NU]*/
+    U_WB_EXTENDNUMLET = 7,      /*[EX]*/
+    U_WB_CR = 8,                /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+    U_WB_EXTEND = 9,            /*[Extend]*/
+    U_WB_LF = 10,               /*[LF]*/
+    U_WB_MIDNUMLET =11,         /*[MB]*/
+    U_WB_NEWLINE =12,           /*[NL]*/
+    U_WB_REGIONAL_INDICATOR = 13,   /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+    U_WB_HEBREW_LETTER = 14,    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
+    U_WB_SINGLE_QUOTE = 15,     /*[SQ]*/
+    U_WB_DOUBLE_QUOTE = 16,     /*[DQ]*/
+    U_WB_COUNT = 17
+} UWordBreakValues;
+
+/**
+ * Sentence Break constants.
+ *
+ * @see UCHAR_SENTENCE_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum USentenceBreak {
+    /*
+     * Note: USentenceBreak constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_SB_<Unicode Sentence_Break value name>
+     */
+
+    U_SB_OTHER = 0,             /*[XX]*/
+    U_SB_ATERM = 1,             /*[AT]*/
+    U_SB_CLOSE = 2,             /*[CL]*/
+    U_SB_FORMAT = 3,            /*[FO]*/
+    U_SB_LOWER = 4,             /*[LO]*/
+    U_SB_NUMERIC = 5,           /*[NU]*/
+    U_SB_OLETTER = 6,           /*[LE]*/
+    U_SB_SEP = 7,               /*[SE]*/
+    U_SB_SP = 8,                /*[SP]*/
+    U_SB_STERM = 9,             /*[ST]*/
+    U_SB_UPPER = 10,            /*[UP]*/
+    U_SB_CR = 11,               /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+    U_SB_EXTEND = 12,           /*[EX]*/
+    U_SB_LF = 13,               /*[LF]*/
+    U_SB_SCONTINUE = 14,        /*[SC]*/
+    U_SB_COUNT = 15
+} USentenceBreak;
+
+/**
+ * Line Break constants.
+ *
+ * @see UCHAR_LINE_BREAK
+ * @stable ICU 2.2
+ */
+typedef enum ULineBreak {
+    /*
+     * Note: ULineBreak constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_LB_<Unicode Line_Break value name>
+     */
+
+    U_LB_UNKNOWN = 0,           /*[XX]*/
+    U_LB_AMBIGUOUS = 1,         /*[AI]*/
+    U_LB_ALPHABETIC = 2,        /*[AL]*/
+    U_LB_BREAK_BOTH = 3,        /*[B2]*/
+    U_LB_BREAK_AFTER = 4,       /*[BA]*/
+    U_LB_BREAK_BEFORE = 5,      /*[BB]*/
+    U_LB_MANDATORY_BREAK = 6,   /*[BK]*/
+    U_LB_CONTINGENT_BREAK = 7,  /*[CB]*/
+    U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
+    U_LB_COMBINING_MARK = 9,    /*[CM]*/
+    U_LB_CARRIAGE_RETURN = 10,   /*[CR]*/
+    U_LB_EXCLAMATION = 11,       /*[EX]*/
+    U_LB_GLUE = 12,              /*[GL]*/
+    U_LB_HYPHEN = 13,            /*[HY]*/
+    U_LB_IDEOGRAPHIC = 14,       /*[ID]*/
+    /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
+    U_LB_INSEPARABLE = 15,       /*[IN]*/
+    U_LB_INSEPERABLE = U_LB_INSEPARABLE,
+    U_LB_INFIX_NUMERIC = 16,     /*[IS]*/
+    U_LB_LINE_FEED = 17,         /*[LF]*/
+    U_LB_NONSTARTER = 18,        /*[NS]*/
+    U_LB_NUMERIC = 19,           /*[NU]*/
+    U_LB_OPEN_PUNCTUATION = 20,  /*[OP]*/
+    U_LB_POSTFIX_NUMERIC = 21,   /*[PO]*/
+    U_LB_PREFIX_NUMERIC = 22,    /*[PR]*/
+    U_LB_QUOTATION = 23,         /*[QU]*/
+    U_LB_COMPLEX_CONTEXT = 24,   /*[SA]*/
+    U_LB_SURROGATE = 25,         /*[SG]*/
+    U_LB_SPACE = 26,             /*[SP]*/
+    U_LB_BREAK_SYMBOLS = 27,     /*[SY]*/
+    U_LB_ZWSPACE = 28,           /*[ZW]*/
+    U_LB_NEXT_LINE = 29,         /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
+    U_LB_WORD_JOINER = 30,       /*[WJ]*/
+    U_LB_H2 = 31,                /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
+    U_LB_H3 = 32,                /*[H3]*/
+    U_LB_JL = 33,                /*[JL]*/
+    U_LB_JT = 34,                /*[JT]*/
+    U_LB_JV = 35,                /*[JV]*/
+    U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
+    U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
+    U_LB_HEBREW_LETTER = 38,     /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
+    U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+    U_LB_COUNT = 40
+} ULineBreak;
+
+/**
+ * Numeric Type constants.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UNumericType {
+    /*
+     * Note: UNumericType constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_NT_<Unicode Numeric_Type value name>
+     */
+
+    U_NT_NONE,              /*[None]*/
+    U_NT_DECIMAL,           /*[de]*/
+    U_NT_DIGIT,             /*[di]*/
+    U_NT_NUMERIC,           /*[nu]*/
+    U_NT_COUNT
+} UNumericType;
+
+/**
+ * Hangul Syllable Type constants.
+ *
+ * @see UCHAR_HANGUL_SYLLABLE_TYPE
+ * @stable ICU 2.6
+ */
+typedef enum UHangulSyllableType {
+    /*
+     * Note: UHangulSyllableType constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_HST_<Unicode Hangul_Syllable_Type value name>
+     */
+
+    U_HST_NOT_APPLICABLE,   /*[NA]*/
+    U_HST_LEADING_JAMO,     /*[L]*/
+    U_HST_VOWEL_JAMO,       /*[V]*/
+    U_HST_TRAILING_JAMO,    /*[T]*/
+    U_HST_LV_SYLLABLE,      /*[LV]*/
+    U_HST_LVT_SYLLABLE,     /*[LVT]*/
+    U_HST_COUNT
+} UHangulSyllableType;
+
+/**
+ * Check a binary Unicode property for a code point.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which binary property to check.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.
+ * @return TRUE or FALSE according to the binary Unicode property value for c.
+ *         Also FALSE if 'which' is out of bounds or if the Unicode version
+ *         does not have data for the property at all, or not for this code point.
+ *
+ * @see UProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_hasBinaryProperty(UChar32 c, UProperty which);
+
+/**
+ * Check if a code point has the Alphabetic Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
+ * This is different from u_isalpha!
+ * @param c Code point to test
+ * @return true if the code point has the Alphabetic Unicode property, false otherwise
+ *
+ * @see UCHAR_ALPHABETIC
+ * @see u_isalpha
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUAlphabetic(UChar32 c);
+
+/**
+ * Check if a code point has the Lowercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
+ * This is different from u_islower!
+ * @param c Code point to test
+ * @return true if the code point has the Lowercase Unicode property, false otherwise
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_islower
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isULowercase(UChar32 c);
+
+/**
+ * Check if a code point has the Uppercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
+ * This is different from u_isupper!
+ * @param c Code point to test
+ * @return true if the code point has the Uppercase Unicode property, false otherwise
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_isupper
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUUppercase(UChar32 c);
+
+/**
+ * Check if a code point has the White_Space Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
+ * This is different from both u_isspace and u_isWhitespace!
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c Code point to test
+ * @return true if the code point has the White_Space Unicode property, false otherwise.
+ *
+ * @see UCHAR_WHITE_SPACE
+ * @see u_isWhitespace
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUWhiteSpace(UChar32 c);
+
+/**
+ * Get the property value for an enumerated or integer Unicode property for a code point.
+ * Also returns binary and mask property values.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Sample usage:
+ * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
+ * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which property to check.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * @return Numeric value that is directly the property value or,
+ *         for enumerated properties, corresponds to the numeric value of the enumerated
+ *         constant of the respective property value enumeration type
+ *         (cast to enum type if necessary).
+ *         Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties.
+ *         Returns a bit-mask for mask properties.
+ *         Returns 0 if 'which' is out of bounds or if the Unicode version
+ *         does not have data for the property at all, or not for this code point.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyMinValue
+ * @see u_getIntPropertyMaxValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyValue(UChar32 c, UProperty which);
+
+/**
+ * Get the minimum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMaxValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * @param which UProperty selector constant, identifies which binary property to check.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
+ *         0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyMinValue(UProperty which);
+
+/**
+ * Get the maximum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMinValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * Examples for min/max values (for Unicode 3.2):
+ *
+ * - UCHAR_BIDI_CLASS:    0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
+ * - UCHAR_SCRIPT:        0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
+ * - UCHAR_IDEOGRAPHIC:   0/1  (FALSE/TRUE)
+ *
+ * For undefined UProperty constant values, min/max values will be 0/-1.
+ *
+ * @param which UProperty selector constant, identifies which binary property to check.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
+ *         <=0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyMaxValue(UProperty which);
+
+/**
+ * Get the numeric value for a Unicode code point as defined in the
+ * Unicode Character Database.
+ *
+ * A "double" return type is necessary because
+ * some numeric values are fractions, negative, or too large for int32_t.
+ *
+ * For characters without any numeric values in the Unicode Character Database,
+ * this function will return U_NO_NUMERIC_VALUE.
+ * Note: This is different from the Unicode Standard which specifies NaN as the default value.
+ * (NaN is not available on all platforms.)
+ *
+ * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
+ * also supports negative values, large values, and fractions,
+ * while Java's getNumericValue() returns values 10..35 for ASCII letters.
+ *
+ * @param c Code point to get the numeric value for.
+ * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
+ *
+ * @see U_NO_NUMERIC_VALUE
+ * @stable ICU 2.2
+ */
+U_STABLE double U_EXPORT2
+u_getNumericValue(UChar32 c);
+
+/**
+ * Special value that is returned by u_getNumericValue when
+ * no numeric value is defined for a code point.
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.2
+ */
+#define U_NO_NUMERIC_VALUE ((double)-123456789.)
+
+/**
+ * Determines whether the specified code point has the general category "Ll"
+ * (lowercase letter).
+ *
+ * Same as java.lang.Character.isLowerCase().
+ *
+ * This misses some characters that are also lowercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_LOWERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Ll lowercase letter
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_isupper
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_islower(UChar32 c);
+
+/**
+ * Determines whether the specified code point has the general category "Lu"
+ * (uppercase letter).
+ *
+ * Same as java.lang.Character.isUpperCase().
+ *
+ * This misses some characters that are also uppercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_UPPERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Lu uppercase letter
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_islower
+ * @see u_istitle
+ * @see u_tolower
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isupper(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a titlecase letter.
+ * True for general category "Lt" (titlecase letter).
+ *
+ * Same as java.lang.Character.isTitleCase().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Lt titlecase letter
+ *
+ * @see u_isupper
+ * @see u_islower
+ * @see u_totitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_istitle(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a digit character according to Java.
+ * True for characters with general category "Nd" (decimal digit numbers).
+ * Beginning with Unicode 4, this is the same as
+ * testing for the Numeric_Type of Decimal.
+ *
+ * Same as java.lang.Character.isDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a digit character according to Character.isDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a letter character.
+ * True for general categories "L" (letters).
+ *
+ * Same as java.lang.Character.isLetter().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a letter character
+ *
+ * @see u_isdigit
+ * @see u_isalnum
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isalpha(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an alphanumeric character
+ * (letter or digit) according to Java.
+ * True for characters with general categories
+ * "L" (letters) and "Nd" (decimal digit numbers).
+ *
+ * Same as java.lang.Character.isLetterOrDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isalnum(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a hexadecimal digit.
+ * This is equivalent to u_digit(c, 16)>=0.
+ * True for characters with general category "Nd" (decimal digit numbers)
+ * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
+ * (That is, for letters with code points
+ * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
+ *
+ * In order to narrow the definition of hexadecimal digits to only ASCII
+ * characters, use (c<=0x7f && u_isxdigit(c)).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a hexadecimal digit
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isxdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a punctuation character.
+ * True for characters with general categories "P" (punctuation).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a punctuation character
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_ispunct(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "graphic" character
+ * (printable, excluding spaces).
+ * TRUE for all characters except those with general categories
+ * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),
+ * "Cn" (unassigned), and "Z" (separators).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a "graphic" character
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isgraph(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "blank" or "horizontal space",
+ * a character that visibly separates words on a line.
+ * The following are equivalent definitions:
+ *
+ * TRUE for Unicode White_Space characters except for "vertical space controls"
+ * where "vertical space controls" are the following characters:
+ * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
+ *
+ * same as
+ *
+ * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators)
+ * except Zero Width Space (ZWSP, U+200B).
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a "blank"
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isblank(UChar32 c);
+
+/**
+ * Determines whether the specified code point is "defined",
+ * which usually means that it is assigned a character.
+ * True for general categories other than "Cn" (other, not assigned),
+ * i.e., true for all code points mentioned in UnicodeData.txt.
+ *
+ * Note that non-character code points (e.g., U+FDD0) are not "defined"
+ * (they are Cn), but surrogate code points are "defined" (Cs).
+ *
+ * Same as java.lang.Character.isDefined().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is assigned a character
+ *
+ * @see u_isdigit
+ * @see u_isalpha
+ * @see u_isalnum
+ * @see u_isupper
+ * @see u_islower
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isdefined(UChar32 c);
+
+/**
+ * Determines if the specified character is a space character or not.
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c    the character to be tested
+ * @return  true if the character is a space character; false otherwise.
+ *
+ * @see u_isJavaSpaceChar
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isspace(UChar32 c);
+
+/**
+ * Determine if the specified code point is a space character according to Java.
+ * True for characters with general categories "Z" (separators),
+ * which does not include control codes (e.g., TAB or Line Feed).
+ *
+ * Same as java.lang.Character.isSpaceChar().
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a space character according to Character.isSpaceChar()
+ *
+ * @see u_isspace
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaSpaceChar(UChar32 c);
+
+/**
+ * Determines if the specified code point is a whitespace character according to Java/ICU.
+ * A character is considered to be a Java whitespace character if and only
+ * if it satisfies one of the following criteria:
+ *
+ * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
+ *      also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
+ * - It is U+0009 HORIZONTAL TABULATION.
+ * - It is U+000A LINE FEED.
+ * - It is U+000B VERTICAL TABULATION.
+ * - It is U+000C FORM FEED.
+ * - It is U+000D CARRIAGE RETURN.
+ * - It is U+001C FILE SEPARATOR.
+ * - It is U+001D GROUP SEPARATOR.
+ * - It is U+001E RECORD SEPARATOR.
+ * - It is U+001F UNIT SEPARATOR.
+ *
+ * This API tries to sync with the semantics of Java's
+ * java.lang.Character.isWhitespace(), but it may not return
+ * the exact same results because of the Unicode version
+ * difference.
+ *
+ * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
+ * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
+ * See http://www.unicode.org/versions/Unicode4.0.1/
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a whitespace character according to Java/ICU
+ *
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isWhitespace(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a control character
+ * (as defined by this function).
+ * A control character is one of the following:
+ * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
+ * - U_CONTROL_CHAR (Cc)
+ * - U_FORMAT_CHAR (Cf)
+ * - U_LINE_SEPARATOR (Zl)
+ * - U_PARAGRAPH_SEPARATOR (Zp)
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a control character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isprint
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_iscntrl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an ISO control code.
+ * True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
+ *
+ * Same as java.lang.Character.isISOControl().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an ISO control code
+ *
+ * @see u_iscntrl
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isISOControl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a printable character.
+ * True for general categories <em>other</em> than "C" (controls).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a printable character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_iscntrl
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isprint(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a base character.
+ * True for general categories "L" (letters), "N" (numbers),
+ * "Mc" (spacing combining marks), and "Me" (enclosing marks).
+ *
+ * Note that this is different from the Unicode definition in
+ * chapter 3.5, conformance clause D13,
+ * which defines base characters to be all characters (not Cn)
+ * that do not graphically combine with preceding characters (M)
+ * and that are neither control (Cc) or format (Cf) characters.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a base character according to this function
+ *
+ * @see u_isalpha
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isbase(UChar32 c);
+
+/**
+ * Returns the bidirectional category value for the code point,
+ * which is used in the Unicode bidirectional algorithm
+ * (UAX #9 http://www.unicode.org/reports/tr9/).
+ * Note that some <em>unassigned</em> code points have bidi values
+ * of R or AL because they are in blocks that are reserved
+ * for Right-To-Left scripts.
+ *
+ * Same as java.lang.Character.getDirectionality()
+ *
+ * @param c the code point to be tested
+ * @return the bidirectional category (UCharDirection) value
+ *
+ * @see UCharDirection
+ * @stable ICU 2.0
+ */
+U_STABLE UCharDirection U_EXPORT2
+u_charDirection(UChar32 c);
+
+/**
+ * Determines whether the code point has the Bidi_Mirrored property.
+ * This property is set for characters that are commonly used in
+ * Right-To-Left contexts and need to be displayed with a "mirrored"
+ * glyph.
+ *
+ * Same as java.lang.Character.isMirrored().
+ * Same as UCHAR_BIDI_MIRRORED
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the character has the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isMirrored(UChar32 c);
+
+/**
+ * Maps the specified character to a "mirror-image" character.
+ * For characters with the Bidi_Mirrored property, implementations
+ * sometimes need a "poor man's" mapping to another Unicode
+ * character (code point) such that the default glyph may serve
+ * as the mirror-image of the default glyph of the specified
+ * character. This is useful for text conversion to and from
+ * codepages with visual order, and for displays without glyph
+ * selection capabilities.
+ *
+ * @param c the code point to be mapped
+ * @return another Unicode code point that may serve as a mirror-image
+ *         substitute, or c itself if there is no such mapping or c
+ *         does not have the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @see u_isMirrored
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_charMirror(UChar32 c);
+
+/**
+ * Maps the specified character to its paired bracket character.
+ * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().
+ * Otherwise c itself is returned.
+ * See http://www.unicode.org/reports/tr9/
+ *
+ * @param c the code point to be mapped
+ * @return the paired bracket code point,
+ *         or c itself if there is no such mapping
+ *         (Bidi_Paired_Bracket_Type=None)
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @see u_charMirror
+ * @stable ICU 52
+ */
+U_STABLE UChar32 U_EXPORT2
+u_getBidiPairedBracket(UChar32 c);
+
+/**
+ * Returns the general category value for the code point.
+ *
+ * Same as java.lang.Character.getType().
+ *
+ * @param c the code point to be tested
+ * @return the general category (UCharCategory) value
+ *
+ * @see UCharCategory
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+u_charType(UChar32 c);
+
+/**
+ * Get a single-bit bit set for the general category of a character.
+ * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.
+ * Same as U_MASK(u_charType(c)).
+ *
+ * @param c the code point to be tested
+ * @return a single-bit mask corresponding to the general category (UCharCategory) value
+ *
+ * @see u_charType
+ * @see UCharCategory
+ * @see U_GC_CN_MASK
+ * @stable ICU 2.1
+ */
+#define U_GET_GC_MASK(c) U_MASK(u_charType(c))
+
+/**
+ * Callback from u_enumCharTypes(), is called for each contiguous range
+ * of code points c (where start<=c<limit)
+ * with the same Unicode general category ("character type").
+ *
+ * The callback function can stop the enumeration by returning FALSE.
+ *
+ * @param context an opaque pointer, as passed into utrie_enum()
+ * @param start the first code point in a contiguous range with value
+ * @param limit one past the last code point in a contiguous range with value
+ * @param type the general category for all code points in [start..limit[
+ * @return FALSE to stop the enumeration
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see u_enumCharTypes
+ */
+typedef UBool U_CALLCONV
+UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
+
+/**
+ * Enumerate efficiently all code points with their Unicode general categories.
+ *
+ * This is useful for building data structures (e.g., UnicodeSet's),
+ * for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
+ *
+ * For each contiguous range of code points with a given general category ("character type"),
+ * the UCharEnumTypeRange function is called.
+ * Adjacent ranges have different types.
+ * The Unicode Standard guarantees that the numeric value of the type is 0..31.
+ *
+ * @param enumRange a pointer to a function that is called for each contiguous range
+ *                  of code points with the same general category
+ * @param context an opaque pointer that is passed on to the callback function
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see UCharEnumTypeRange
+ */
+U_STABLE void U_EXPORT2
+u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns the combining class of the code point as specified in UnicodeData.txt.
+ *
+ * @param c the code point of the character
+ * @return the combining class of the character
+ * @stable ICU 2.0
+ */
+U_STABLE uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c);
+
+#endif
+
+/**
+ * Returns the decimal digit value of a decimal digit character.
+ * Such characters have the general category "Nd" (decimal digit numbers)
+ * and a Numeric_Type of Decimal.
+ *
+ * Unlike ICU releases before 2.6, no digit values are returned for any
+ * Han characters because Han number characters are often used with a special
+ * Chinese-style number format (with characters for powers of 10 in between)
+ * instead of in decimal-positional notation.
+ * Unicode 4 explicitly assigns Han number characters the Numeric_Type
+ * Numeric instead of Decimal.
+ * See Jitterbug 1483 for more details.
+ *
+ * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()
+ * for complete numeric Unicode properties.
+ *
+ * @param c the code point for which to get the decimal digit value
+ * @return the decimal digit value of c,
+ *         or -1 if c is not a decimal digit character
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_charDigitValue(UChar32 c);
+
+/**
+ * Returns the Unicode allocation block that contains the character.
+ *
+ * @param c the code point to be tested
+ * @return the block value (UBlockCode) for c
+ *
+ * @see UBlockCode
+ * @stable ICU 2.0
+ */
+U_STABLE UBlockCode U_EXPORT2
+ublock_getCode(UChar32 c);
+
+/**
+ * Retrieve the name of a Unicode character.
+ * Depending on <code>nameChoice</code>, the character name written
+ * into the buffer is the "modern" name or the name that was defined
+ * in Unicode version 1.0.
+ * The name contains only "invariant" characters
+ * like A-Z, 0-9, space, and '-'.
+ * Unicode 1.0 names are only retrieved if they are different from the modern
+ * names and if the data file contains the data for them. gennames may or may
+ * not be called with a command line option to include 1.0 names in unames.dat.
+ *
+ * @param code The character (code point) for which to get the name.
+ *             It must be <code>0<=code<=0x10ffff</code>.
+ * @param nameChoice Selector for which name to get.
+ * @param buffer Destination address for copying the name.
+ *               The name will always be zero-terminated.
+ *               If there is no name, then the buffer will be set to the empty string.
+ * @param bufferLength <code>==sizeof(buffer)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ *        check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
+ *        returns.
+ * @return The length of the name, or 0 if there is no name for this character.
+ *         If the bufferLength is less than or equal to the length, then the buffer
+ *         contains the truncated name and the returned length indicates the full
+ *         length of the name.
+ *         The length does not include the zero-termination.
+ *
+ * @see UCharNameChoice
+ * @see u_charFromName
+ * @see u_enumCharNames
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+           char *buffer, int32_t bufferLength,
+           UErrorCode *pErrorCode);
+
+#ifndef U_HIDE_DEPRECATED_API 
+/**
+ * Returns an empty string.
+ * Used to return the ISO 10646 comment for a character.
+ * The Unicode ISO_Comment property is deprecated and has no values.
+ *
+ * @param c The character (code point) for which to get the ISO comment.
+ *             It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the comment.
+ *             The comment will be zero-terminated if possible.
+ *             If there is no comment, then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ *        check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
+ *        returns.
+ * @return 0
+ *
+ * @deprecated ICU 49
+ */
+U_DEPRECATED int32_t U_EXPORT2
+u_getISOComment(UChar32 c,
+                char *dest, int32_t destCapacity,
+                UErrorCode *pErrorCode);
+#endif  /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Find a Unicode character by its name and return its code point value.
+ * The name is matched exactly and completely.
+ * If the name does not correspond to a code point, <i>pErrorCode</i>
+ * is set to <code>U_INVALID_CHAR_FOUND</code>.
+ * A Unicode 1.0 name is matched only if it differs from the modern name.
+ * Unicode names are all uppercase. Extended names are lowercase followed
+ * by an uppercase hexadecimal number, and within angle brackets.
+ *
+ * @param nameChoice Selector for which name to match.
+ * @param name The name to match.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ * @return The Unicode value of the code point with the given name,
+ *         or an undefined value if there is no such code point.
+ *
+ * @see UCharNameChoice
+ * @see u_charName
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+U_STABLE UChar32 U_EXPORT2
+u_charFromName(UCharNameChoice nameChoice,
+               const char *name,
+               UErrorCode *pErrorCode);
+
+/**
+ * Type of a callback function for u_enumCharNames() that gets called
+ * for each Unicode character with the code point value and
+ * the character name.
+ * If such a function returns FALSE, then the enumeration is stopped.
+ *
+ * @param context The context pointer that was passed to u_enumCharNames().
+ * @param code The Unicode code point for the character with this name.
+ * @param nameChoice Selector for which kind of names is enumerated.
+ * @param name The character's name, zero-terminated.
+ * @param length The length of the name.
+ * @return TRUE if the enumeration should continue, FALSE to stop it.
+ *
+ * @see UCharNameChoice
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
+                               UChar32 code,
+                               UCharNameChoice nameChoice,
+                               const char *name,
+                               int32_t length);
+
+/**
+ * Enumerate all assigned Unicode characters between the start and limit
+ * code points (start inclusive, limit exclusive) and call a function
+ * for each, passing the code point value and the character name.
+ * For Unicode 1.0 names, only those are enumerated that differ from the
+ * modern names.
+ *
+ * @param start The first code point in the enumeration range.
+ * @param limit One more than the last code point in the enumeration range
+ *              (the first one after the range).
+ * @param fn The function that is to be called for each character name.
+ * @param context An arbitrary pointer that is passed to the function.
+ * @param nameChoice Selector for which kind of names to enumerate.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ *
+ * @see UCharNameChoice
+ * @see UEnumCharNamesFn
+ * @see u_charName
+ * @see u_charFromName
+ * @stable ICU 1.7
+ */
+U_STABLE void U_EXPORT2
+u_enumCharNames(UChar32 start, UChar32 limit,
+                UEnumCharNamesFn *fn,
+                void *context,
+                UCharNameChoice nameChoice,
+                UErrorCode *pErrorCode);
+
+/**
+ * Return the Unicode name for a given property, as given in the
+ * Unicode database file PropertyAliases.txt.
+ *
+ * In addition, this function maps the property
+ * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
+ * "General_Category_Mask".  These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param property UProperty selector other than UCHAR_INVALID_CODE.
+ *         If out of range, NULL is returned.
+ *
+ * @param nameChoice selector for which name to get.  If out of range,
+ *         NULL is returned.  All properties have a long name.  Most
+ *         have a short name, but some do not.  Unicode allows for
+ *         additional names; if present these will be returned by
+ *         U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+ *
+ * @return a pointer to the name, or NULL if either the
+ *         property or the nameChoice is out of range.  If a given
+ *         nameChoice returns NULL, then all larger values of
+ *         nameChoice will return NULL, with one exception: if NULL is
+ *         returned for U_SHORT_PROPERTY_NAME, then
+ *         U_LONG_PROPERTY_NAME (and higher) may still return a
+ *         non-NULL value.  The returned pointer is valid until
+ *         u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+u_getPropertyName(UProperty property,
+                  UPropertyNameChoice nameChoice);
+
+/**
+ * Return the UProperty enum for a given property name, as specified
+ * in the Unicode database file PropertyAliases.txt.  Short, long, and
+ * any other variants are recognized.
+ *
+ * In addition, this function maps the synthetic names "gcm" /
+ * "General_Category_Mask" to the property
+ * UCHAR_GENERAL_CATEGORY_MASK.  These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param alias the property name to be matched.  The name is compared
+ *         using "loose matching" as described in PropertyAliases.txt.
+ *
+ * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
+ *         does not match any property.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_STABLE UProperty U_EXPORT2
+u_getPropertyEnum(const char* alias);
+
+/**
+ * Return the Unicode name for a given property value, as given in the
+ * Unicode database file PropertyValueAliases.txt.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt can only be
+ * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ *        If out of range, NULL is returned.
+ *
+ * @param value selector for a value for the given property.  If out
+ *         of range, NULL is returned.  In general, valid values range
+ *         from 0 up to some maximum.  There are a few exceptions:
+ *         (1.) UCHAR_BLOCK values begin at the non-zero value
+ *         UBLOCK_BASIC_LATIN.  (2.)  UCHAR_CANONICAL_COMBINING_CLASS
+ *         values are not contiguous and range from 0..240.  (3.)
+ *         UCHAR_GENERAL_CATEGORY_MASK values are not values of
+ *         UCharCategory, but rather mask values produced by
+ *         U_GET_GC_MASK().  This allows grouped categories such as
+ *         [:L:] to be represented.  Mask values range
+ *         non-contiguously from 1..U_GC_P_MASK.
+ *
+ * @param nameChoice selector for which name to get.  If out of range,
+ *         NULL is returned.  All values have a long name.  Most have
+ *         a short name, but some do not.  Unicode allows for
+ *         additional names; if present these will be returned by
+ *         U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+
+ * @return a pointer to the name, or NULL if either the
+ *         property or the nameChoice is out of range.  If a given
+ *         nameChoice returns NULL, then all larger values of
+ *         nameChoice will return NULL, with one exception: if NULL is
+ *         returned for U_SHORT_PROPERTY_NAME, then
+ *         U_LONG_PROPERTY_NAME (and higher) may still return a
+ *         non-NULL value.  The returned pointer is valid until
+ *         u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+u_getPropertyValueName(UProperty property,
+                       int32_t value,
+                       UPropertyNameChoice nameChoice);
+
+/**
+ * Return the property value integer for a given value name, as
+ * specified in the Unicode database file PropertyValueAliases.txt.
+ * Short, long, and any other variants are recognized.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt will only be
+ * recognized with UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ *        If out of range, UCHAR_INVALID_CODE is returned.
+ *
+ * @param alias the value name to be matched.  The name is compared
+ *         using "loose matching" as described in
+ *         PropertyValueAliases.txt.
+ *
+ * @return a value integer or UCHAR_INVALID_CODE if the given name
+ *         does not match any value of the given property, or if the
+ *         property is invalid.  Note: UCHAR_GENERAL_CATEGORY_MASK values
+ *         are not values of UCharCategory, but rather mask values
+ *         produced by U_GET_GC_MASK().  This allows grouped
+ *         categories such as [:L:] to be represented.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+u_getPropertyValueEnum(UProperty property,
+                       const char* alias);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in an identifier according to Unicode
+ * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
+ * True for characters with general categories "L" (letters) and "Nl" (letter numbers).
+ *
+ * Same as java.lang.Character.isUnicodeIdentifierStart().
+ * Same as UCHAR_ID_START
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may start an identifier
+ *
+ * @see UCHAR_ID_START
+ * @see u_isalpha
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in an identifier according to Java.
+ * True for characters with general categories "L" (letters),
+ * "Nl" (letter numbers), "Nd" (decimal digits),
+ * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and
+ * u_isIDIgnorable(c).
+ *
+ * Same as java.lang.Character.isUnicodeIdentifierPart().
+ * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE)
+ * except that Unicode recommends to ignore Cf which is less than
+ * u_isIDIgnorable(c).
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may occur in an identifier according to Java
+ *
+ * @see UCHAR_ID_CONTINUE
+ * @see u_isIDStart
+ * @see u_isIDIgnorable
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDPart(UChar32 c);
+
+/**
+ * Determines if the specified character should be regarded
+ * as an ignorable character in an identifier,
+ * according to Java.
+ * True for characters with general category "Cf" (format controls) as well as
+ * non-whitespace ISO controls
+ * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F).
+ *
+ * Same as java.lang.Character.isIdentifierIgnorable().
+ *
+ * Note that Unicode just recommends to ignore Cf (format controls).
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is ignorable in identifiers according to Java
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isIDStart
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDIgnorable(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in a Java identifier.
+ * In addition to u_isIDStart(c), true for characters with
+ * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
+ *
+ * Same as java.lang.Character.isJavaIdentifierStart().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may start a Java identifier
+ *
+ * @see     u_isJavaIDPart
+ * @see     u_isalpha
+ * @see     u_isIDStart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in a Java identifier.
+ * In addition to u_isIDPart(c), true for characters with
+ * general category "Sc" (currency symbols).
+ *
+ * Same as java.lang.Character.isJavaIdentifierPart().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may occur in a Java identifier
+ *
+ * @see     u_isIDIgnorable
+ * @see     u_isJavaIDStart
+ * @see     u_isalpha
+ * @see     u_isdigit
+ * @see     u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaIDPart(UChar32 c);
+
+/**
+ * The given character is mapped to its lowercase equivalent according to
+ * UnicodeData.txt; if the character has no lowercase equivalent, the character
+ * itself is returned.
+ *
+ * Same as java.lang.Character.toLowerCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Lowercase_Mapping of the code point, if any;
+ *         otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_tolower(UChar32 c);
+
+/**
+ * The given character is mapped to its uppercase equivalent according to UnicodeData.txt;
+ * if the character has no uppercase equivalent, the character itself is
+ * returned.
+ *
+ * Same as java.lang.Character.toUpperCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Uppercase_Mapping of the code point, if any;
+ *         otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_toupper(UChar32 c);
+
+/**
+ * The given character is mapped to its titlecase equivalent
+ * according to UnicodeData.txt;
+ * if none is defined, the character itself is returned.
+ *
+ * Same as java.lang.Character.toTitleCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Titlecase_Mapping of the code point, if any;
+ *         otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_totitle(UChar32 c);
+
+/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
+#define U_FOLD_CASE_DEFAULT 0
+
+/**
+ * Option value for case folding:
+ *
+ * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
+ * and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
+ * are to be included for default mappings and
+ * excluded for the Turkic-specific mappings.
+ *
+ * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
+
+/**
+ * The given character is mapped to its case folding equivalent according to
+ * UnicodeData.txt and CaseFolding.txt;
+ * if the character has no case folding equivalent, the character
+ * itself is returned.
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return the Simple_Case_Folding of the code point, if any;
+ *         otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_foldCase(UChar32 c, uint32_t options);
+
+/**
+ * Returns the decimal digit value of the code point in the
+ * specified radix.
+ *
+ * If the radix is not in the range <code>2<=radix<=36</code> or if the
+ * value of <code>c</code> is not a valid digit in the specified
+ * radix, <code>-1</code> is returned. A character is a valid digit
+ * if at least one of the following is true:
+ * <ul>
+ * <li>The character has a decimal digit value.
+ *     Such characters have the general category "Nd" (decimal digit numbers)
+ *     and a Numeric_Type of Decimal.
+ *     In this case the value is the character's decimal digit value.</li>
+ * <li>The character is one of the uppercase Latin letters
+ *     <code>'A'</code> through <code>'Z'</code>.
+ *     In this case the value is <code>c-'A'+10</code>.</li>
+ * <li>The character is one of the lowercase Latin letters
+ *     <code>'a'</code> through <code>'z'</code>.
+ *     In this case the value is <code>ch-'a'+10</code>.</li>
+ * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)
+ *     as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)
+ *     are recognized.</li>
+ * </ul>
+ *
+ * Same as java.lang.Character.digit().
+ *
+ * @param   ch      the code point to be tested.
+ * @param   radix   the radix.
+ * @return  the numeric value represented by the character in the
+ *          specified radix,
+ *          or -1 if there is no value or if the value exceeds the radix.
+ *
+ * @see     UCHAR_NUMERIC_TYPE
+ * @see     u_forDigit
+ * @see     u_charDigitValue
+ * @see     u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_digit(UChar32 ch, int8_t radix);
+
+/**
+ * Determines the character representation for a specific digit in
+ * the specified radix. If the value of <code>radix</code> is not a
+ * valid radix, or the value of <code>digit</code> is not a valid
+ * digit in the specified radix, the null character
+ * (<code>U+0000</code>) is returned.
+ * <p>
+ * The <code>radix</code> argument is valid if it is greater than or
+ * equal to 2 and less than or equal to 36.
+ * The <code>digit</code> argument is valid if
+ * <code>0 <= digit < radix</code>.
+ * <p>
+ * If the digit is less than 10, then
+ * <code>'0' + digit</code> is returned. Otherwise, the value
+ * <code>'a' + digit - 10</code> is returned.
+ *
+ * Same as java.lang.Character.forDigit().
+ *
+ * @param   digit   the number to convert to a character.
+ * @param   radix   the radix.
+ * @return  the <code>char</code> representation of the specified digit
+ *          in the specified radix.
+ *
+ * @see     u_digit
+ * @see     u_charDigitValue
+ * @see     u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_forDigit(int32_t digit, int8_t radix);
+
+/**
+ * Get the "age" of the code point.
+ * The "age" is the Unicode version when the code point was first
+ * designated (as a non-character or for Private Use)
+ * or assigned a character.
+ * This can be useful to avoid emitting code points to receiving
+ * processes that do not accept newer characters.
+ * The data is from the UCD file DerivedAge.txt.
+ *
+ * @param c The code point.
+ * @param versionArray The Unicode version number array, to be filled in.
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+u_charAge(UChar32 c, UVersionInfo versionArray);
+
+/**
+ * Gets the Unicode version information.
+ * The version array is filled in with the version information
+ * for the Unicode standard that is currently used by ICU.
+ * For example, Unicode version 3.1.1 is represented as an array with
+ * the values { 3, 1, 1, 0 }.
+ *
+ * @param versionArray an output array that will be filled in with
+ *                     the Unicode version number
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_getUnicodeVersion(UVersionInfo versionArray);
+
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Get the FC_NFKC_Closure property string for a character.
+ * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
+ * or for "FNC": http://www.unicode.org/reports/tr15/
+ *
+ * @param c The character (code point) for which to get the FC_NFKC_Closure string.
+ *             It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the string.
+ *             The string will be zero-terminated if possible.
+ *             If there is no FC_NFKC_Closure string,
+ *             then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable.
+ * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
+ *         If the destCapacity is less than or equal to the length, then the buffer
+ *         contains the truncated name and the returned length indicates the full
+ *         length of the name.
+ *         The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
+
+#endif
+
+
+U_CDECL_END
+
+#endif /*_UCHAR*/
+/*eof*/
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uclean.h b/src/core/basetypes/icu-ucsdet/include/unicode/uclean.h
new file mode 100644
index 00000000..d9a1e539
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uclean.h
@@ -0,0 +1,258 @@
+/*
+******************************************************************************
+* Copyright (C) 2001-2014, International Business Machines
+*                Corporation and others. All Rights Reserved.
+******************************************************************************
+*   file name:  uclean.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001July05
+*   created by: George Rhoten
+*/
+
+#ifndef __UCLEAN_H__
+#define __UCLEAN_H__
+
+#include "unicode/utypes.h"
+/**
+ * \file
+ * \brief C API: Initialize and clean up ICU
+ */
+ 
+/**
+ *  Initialize ICU.
+ *
+ *  Use of this function is optional.  It is OK to simply use ICU
+ *  services and functions without first having initialized
+ *  ICU by calling u_init().
+ *
+ *  u_init() will attempt to load some part of ICU's data, and is
+ *  useful as a test for configuration or installation problems that
+ *  leave the ICU data inaccessible.  A successful invocation of u_init()
+ *  does not, however, guarantee that all ICU data is accessible.
+ *
+ *  Multiple calls to u_init() cause no harm, aside from the small amount
+ *  of time required.
+ *
+ *  In old versions of ICU, u_init() was required in multi-threaded applications
+ *  to ensure the thread safety of ICU.  u_init() is no longer needed for this purpose.
+ *
+ * @param status An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ *    An Error will be returned if some required part of ICU data can not
+ *    be loaded or initialized.
+ *    The function returns immediately if the input error code indicates a
+ *    failure, as usual.
+ *
+ * @stable ICU 2.6
+ */  
+U_STABLE void U_EXPORT2 
+u_init(UErrorCode *status);
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * Clean up the system resources, such as allocated memory or open files,
+ * used in all ICU libraries. This will free/delete all memory owned by the
+ * ICU libraries, and return them to their original load state. All open ICU
+ * items (collators, resource bundles, converters, etc.) must be closed before
+ * calling this function, otherwise ICU may not free its allocated memory
+ * (e.g. close your converters and resource bundles before calling this
+ * function). Generally, this function should be called once just before
+ * an application exits. For applications that dynamically load and unload
+ * the ICU libraries (relatively uncommon), u_cleanup() should be called
+ * just before the library unload.
+ * <p>
+ * u_cleanup() also clears any ICU heap functions, mutex functions or
+ * trace functions that may have been set for the process.  
+ * This has the effect of restoring ICU to its initial condition, before
+ * any of these override functions were installed.  Refer to
+ * u_setMemoryFunctions(), u_setMutexFunctions and 
+ * utrace_setFunctions().  If ICU is to be reinitialized after after
+ * calling u_cleanup(), these runtime override functions will need to
+ * be set up again if they are still required.
+ * <p>
+ * u_cleanup() is not thread safe.  All other threads should stop using ICU
+ * before calling this function.
+ * <p>
+ * Any open ICU items will be left in an undefined state by u_cleanup(),
+ * and any subsequent attempt to use such an item will give unpredictable
+ * results.
+ * <p>
+ * After calling u_cleanup(), an application may continue to use ICU by
+ * calling u_init().  An application must invoke u_init() first from one single
+ * thread before allowing other threads call u_init().  All threads existing
+ * at the time of the first thread's call to u_init() must also call
+ * u_init() themselves before continuing with other ICU operations.  
+ * <p>
+ * The use of u_cleanup() just before an application terminates is optional,
+ * but it should be called only once for performance reasons. The primary
+ * benefit is to eliminate reports of memory or resource leaks originating
+ * in ICU code from the results generated by heap analysis tools.
+ * <p>
+ * <strong>Use this function with great care!</strong>
+ * </p>
+ *
+ * @stable ICU 2.0
+ * @system
+ */
+U_STABLE void U_EXPORT2 
+u_cleanup(void);
+
+
+/**
+  *  Pointer type for a user supplied memory allocation function.
+  *  @param context user supplied value, obtained from from u_setMemoryFunctions().
+  *  @param size    The number of bytes to be allocated
+  *  @return        Pointer to the newly allocated memory, or NULL if the allocation failed.
+  *  @stable ICU 2.8
+  *  @system
+  */
+typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
+/**
+  *  Pointer type for a user supplied memory re-allocation function.
+  *  @param context user supplied value, obtained from from u_setMemoryFunctions().
+  *  @param size    The number of bytes to be allocated
+  *  @return        Pointer to the newly allocated memory, or NULL if the allocation failed.
+  *  @stable ICU 2.8
+  *  @system
+  */
+typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size);
+/**
+  *  Pointer type for a user supplied memory free  function.  Behavior should be
+  *  similar the standard C library free().
+  *  @param context user supplied value, obtained from from u_setMemoryFunctions().
+  *  @param mem     Pointer to the memory block to be resized
+  *  @param size    The new size for the block
+  *  @return        Pointer to the resized memory block, or NULL if the resizing failed.
+  *  @stable ICU 2.8
+  *  @system
+  */
+typedef void  U_CALLCONV UMemFreeFn (const void *context, void *mem);
+
+/**
+ *  Set the functions that ICU will use for memory allocation.
+ *  Use of this function is optional; by default (without this function), ICU will
+ *  use the standard C library malloc() and free() functions.
+ *  This function can only be used when ICU is in an initial, unused state, before
+ *  u_init() has been called.
+ *  @param context This pointer value will be saved, and then (later) passed as
+ *                 a parameter to the memory functions each time they
+ *                 are called.
+ *  @param a       Pointer to a user-supplied malloc function.
+ *  @param r       Pointer to a user-supplied realloc function.
+ *  @param f       Pointer to a user-supplied free function.
+ *  @param status  Receives error values.
+ *  @stable ICU 2.8
+ *  @system
+ */  
+U_STABLE void U_EXPORT2 
+u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, 
+                    UErrorCode *status);
+
+
+#ifndef U_HIDE_DEPRECATED_API
+/*********************************************************************************
+ *
+ * Deprecated Functions
+ *
+ *    The following functions for user supplied mutexes are no longer supported.
+ *    Any attempt to use them will return a U_UNSUPPORTED_ERROR.
+ *
+ **********************************************************************************/
+
+/**
+  * An opaque pointer type that represents an ICU mutex.
+  * For user-implemented mutexes, the value will typically point to a
+  *  struct or object that implements the mutex.
+  * @deprecated ICU 52. This type is no longer supported.
+  * @system
+  */
+typedef void *UMTX;
+
+/**
+  *  Function Pointer type for a user supplied mutex initialization function.
+  *  The user-supplied function will be called by ICU whenever ICU needs to create a
+  *  new mutex.  The function implementation should create a mutex, and store a pointer
+  *  to something that uniquely identifies the mutex into the UMTX that is supplied
+  *  as a paramter.
+  *  @param context user supplied value, obtained from from u_setMutexFunctions().
+  *  @param mutex   Receives a pointer that identifies the new mutex.
+  *                 The mutex init function must set the UMTX to a non-null value.   
+  *                 Subsequent calls by ICU to lock, unlock, or destroy a mutex will 
+  *                 identify the mutex by the UMTX value.
+  *  @param status  Error status.  Report errors back to ICU by setting this variable
+  *                 with an error code.
+  *  @deprecated ICU 52. This function is no longer supported.
+  *  @system
+  */
+typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX  *mutex, UErrorCode* status);
+
+
+/**
+  *  Function Pointer type for a user supplied mutex functions.
+  *  One of the  user-supplied functions with this signature will be called by ICU
+  *  whenever ICU needs to lock, unlock, or destroy a mutex.
+  *  @param context user supplied value, obtained from from u_setMutexFunctions().
+  *  @param mutex   specify the mutex on which to operate.
+  *  @deprecated ICU 52. This function is no longer supported.
+  *  @system
+  */
+typedef void U_CALLCONV UMtxFn   (const void *context, UMTX  *mutex);
+
+
+/**
+  *  Set the functions that ICU will use for mutex operations
+  *  Use of this function is optional; by default (without this function), ICU will
+  *  directly access system functions for mutex operations
+  *  This function can only be used when ICU is in an initial, unused state, before
+  *  u_init() has been called.
+  *  @param context This pointer value will be saved, and then (later) passed as
+  *                 a parameter to the user-supplied mutex functions each time they
+  *                 are called. 
+  *  @param init    Pointer to a mutex initialization function.  Must be non-null.
+  *  @param destroy Pointer to the mutex destroy function.  Must be non-null.
+  *  @param lock    pointer to the mutex lock function.  Must be non-null.
+  *  @param unlock  Pointer to the mutex unlock function.  Must be non-null.
+  *  @param status  Receives error values.
+  *  @deprecated ICU 52. This function is no longer supported.
+  *  @system
+  */  
+U_DEPRECATED void U_EXPORT2 
+u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock,
+                    UErrorCode *status);
+
+
+/**
+  *  Pointer type for a user supplied atomic increment or decrement function.
+  *  @param context user supplied value, obtained from from u_setAtomicIncDecFunctions().
+  *  @param p   Pointer to a 32 bit int to be incremented or decremented
+  *  @return    The value of the variable after the inc or dec operation.
+  *  @deprecated ICU 52. This function is no longer supported.
+  *  @system
+  */
+typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p);
+
+/**
+ *  Set the functions that ICU will use for atomic increment and decrement of int32_t values.
+ *  Use of this function is optional; by default (without this function), ICU will
+ *  use its own internal implementation of atomic increment/decrement.
+ *  This function can only be used when ICU is in an initial, unused state, before
+ *  u_init() has been called.
+ *  @param context This pointer value will be saved, and then (later) passed as
+ *                 a parameter to the increment and decrement functions each time they
+ *                 are called.  This function can only be called 
+ *  @param inc     Pointer to a function to do an atomic increment operation.  Must be non-null.
+ *  @param dec     Pointer to a function to do an atomic decrement operation.  Must be non-null.
+ *  @param status  Receives error values.
+ *  @deprecated ICU 52. This function is no longer supported.
+ *  @system
+ */  
+U_DEPRECATED void U_EXPORT2 
+u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec,
+                    UErrorCode *status);
+
+#endif  /* U_HIDE_DEPRECATED_API */
+#endif  /* U_HIDE_SYSTEM_API */
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ucnv.h b/src/core/basetypes/icu-ucsdet/include/unicode/ucnv.h
new file mode 100644
index 00000000..564656c2
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ucnv.h
@@ -0,0 +1,2036 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+ *  ucnv.h:
+ *  External APIs for the ICU's codeset conversion library
+ *  Bertrand A. Damiba
+ *
+ * Modification History:
+ *
+ *   Date        Name        Description
+ *   04/04/99    helena      Fixed internal header inclusion.
+ *   05/11/00    helena      Added setFallback and usesFallback APIs.
+ *   06/29/2000  helena      Major rewrite of the callback APIs.
+ *   12/07/2000  srl         Update of documentation
+ */
+
+/**
+ * \file
+ * \brief C API: Character conversion
+ *
+ * <h2>Character Conversion C API</h2>
+ *
+ * <p>This API is used to convert codepage or character encoded data to and
+ * from UTF-16. You can open a converter with {@link ucnv_open() }. With that
+ * converter, you can get its properties, set options, convert your data and
+ * close the converter.</p>
+ *
+ * <p>Since many software programs recogize different converter names for
+ * different types of converters, there are other functions in this API to
+ * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
+ * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
+ * more frequently used alias functions to get this information.</p>
+ *
+ * <p>When a converter encounters an illegal, irregular, invalid or unmappable character
+ * its default behavior is to use a substitution character to replace the
+ * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() }
+ * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines
+ * many other callback actions that can be used instead of a character substitution.</p>
+ *
+ * <p>More information about this API can be found in our
+ * <a href="http://icu-project.org/userguide/conversion.html">User's
+ * Guide</a>.</p>
+ */
+
+#ifndef UCNV_H
+#define UCNV_H
+
+#include "unicode/ucnv_err.h"
+#include "unicode/uenum.h"
+#include "unicode/localpointer.h"
+
+#ifndef __USET_H__
+
+/**
+ * USet is the C API type for Unicode sets.
+ * It is forward-declared here to avoid including the header file if related
+ * conversion APIs are not used.
+ * See unicode/uset.h
+ *
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.6
+ */
+struct USet;
+/** @stable ICU 2.6 */
+typedef struct USet USet;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+U_CDECL_BEGIN
+
+/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
+/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
+
+/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define  UCNV_SI 0x0F
+/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define  UCNV_SO 0x0E
+
+/**
+ * Enum for specifying basic types of converters
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+typedef enum {
+    /** @stable ICU 2.0 */
+    UCNV_UNSUPPORTED_CONVERTER = -1,
+    /** @stable ICU 2.0 */
+    UCNV_SBCS = 0,
+    /** @stable ICU 2.0 */
+    UCNV_DBCS = 1,
+    /** @stable ICU 2.0 */
+    UCNV_MBCS = 2,
+    /** @stable ICU 2.0 */
+    UCNV_LATIN_1 = 3,
+    /** @stable ICU 2.0 */
+    UCNV_UTF8 = 4,
+    /** @stable ICU 2.0 */
+    UCNV_UTF16_BigEndian = 5,
+    /** @stable ICU 2.0 */
+    UCNV_UTF16_LittleEndian = 6,
+    /** @stable ICU 2.0 */
+    UCNV_UTF32_BigEndian = 7,
+    /** @stable ICU 2.0 */
+    UCNV_UTF32_LittleEndian = 8,
+    /** @stable ICU 2.0 */
+    UCNV_EBCDIC_STATEFUL = 9,
+    /** @stable ICU 2.0 */
+    UCNV_ISO_2022 = 10,
+
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_1 = 11,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_2,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_3,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_4,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_5,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_6,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_8,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_11,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_16,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_17,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_18,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_19,
+    /** @stable ICU 2.0 */
+    UCNV_LMBCS_LAST = UCNV_LMBCS_19,
+    /** @stable ICU 2.0 */
+    UCNV_HZ,
+    /** @stable ICU 2.0 */
+    UCNV_SCSU,
+    /** @stable ICU 2.0 */
+    UCNV_ISCII,
+    /** @stable ICU 2.0 */
+    UCNV_US_ASCII,
+    /** @stable ICU 2.0 */
+    UCNV_UTF7,
+    /** @stable ICU 2.2 */
+    UCNV_BOCU1,
+    /** @stable ICU 2.2 */
+    UCNV_UTF16,
+    /** @stable ICU 2.2 */
+    UCNV_UTF32,
+    /** @stable ICU 2.2 */
+    UCNV_CESU8,
+    /** @stable ICU 2.4 */
+    UCNV_IMAP_MAILBOX,
+    /** @stable ICU 4.8 */
+    UCNV_COMPOUND_TEXT,
+
+    /* Number of converter types for which we have conversion routines. */
+    UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
+} UConverterType;
+
+/**
+ * Enum for specifying which platform a converter ID refers to.
+ * The use of platform/CCSID is not recommended. See ucnv_openCCSID().
+ *
+ * @see ucnv_getPlatform
+ * @see ucnv_openCCSID
+ * @see ucnv_getCCSID
+ * @stable ICU 2.0
+ */
+typedef enum {
+    UCNV_UNKNOWN = -1,
+    UCNV_IBM = 0
+} UConverterPlatform;
+
+/**
+ * Function pointer for error callback in the codepage to unicode direction.
+ * Called when an error has occured in conversion to unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param pErrorCode    ICU error code in/out parameter.
+ *                      For converter callback functions, set to a conversion error
+ *                      before the call, and the callback may reset it to U_ZERO_ERROR.
+ * @see ucnv_setToUCallBack
+ * @see UConverterToUnicodeArgs
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterToUCallback) (
+                  const void* context,
+                  UConverterToUnicodeArgs *args,
+                  const char *codeUnits,
+                  int32_t length,
+                  UConverterCallbackReason reason,
+                  UErrorCode *pErrorCode);
+
+/**
+ * Function pointer for error callback in the unicode to codepage direction.
+ * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param pErrorCode    ICU error code in/out parameter.
+ *                      For converter callback functions, set to a conversion error
+ *                      before the call, and the callback may reset it to U_ZERO_ERROR.
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterFromUCallback) (
+                    const void* context,
+                    UConverterFromUnicodeArgs *args,
+                    const UChar* codeUnits,
+                    int32_t length,
+                    UChar32 codePoint,
+                    UConverterCallbackReason reason,
+                    UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+/**
+ * Character that separates converter names from options and options from each other.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_CHAR ','
+
+/**
+ * String version of UCNV_OPTION_SEP_CHAR.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_STRING ","
+
+/**
+ * Character that separates a converter option from its value.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_CHAR '='
+
+/**
+ * String version of UCNV_VALUE_SEP_CHAR.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_STRING "="
+
+/**
+ * Converter option for specifying a locale.
+ * For example, ucnv_open("SCSU,locale=ja", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_LOCALE_OPTION_STRING ",locale="
+
+/**
+ * Converter option for specifying a version selector (0..9) for some converters.
+ * For example,
+ * \code
+ *   ucnv_open("UTF-7,version=1", &errorCode);
+ * \endcode
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_VERSION_OPTION_STRING ",version="
+
+/**
+ * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
+ * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on
+ * S/390 (z/OS) Unix System Services (Open Edition).
+ * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"
+
+/**
+ * Do a fuzzy compare of two converter/alias names.
+ * The comparison is case-insensitive, ignores leading zeroes if they are not
+ * followed by further digits, and ignores all but letters and digits.
+ * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
+ * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * @param name1 a converter name or alias, zero-terminated
+ * @param name2 a converter name or alias, zero-terminated
+ * @return 0 if the names match, or a negative value if the name1
+ * lexically precedes name2, or a positive value if the name1
+ * lexically follows name2.
+ * @stable ICU 2.0
+ */
+U_STABLE int U_EXPORT2
+ucnv_compareNames(const char *name1, const char *name2);
+
+
+/**
+ * Creates a UConverter object with the name of a coded character set specified as a C string.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * leading zeroes and all non-alphanumeric characters.
+ * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
+ * (See also ucnv_compareNames().)
+ * If <code>NULL</code> is passed for the converter name, it will create one with the
+ * getDefaultName return value.
+ *
+ * <p>A converter name for ICU 1.5 and above may contain options
+ * like a locale specification to control the specific behavior of
+ * the newly instantiated converter.
+ * The meaning of the options depends on the particular converter.
+ * If an option is not defined for or recognized by a given converter, then it is ignored.</p>
+ *
+ * <p>Options are appended to the converter name string, with a
+ * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and
+ * also between adjacent options.</p>
+ *
+ * <p>If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p>
+ *
+ * <p>The conversion behavior and names can vary between platforms. ICU may
+ * convert some characters differently from other platforms. Details on this topic
+ * are in the <a href="http://icu-project.org/userguide/conversion.html">User's
+ * Guide</a>. Aliases starting with a "cp" prefix have no specific meaning
+ * other than its an alias starting with the letters "cp". Please do not
+ * associate any meaning to these aliases.</p>
+ *
+ * \snippet samples/ucnv/convsamp.cpp ucnv_open
+ *
+ * @param converterName Name of the coded character set table.
+ *          This may have options appended to the string.
+ *          IANA alias character set names, IBM CCSIDs starting with "ibm-",
+ *          Windows codepage numbers starting with "windows-" are frequently
+ *          used for this parameter. See ucnv_getAvailableName and
+ *          ucnv_getAlias for a complete list that is available.
+ *          If this parameter is NULL, the default converter will be used.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @see ucnv_getAvailableName
+ * @see ucnv_getAlias
+ * @see ucnv_getDefaultName
+ * @see ucnv_close
+ * @see ucnv_compareNames
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_open(const char *converterName, UErrorCode *err);
+
+
+/**
+ * Creates a Unicode converter with the names specified as unicode string.
+ * The name should be limited to the ASCII-7 alphanumerics range.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * leading zeroes and all non-alphanumeric characters.
+ * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
+ * (See also ucnv_compareNames().)
+ * If <TT>NULL</TT> is passed for the converter name, it will create
+ * one with the ucnv_getDefaultName() return value.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * <p>See ucnv_open for the complete details</p>
+ * @param name Name of the UConverter table in a zero terminated
+ *        Unicode string
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR,
+ *        U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an
+ *        error occured
+ * @see ucnv_open
+ * @see ucnv_openCCSID
+ * @see ucnv_close
+ * @see ucnv_compareNames
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_openU(const UChar *name,
+           UErrorCode *err);
+
+/**
+ * Creates a UConverter object from a CCSID number and platform pair.
+ * Note that the usefulness of this function is limited to platforms with numeric
+ * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for
+ * encodings.
+ *
+ * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related.
+ * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and
+ * for some Unicode conversion tables there are multiple CCSIDs.
+ * Some "alternate" Unicode conversion tables are provided by the
+ * IBM CDRA conversion table registry.
+ * The most prominent example of a systematic modification of conversion tables that is
+ * not provided in the form of conversion table files in the repository is
+ * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all
+ * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.
+ *
+ * Only IBM default conversion tables are accessible with ucnv_openCCSID().
+ * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated
+ * with that CCSID.
+ *
+ * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.
+ *
+ * In summary, the use of CCSIDs and the associated API functions is not recommended.
+ *
+ * In order to open a converter with the default IBM CDRA Unicode conversion table,
+ * you can use this function or use the prefix "ibm-":
+ * \code
+ *     char name[20];
+ *     sprintf(name, "ibm-%hu", ccsid);
+ *     cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter with the IBM S/390 Unix System Services variant
+ * of a Unicode/EBCDIC conversion table,
+ * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING:
+ * \code
+ *     char name[20];
+ *     sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid);
+ *     cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter from a Microsoft codepage number, use the prefix "cp":
+ * \code
+ *     char name[20];
+ *     sprintf(name, "cp%hu", codepageID);
+ *     cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * @param codepage codepage number to create
+ * @param platform the platform in which the codepage number exists
+ * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error
+ *   occured.
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_close
+ * @see ucnv_getCCSID
+ * @see ucnv_getPlatform
+ * @see UConverterPlatform
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_openCCSID(int32_t codepage,
+               UConverterPlatform platform,
+               UErrorCode * err);
+
+/**
+ * <p>Creates a UConverter object specified from a packageName and a converterName.</p>
+ *
+ * <p>The packageName and converterName must point to an ICU udata object, as defined by
+ *   <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent.
+ * Typically, packageName will refer to a (.dat) file, or to a package registered with
+ * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.</p>
+ *
+ * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
+ * stored in the converter cache or the alias table. The only way to open further converters
+ * is call this function multiple times, or use the ucnv_safeClone() function to clone a
+ * 'master' converter.</p>
+ *
+ * <p>A future version of ICU may add alias table lookups and/or caching
+ * to this function.</p>
+ *
+ * <p>Example Use:
+ *      <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
+ * </p>
+ *
+ * @param packageName name of the package (equivalent to 'path' in udata_open() call)
+ * @param converterName name of the data item to be used, without suffix.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
+ * @see udata_open
+ * @see ucnv_open
+ * @see ucnv_safeClone
+ * @see ucnv_close
+ * @stable ICU 2.2
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
+
+/**
+ * Thread safe converter cloning operation.
+ * For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
+ * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space.
+ * If the buffer size is sufficient, then the clone will use the stack buffer;
+ * otherwise, it will be allocated, and *pBufferSize will indicate
+ * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.)
+ *
+ * You must ucnv_close() the clone in any case.
+ *
+ * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not)
+ * then *pBufferSize will be changed to a sufficient size
+ * for cloning this converter,
+ * without actually cloning the converter ("pure pre-flighting").
+ *
+ * If *pBufferSize is greater than zero but not large enough for a stack-based
+ * clone, then the converter is cloned using newly allocated memory
+ * and *pBufferSize is changed to the necessary size.
+ *
+ * If the converter clone fits into the stack buffer but the stack buffer is not
+ * sufficiently aligned for the clone, then the clone will use an
+ * adjusted pointer and use an accordingly smaller buffer size.
+ *
+ * @param cnv converter to be cloned
+ * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
+ *  user allocated space for the new clone. If NULL new memory will be allocated.
+ *  If buffer is not large enough, new memory will be allocated.
+ *  Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
+ * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
+ *  pointer to size of allocated space.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ *  An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
+ *  is used if any allocations were necessary.
+ *  However, it is better to check if *pBufferSize grew for checking for
+ *  allocations because warning codes can be overridden by subsequent
+ *  function calls.
+ * @return pointer to the new clone
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter * U_EXPORT2
+ucnv_safeClone(const UConverter *cnv,
+               void             *stackBuffer,
+               int32_t          *pBufferSize,
+               UErrorCode       *status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/**
+ * \def U_CNV_SAFECLONE_BUFFERSIZE
+ * Definition of a buffer size that is designed to be large enough for
+ * converters to be cloned with ucnv_safeClone().
+ * @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer.
+ */
+#define U_CNV_SAFECLONE_BUFFERSIZE  1024
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Deletes the unicode converter and releases resources associated
+ * with just this instance.
+ * Does not free up shared converter tables.
+ *
+ * @param converter the converter object to be deleted
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @stable ICU 2.0
+ */
+U_STABLE void  U_EXPORT2
+ucnv_close(UConverter * converter);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUConverterPointer
+ * "Smart pointer" class, closes a UConverter via ucnv_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterPointer, UConverter, ucnv_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Fills in the output parameter, subChars, with the substitution characters
+ * as multiple bytes.
+ * If ucnv_setSubstString() set a Unicode string because the converter is
+ * stateful, then subChars will be an empty string.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the subsitution characters
+ * @param len on input the capacity of subChars, on output the number
+ * of bytes copied to it
+ * @param  err the outgoing error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @see ucnv_setSubstString
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getSubstChars(const UConverter *converter,
+                   char *subChars,
+                   int8_t *len,
+                   UErrorCode *err);
+
+/**
+ * Sets the substitution chars when converting from unicode to a codepage. The
+ * substitution is specified as a string of 1-4 bytes, and may contain
+ * <TT>NULL</TT> bytes.
+ * The subChars must represent a single character. The caller needs to know the
+ * byte sequence of a valid character in the converter's charset.
+ * For some converters, for example some ISO 2022 variants, only single-byte
+ * substitution characters may be supported.
+ * The newer ucnv_setSubstString() function relaxes these limitations.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the substitution character byte sequence we want set
+ * @param len the number of bytes in subChars
+ * @param err the error status code.  <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
+ * len is bigger than the maximum number of bytes allowed in subchars
+ * @see ucnv_setSubstString
+ * @see ucnv_getSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setSubstChars(UConverter *converter,
+                   const char *subChars,
+                   int8_t len,
+                   UErrorCode *err);
+
+/**
+ * Set a substitution string for converting from Unicode to a charset.
+ * The caller need not know the charset byte sequence for each charset.
+ *
+ * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence
+ * for a single character, this function takes a Unicode string with
+ * zero, one or more characters, and immediately verifies that the string can be
+ * converted to the charset.
+ * If not, or if the result is too long (more than 32 bytes as of ICU 3.6),
+ * then the function returns with an error accordingly.
+ *
+ * Also unlike ucnv_setSubstChars(), this function works for stateful charsets
+ * by converting on the fly at the point of substitution rather than setting
+ * a fixed byte sequence.
+ *
+ * @param cnv The UConverter object.
+ * @param s The Unicode string.
+ * @param length The number of UChars in s, or -1 for a NUL-terminated string.
+ * @param err Pointer to a standard ICU error code. Its input value must
+ *            pass the U_SUCCESS() test, or else the function returns
+ *            immediately. Check for U_FAILURE() on output or use with
+ *            function chaining. (See User Guide for details.)
+ *
+ * @see ucnv_setSubstChars
+ * @see ucnv_getSubstChars
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_setSubstString(UConverter *cnv,
+                    const UChar *s,
+                    int32_t length,
+                    UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errBytes, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errBytes the codepage bytes which were in error
+ * @param len on input the capacity of errBytes, on output the number of
+ *  bytes which were copied to it
+ * @param err the error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getInvalidChars(const UConverter *converter,
+                     char *errBytes,
+                     int8_t *len,
+                     UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errChars, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errUChars the UChars which were in error
+ * @param len on input the capacity of errUChars, on output the number of
+ *  UChars which were copied to it
+ * @param err the error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getInvalidUChars(const UConverter *converter,
+                      UChar *errUChars,
+                      int8_t *len,
+                      UErrorCode *err);
+
+/**
+ * Resets the state of a converter to the default state. This is used
+ * in the case of an error, to restart a conversion from a known default state.
+ * It will also empty the internal output buffers.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_reset(UConverter *converter);
+
+/**
+ * Resets the to-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion to
+ * Unicode to a known default state. It will also empty the internal
+ * output buffers used for the conversion to Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_resetToUnicode(UConverter *converter);
+
+/**
+ * Resets the from-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion from
+ * Unicode to a known default state. It will also empty the internal output
+ * buffers used for the conversion from Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_resetFromUnicode(UConverter *converter);
+
+/**
+ * Returns the maximum number of bytes that are output per UChar in conversion
+ * from Unicode using this converter.
+ * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
+ * to calculate the size of a target buffer for conversion from Unicode.
+ *
+ * Note: Before ICU 2.8, this function did not return reliable numbers for
+ * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
+ *
+ * This number may not be the same as the maximum number of bytes per
+ * "conversion unit". In other words, it may not be the intuitively expected
+ * number of bytes per character that would be published for a charset,
+ * and may not fulfill any other purpose than the allocation of an output
+ * buffer of guaranteed sufficient size for a given input length and converter.
+ *
+ * Examples for special cases that are taken into account:
+ * - Supplementary code points may convert to more bytes than BMP code points.
+ *   This function returns bytes per UChar (UTF-16 code unit), not per
+ *   Unicode code point, for efficient buffer allocation.
+ * - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
+ * - When m input UChars are converted to n output bytes, then the maximum m/n
+ *   is taken into account.
+ *
+ * The number returned here does not take into account
+ * (see UCNV_GET_MAX_BYTES_FOR_STRING):
+ * - callbacks which output more than one charset character sequence per call,
+ *   like escape callbacks
+ * - initial and final non-character bytes that are output by some converters
+ *   (automatic BOMs, initial escape sequence, final SI, etc.)
+ *
+ * Examples for returned values:
+ * - SBCS charsets: 1
+ * - Shift-JIS: 2
+ * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
+ * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
+ * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
+ * - ISO-2022: 3 (always outputs UTF-8)
+ * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
+ * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
+ *
+ * @param converter The Unicode converter.
+ * @return The maximum number of bytes per UChar (16 bit code unit)
+ *    that are output by ucnv_fromUnicode(),
+ *    to be used together with UCNV_GET_MAX_BYTES_FOR_STRING
+ *    for buffer allocation.
+ *
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @see ucnv_getMinCharSize
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+ucnv_getMaxCharSize(const UConverter *converter);
+
+/**
+ * Calculates the size of a buffer for conversion from Unicode to a charset.
+ * The calculated size is guaranteed to be sufficient for this conversion.
+ *
+ * It takes into account initial and final non-character bytes that are output
+ * by some converters.
+ * It does not take into account callbacks which output more than one charset
+ * character sequence per call, like escape callbacks.
+ * The default (substitution) callback only outputs one charset character sequence.
+ *
+ * @param length Number of UChars to be converted.
+ * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
+ *                    that will be used.
+ * @return Size of a buffer that will be large enough to hold the output bytes of
+ *         converting length UChars with the converter that returned the maxCharSize.
+ *
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.8
+ */
+#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
+     (((int32_t)(length)+10)*(int32_t)(maxCharSize))
+
+/**
+ * Returns the minimum byte length (per codepoint) for characters in this codepage.
+ * This is usually either 1 or 2.
+ * @param converter the Unicode converter
+ * @return the minimum number of bytes per codepoint allowed by this particular converter
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+ucnv_getMinCharSize(const UConverter *converter);
+
+/**
+ * Returns the display name of the converter passed in based on the Locale
+ * passed in. If the locale contains no display name, the internal ASCII
+ * name will be filled in.
+ *
+ * @param converter the Unicode converter.
+ * @param displayLocale is the specific Locale we want to localised for
+ * @param displayName user provided buffer to be filled in
+ * @param displayNameCapacity size of displayName Buffer
+ * @param err error status code
+ * @return displayNameLength number of UChar needed in displayName
+ * @see ucnv_getName
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_getDisplayName(const UConverter *converter,
+                    const char *displayLocale,
+                    UChar *displayName,
+                    int32_t displayNameCapacity,
+                    UErrorCode *err);
+
+/**
+ * Gets the internal, canonical name of the converter (zero-terminated).
+ * The lifetime of the returned string will be that of the converter
+ * passed to this function.
+ * @param converter the Unicode converter
+ * @param err UErrorCode status
+ * @return the internal name of the converter
+ * @see ucnv_getDisplayName
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getName(const UConverter *converter, UErrorCode *err);
+
+/**
+ * Gets a codepage number associated with the converter. This is not guaranteed
+ * to be the one used to create the converter. Some converters do not represent
+ * platform registered codepages and return zero for the codepage number.
+ * The error code fill-in parameter indicates if the codepage number
+ * is available.
+ * Does not check if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ *
+ * Important: The use of CCSIDs is not recommended because it is limited
+ * to only two platforms in principle and only one (UCNV_IBM) in the current
+ * ICU converter API.
+ * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely.
+ * For more details see ucnv_openCCSID().
+ *
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return If any error occurrs, -1 will be returned otherwise, the codepage number
+ * will be returned
+ * @see ucnv_openCCSID
+ * @see ucnv_getPlatform
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_getCCSID(const UConverter *converter,
+              UErrorCode *err);
+
+/**
+ * Gets a codepage platform associated with the converter. Currently,
+ * only <TT>UCNV_IBM</TT> will be returned.
+ * Does not test if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return The codepage platform
+ * @stable ICU 2.0
+ */
+U_STABLE UConverterPlatform U_EXPORT2
+ucnv_getPlatform(const UConverter *converter,
+                 UErrorCode *err);
+
+/**
+ * Gets the type of the converter
+ * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022,
+ * EBCDIC_STATEFUL, LATIN_1
+ * @param converter a valid, opened converter
+ * @return the type of the converter
+ * @stable ICU 2.0
+ */
+U_STABLE UConverterType U_EXPORT2
+ucnv_getType(const UConverter * converter);
+
+/**
+ * Gets the "starter" (lead) bytes for converters of type MBCS.
+ * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
+ * is not MBCS. Fills in an array of type UBool, with the value of the byte
+ * as offset to the array. For example, if (starters[0x20] == TRUE) at return,
+ * it means that the byte 0x20 is a starter byte in this converter.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter a valid, opened converter of type MBCS
+ * @param starters an array of size 256 to be filled in
+ * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the
+ * converter is not a type which can return starters.
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getStarters(const UConverter* converter,
+                 UBool starters[256],
+                 UErrorCode* err);
+
+
+/**
+ * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.6
+ */
+typedef enum UConverterUnicodeSet {
+    /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
+    UCNV_ROUNDTRIP_SET,
+    /** Select the set of Unicode code points with roundtrip or fallback mappings. @stable ICU 4.0 */
+    UCNV_ROUNDTRIP_AND_FALLBACK_SET,
+    /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */
+    UCNV_SET_COUNT
+} UConverterUnicodeSet;
+
+
+/**
+ * Returns the set of Unicode code points that can be converted by an ICU converter.
+ *
+ * Returns one of several kinds of set:
+ *
+ * 1. UCNV_ROUNDTRIP_SET
+ *
+ * The set of all Unicode code points that can be roundtrip-converted
+ * (converted without any data loss) with the converter (ucnv_fromUnicode()).
+ * This set will not include code points that have fallback mappings
+ * or are only the result of reverse fallback mappings.
+ * This set will also not include PUA code points with fallbacks, although
+ * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback().
+ * See UTR #22 "Character Mapping Markup Language"
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * This is useful for example for
+ * - checking that a string or document can be roundtrip-converted with a converter,
+ *   without/before actually performing the conversion
+ * - testing if a converter can be used for text for typical text for a certain locale,
+ *   by comparing its roundtrip set with the set of ExemplarCharacters from
+ *   ICU's locale data or other sources
+ *
+ * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
+ *
+ * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
+ * when fallbacks are turned on (see ucnv_setFallback()).
+ * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
+ *
+ * In the future, there may be more UConverterUnicodeSet choices to select
+ * sets with different properties.
+ *
+ * @param cnv The converter for which a set is requested.
+ * @param setFillIn A valid USet *. It will be cleared by this function before
+ *            the converter's specific set is filled into the USet.
+ * @param whichSet A UConverterUnicodeSet selector;
+ *              currently UCNV_ROUNDTRIP_SET is the only supported value.
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ *
+ * @see UConverterUnicodeSet
+ * @see uset_open
+ * @see uset_close
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_getUnicodeSet(const UConverter *cnv,
+                   USet *setFillIn,
+                   UConverterUnicodeSet whichSet,
+                   UErrorCode *pErrorCode);
+
+/**
+ * Gets the current calback function used by the converter when an illegal
+ *  or invalid codepage sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getToUCallBack (const UConverter * converter,
+                     UConverterToUCallback *action,
+                     const void **context);
+
+/**
+ * Gets the current callback function used by the converter when illegal
+ * or invalid Unicode sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getFromUCallBack (const UConverter * converter,
+                       UConverterFromUCallback *action,
+                       const void **context);
+
+/**
+ * Changes the callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new toUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setToUCallBack (UConverter * converter,
+                     UConverterToUCallback newAction,
+                     const void* newContext,
+                     UConverterToUCallback *oldAction,
+                     const void** oldContext,
+                     UErrorCode * err);
+
+/**
+ * Changes the current callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new fromUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getFromUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setFromUCallBack (UConverter * converter,
+                       UConverterFromUCallback newAction,
+                       const void *newContext,
+                       UConverterFromUCallback *oldAction,
+                       const void **oldContext,
+                       UErrorCode * err);
+
+/**
+ * Converts an array of unicode characters to an array of codepage
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last UChar consumed.
+ *
+ * Target similarly starts out pointer at the first available byte in the output
+ * buffer, and ends up pointing after the last byte written to the output.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function.  When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ *  consumed. At that point, the caller should reset the source and
+ *  sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converters' internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source  pointers
+ *  with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ *  codepage characters to. Output : points to after the last codepage character copied
+ *  to <TT>target</TT>.
+ * @param targetLimit the pointer just after last of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source Unicode character buffer.
+ * @param sourceLimit the pointer just after the last of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks)  -1 will be placed for offsets.
+ * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until
+ * the source buffer is consumed.
+ * @param err the error status.  <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_fromUnicode (UConverter * converter,
+                  char **target,
+                  const char *targetLimit,
+                  const UChar ** source,
+                  const UChar * sourceLimit,
+                  int32_t* offsets,
+                  UBool flush,
+                  UErrorCode * err);
+
+/**
+ * Converts a buffer of codepage bytes into an array of unicode UChars
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last byte of source consumed.
+ *
+ * Target similarly starts out pointer at the first available UChar in the output
+ * buffer, and ends up pointing after the last UChar written to the output.
+ * It does NOT necessarily keep UChar sequences together.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function.  When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ *  consumed. At that point, the caller should reset the source and
+ *  sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converters' internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source  pointers
+ *  with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ *  UChars into. Output : points to after the last UChar copied.
+ * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source codepage buffer.
+ * @param sourceLimit the pointer to the byte after the end of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks)  -1 will be placed for offsets.
+ * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until
+ * the source buffer is consumed.
+ * @param err the error status.  <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setFromUCallBack
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_toUnicode(UConverter *converter,
+               UChar **target,
+               const UChar *targetLimit,
+               const char **source,
+               const char *sourceLimit,
+               int32_t *offsets,
+               UBool flush,
+               UErrorCode *err);
+
+/**
+ * Convert the Unicode string into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_fromUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
+ *
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
+ * @param src the input Unicode string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param pErrorCode normal ICU error code;
+ *                  common error codes that may be set by this function include
+ *                  U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ *                  U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ *         if the length is greater than destCapacity, then the string will not fit
+ *         and a buffer of the indicated length would need to be passed in
+ * @see ucnv_fromUnicode
+ * @see ucnv_convert
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromUChars(UConverter *cnv,
+                char *dest, int32_t destCapacity,
+                const UChar *src, int32_t srcLength,
+                UErrorCode *pErrorCode);
+
+/**
+ * Convert the codepage string into a Unicode string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_toUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * 2*srcLength (each char may be converted into a surrogate pair).
+ *
+ * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
+ * @param src the input codepage string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of UChars available at dest
+ * @param pErrorCode normal ICU error code;
+ *                  common error codes that may be set by this function include
+ *                  U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ *                  U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ *         if the length is greater than destCapacity, then the string will not fit
+ *         and a buffer of the indicated length would need to be passed in
+ * @see ucnv_toUnicode
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toUChars(UConverter *cnv,
+              UChar *dest, int32_t destCapacity,
+              const char *src, int32_t srcLength,
+              UErrorCode *pErrorCode);
+
+/**
+ * Convert a codepage buffer into Unicode one character at a time.
+ * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
+ *
+ * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
+ * - Faster for small amounts of data, for most converters, e.g.,
+ *   US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
+ *   (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
+ *    it uses ucnv_toUnicode() internally.)
+ * - Convenient.
+ *
+ * Limitations compared to ucnv_toUnicode():
+ * - Always assumes flush=TRUE.
+ *   This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
+ *   that is, for where the input is supplied in multiple buffers,
+ *   because ucnv_getNextUChar() will assume the end of the input at the end
+ *   of the first buffer.
+ * - Does not provide offset output.
+ *
+ * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
+ * ucnv_getNextUChar() uses the current state of the converter
+ * (unlike ucnv_toUChars() which always resets first).
+ * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
+ * stopped in the middle of a character sequence (with flush=FALSE),
+ * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
+ * internally until the next character boundary.
+ * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
+ * start at a character boundary.)
+ *
+ * Instead of using ucnv_getNextUChar(), it is recommended
+ * to convert using ucnv_toUnicode() or ucnv_toUChars()
+ * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
+ * or a C++ CharacterIterator or similar.
+ * This allows streaming conversion and offset output, for example.
+ *
+ * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
+ * There are two different kinds of codepages that provide mappings for surrogate characters:
+ * <ul>
+ *   <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
+ *       code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
+ *       Each valid sequence will result in exactly one returned code point.
+ *       If a sequence results in a single surrogate, then that will be returned
+ *       by itself, even if a neighboring sequence encodes the matching surrogate.</li>
+ *   <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
+ *       including surrogates. Code points in supplementary planes are represented with
+ *       two sequences, each encoding a surrogate.
+ *       For these codepages, matching pairs of surrogates will be combined into single
+ *       code points for returning from this function.
+ *       (Note that SCSU is actually a mix of these codepage types.)</li>
+ * </ul></p>
+ *
+ * @param converter an open UConverter
+ * @param source the address of a pointer to the codepage buffer, will be
+ *  updated to point after the bytes consumed in the conversion call.
+ * @param sourceLimit points to the end of the input buffer
+ * @param err fills in error status (see ucnv_toUnicode)
+ * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input
+ * is empty or does not convert to any output (e.g.: pure state-change
+ * codes SI/SO, escape sequences for ISO 2022,
+ * or if the callback did not output anything, ...).
+ * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
+ *  the "buffer" is the return code. However, there might be subsequent output
+ *  stored in the converter object
+ * that will be returned in following calls to this function.
+ * @return a UChar32 resulting from the partial conversion of source
+ * @see ucnv_toUnicode
+ * @see ucnv_toUChars
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+ucnv_getNextUChar(UConverter * converter,
+                  const char **source,
+                  const char * sourceLimit,
+                  UErrorCode * err);
+
+/**
+ * Convert from one external charset to another using two existing UConverters.
+ * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
+ * are used, "pivoting" through 16-bit Unicode.
+ *
+ * Important: For streaming conversion (multiple function calls for successive
+ * parts of a text stream), the caller must provide a pivot buffer explicitly,
+ * and must preserve the pivot buffer and associated pointers from one
+ * call to another. (The buffer may be moved if its contents and the relative
+ * pointer positions are preserved.)
+ *
+ * There is a similar function, ucnv_convert(),
+ * which has the following limitations:
+ * - it takes charset names, not converter objects, so that
+ *   - two converters are opened for each call
+ *   - only single-string conversion is possible, not streaming operation
+ * - it does not provide enough information to find out,
+ *   in case of failure, whether the toUnicode or
+ *   the fromUnicode conversion failed
+ *
+ * By contrast, ucnv_convertEx()
+ * - takes UConverter parameters instead of charset names
+ * - fully exposes the pivot buffer for streaming conversion and complete error handling
+ *
+ * ucnv_convertEx() also provides further convenience:
+ * - an option to reset the converters at the beginning
+ *   (if reset==TRUE, see parameters;
+ *    also sets *pivotTarget=*pivotSource=pivotStart)
+ * - allow NUL-terminated input
+ *   (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ *   (if sourceLimit==NULL, see parameters)
+ * - terminate with a NUL on output
+ *   (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ *   or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ *   the target buffer
+ * - the pivot buffer can be provided internally;
+ *   possible only for whole-string conversion, not streaming conversion;
+ *   in this case, the caller will not be able to get details about where an
+ *   error occurred
+ *   (if pivotStart==NULL, see below)
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * - a conversion error occurred
+ *   (other U_FAILURE(), see description of pErrorCode)
+ *
+ * Limitation compared to the direct use of
+ * ucnv_fromUnicode() and ucnv_toUnicode():
+ * ucnv_convertEx() does not provide offset information.
+ *
+ * Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
+ * ucnv_convertEx() does not support preflighting directly.
+ *
+ * Sample code for converting a single string from
+ * one external charset to UTF-8, ignoring the location of errors:
+ *
+ * \code
+ * int32_t
+ * myToUTF8(UConverter *cnv,
+ *          const char *s, int32_t length,
+ *          char *u8, int32_t capacity,
+ *          UErrorCode *pErrorCode) {
+ *     UConverter *utf8Cnv;
+ *     char *target;
+ *
+ *     if(U_FAILURE(*pErrorCode)) {
+ *         return 0;
+ *     }
+ *
+ *     utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
+ *     if(U_FAILURE(*pErrorCode)) {
+ *         return 0;
+ *     }
+ *
+ *     if(length<0) {
+ *         length=strlen(s);
+ *     }
+ *     target=u8;
+ *     ucnv_convertEx(utf8Cnv, cnv,
+ *                    &target, u8+capacity,
+ *                    &s, s+length,
+ *                    NULL, NULL, NULL, NULL,
+ *                    TRUE, TRUE,
+ *                    pErrorCode);
+ *
+ *     myReleaseCachedUTF8Converter(utf8Cnv);
+ *
+ *     // return the output string length, but without preflighting
+ *     return (int32_t)(target-u8);
+ * }
+ * \endcode
+ *
+ * @param targetCnv     Output converter, used to convert from the UTF-16 pivot
+ *                      to the target using ucnv_fromUnicode().
+ * @param sourceCnv     Input converter, used to convert from the source to
+ *                      the UTF-16 pivot using ucnv_toUnicode().
+ * @param target        I/O parameter, same as for ucnv_fromUChars().
+ *                      Input: *target points to the beginning of the target buffer.
+ *                      Output: *target points to the first unit after the last char written.
+ * @param targetLimit   Pointer to the first unit after the target buffer.
+ * @param source        I/O parameter, same as for ucnv_toUChars().
+ *                      Input: *source points to the beginning of the source buffer.
+ *                      Output: *source points to the first unit after the last char read.
+ * @param sourceLimit   Pointer to the first unit after the source buffer.
+ * @param pivotStart    Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
+ *                      then an internal buffer is used and the other pivot
+ *                      arguments are ignored and can be NULL as well.
+ * @param pivotSource   I/O parameter, same as source in ucnv_fromUChars() for
+ *                      conversion from the pivot buffer to the target buffer.
+ * @param pivotTarget   I/O parameter, same as target in ucnv_toUChars() for
+ *                      conversion from the source buffer to the pivot buffer.
+ *                      It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
+ *                      and pivotStart<pivotLimit (unless pivotStart==NULL).
+ * @param pivotLimit    Pointer to the first unit after the pivot buffer.
+ * @param reset         If TRUE, then ucnv_resetToUnicode(sourceCnv) and
+ *                      ucnv_resetFromUnicode(targetCnv) are called, and the
+ *                      pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
+ * @param flush         If true, indicates the end of the input.
+ *                      Passed directly to ucnv_toUnicode(), and carried over to
+ *                      ucnv_fromUnicode() when the source is empty as well.
+ * @param pErrorCode    ICU error code in/out parameter.
+ *                      Must fulfill U_SUCCESS before the function call.
+ *                      U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
+ *                      because overflows into the pivot buffer are handled internally.
+ *                      Other conversion errors are from the source-to-pivot
+ *                      conversion if *pivotSource==pivotStart, otherwise from
+ *                      the pivot-to-target conversion.
+ *
+ * @see ucnv_convert
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
+               char **target, const char *targetLimit,
+               const char **source, const char *sourceLimit,
+               UChar *pivotStart, UChar **pivotSource,
+               UChar **pivotTarget, const UChar *pivotLimit,
+               UBool reset, UBool flush,
+               UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, two converters are opened according to the name arguments,
+ * then the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(), then the converters are closed again.
+ *
+ * This is a convenience function, not an efficient way to convert a lot of text:
+ * ucnv_convert()
+ * - takes charset names, not converter objects, so that
+ *   - two converters are opened for each call
+ *   - only single-string conversion is possible, not streaming operation
+ * - does not provide enough information to find out,
+ *   in case of failure, whether the toUnicode or
+ *   the fromUnicode conversion failed
+ * - allows NUL-terminated input
+ *   (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ *   (if sourceLength==-1, see parameters)
+ * - terminate with a NUL on output
+ *   (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ *   or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ *   the target buffer
+ * - a pivot buffer is provided internally
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ *   and either the target buffer is terminated with a single NUL byte
+ *   or the error code is set to U_STRING_NOT_TERMINATED_WARNING
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ *   and the full output string length is returned ("preflighting")
+ * - a conversion error occurred
+ *   (other U_FAILURE(), see description of pErrorCode)
+ *
+ * @param toConverterName   The name of the converter that is used to convert
+ *                          from the UTF-16 pivot buffer to the target.
+ * @param fromConverterName The name of the converter that is used to convert
+ *                          from the source to the UTF-16 pivot buffer.
+ * @param target            Pointer to the output buffer.
+ * @param targetCapacity    Capacity of the target, in bytes.
+ * @param source            Pointer to the input buffer.
+ * @param sourceLength      Length of the input text, in bytes, or -1 for NUL-terminated input.
+ * @param pErrorCode        ICU error code in/out parameter.
+ *                          Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ *         and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_convertEx
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_convert(const char *toConverterName,
+             const char *fromConverterName,
+             char *target,
+             int32_t targetCapacity,
+             const char *source,
+             int32_t sourceLength,
+             UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses the cnv converter parameter.
+ * The pivot-to-target conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param algorithmicType   UConverterType constant identifying the desired target
+ *                          charset as a purely algorithmic converter.
+ *                          Those are converters for Unicode charsets like
+ *                          UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ *                          as well as US-ASCII and ISO-8859-1.
+ * @param cnv               The converter that is used to convert
+ *                          from the source to the UTF-16 pivot buffer.
+ * @param target            Pointer to the output buffer.
+ * @param targetCapacity    Capacity of the target, in bytes.
+ * @param source            Pointer to the input buffer.
+ * @param sourceLength      Length of the input text, in bytes
+ * @param pErrorCode        ICU error code in/out parameter.
+ *                          Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ *         and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toAlgorithmic(UConverterType algorithmicType,
+                   UConverter *cnv,
+                   char *target, int32_t targetCapacity,
+                   const char *source, int32_t sourceLength,
+                   UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ * The pivot-to-target conversion uses the cnv converter parameter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param cnv               The converter that is used to convert
+ *                          from the UTF-16 pivot buffer to the target.
+ * @param algorithmicType   UConverterType constant identifying the desired source
+ *                          charset as a purely algorithmic converter.
+ *                          Those are converters for Unicode charsets like
+ *                          UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ *                          as well as US-ASCII and ISO-8859-1.
+ * @param target            Pointer to the output buffer.
+ * @param targetCapacity    Capacity of the target, in bytes.
+ * @param source            Pointer to the input buffer.
+ * @param sourceLength      Length of the input text, in bytes
+ * @param pErrorCode        ICU error code in/out parameter.
+ *                          Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ *         and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromAlgorithmic(UConverter *cnv,
+                     UConverterType algorithmicType,
+                     char *target, int32_t targetCapacity,
+                     const char *source, int32_t sourceLength,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Frees up memory occupied by unused, cached converter shared data.
+ *
+ * @return the number of cached converters successfully deleted
+ * @see ucnv_close
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_flushCache(void);
+
+/**
+ * Returns the number of available converters, as per the alias file.
+ *
+ * @return the number of available converters
+ * @see ucnv_getAvailableName
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_countAvailable(void);
+
+/**
+ * Gets the canonical converter name of the specified converter from a list of
+ * all available converters contaied in the alias file. All converters
+ * in this list can be opened.
+ *
+ * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>)
+ * @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
+ * @see ucnv_countAvailable
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+ucnv_getAvailableName(int32_t n);
+
+/**
+ * Returns a UEnumeration to enumerate all of the canonical converter
+ * names, as per the alias file, regardless of the ability to open each
+ * converter.
+ *
+ * @return A UEnumeration object for getting all the recognized canonical
+ *   converter names.
+ * @see ucnv_getAvailableName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.4
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucnv_openAllNames(UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of aliases for a given converter or alias name.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * This method only enumerates the listed entries in the alias file.
+ * @param alias alias name
+ * @param pErrorCode error status
+ * @return number of names on alias list for given alias
+ * @stable ICU 2.0
+ */
+U_STABLE uint16_t U_EXPORT2
+ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
+
+/**
+ * Gives the name of the alias at given index of alias list.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param n index in alias list
+ * @param pErrorCode result of operation
+ * @return returns the name of the alias at given index
+ * @see ucnv_countAliases
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Fill-up the list of alias names for the given alias.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param aliases fill-in list, aliases is a pointer to an array of
+ *        <code>ucnv_countAliases()</code> string-pointers
+ *        (<code>const char *</code>) that will be filled in.
+ *        The strings themselves are owned by the library.
+ * @param pErrorCode result of operation
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
+
+/**
+ * Return a new UEnumeration object for enumerating all the
+ * alias names for a given converter that are recognized by a standard.
+ * This method only enumerates the listed entries in the alias file.
+ * The convrtrs.txt file can be modified to change the results of
+ * this function.
+ * The first result in this list is the same result given by
+ * <code>ucnv_getStandardName</code>, which is the default alias for
+ * the specified standard name. The returned object must be closed with
+ * <code>uenum_close</code> when you are done with the object.
+ *
+ * @param convName original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ *      are such standards
+ * @param pErrorCode The error code
+ * @return A UEnumeration object for getting all aliases that are recognized
+ *      by a standard. If any of the parameters are invalid, NULL
+ *      is returned.
+ * @see ucnv_getStandardName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.2
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucnv_openStandardNames(const char *convName,
+                       const char *standard,
+                       UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of standards associated to converter names.
+ * @return number of standards
+ * @stable ICU 2.0
+ */
+U_STABLE uint16_t U_EXPORT2
+ucnv_countStandards(void);
+
+/**
+ * Gives the name of the standard at given index of standard list.
+ * @param n index in standard list
+ * @param pErrorCode result of operation
+ * @return returns the name of the standard at given index. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Returns a standard name for a given converter name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("conv", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"alias2"</b>
+ *
+ * @param name original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ *        are such standards
+ * @param pErrorCode result of operation
+ * @return returns the standard converter name;
+ *         if a standard converter name cannot be determined,
+ *         then <code>NULL</code> is returned. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * This function will return the internal canonical converter name of the
+ * tagged alias. This is the opposite of ucnv_openStandardNames, which
+ * returns the tagged alias given the canonical name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("alias1", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"conv"</b>
+ *
+ * @return returns the canonical converter name;
+ *         if a standard or alias name cannot be determined,
+ *         then <code>NULL</code> is returned. The returned string is
+ *         owned by the library.
+ * @see ucnv_getStandardName
+ * @stable ICU 2.4
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * Returns the current default converter name. If you want to open
+ * a default converter, you do not need to use this function.
+ * It is faster if you pass a NULL argument to ucnv_open the
+ * default converter.
+ *
+ * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
+ * always returns "UTF-8".
+ *
+ * @return returns the current default converter name.
+ *         Storage owned by the library
+ * @see ucnv_setDefaultName
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getDefaultName(void);
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * This function is not thread safe. DO NOT call this function when ANY ICU
+ * function is being used from more than one thread! This function sets the
+ * current default converter name. If this function needs to be called, it
+ * should be called during application initialization. Most of the time, the
+ * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
+ * is sufficient for your application.
+ *
+ * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
+ * does nothing.
+ *
+ * @param name the converter name to be the default (must be known by ICU).
+ * @see ucnv_getDefaultName
+ * @system
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setDefaultName(const char *name);
+#endif  /* U_HIDE_SYSTEM_API */
+
+/**
+ * Fixes the backslash character mismapping.  For example, in SJIS, the backslash
+ * character in the ASCII portion is also used to represent the yen currency sign.
+ * When mapping from Unicode character 0x005C, it's unclear whether to map the
+ * character back to yen or backslash in SJIS.  This function will take the input
+ * buffer and replace all the yen sign characters with backslash.  This is necessary
+ * when the user tries to open a file with the input buffer on Windows.
+ * This function will test the converter to see whether such mapping is
+ * required.  You can sometimes avoid using this function by using the correct version
+ * of Shift-JIS.
+ *
+ * @param cnv The converter representing the target codepage.
+ * @param source the input buffer to be fixed
+ * @param sourceLen the length of the input buffer
+ * @see ucnv_isAmbiguous
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
+
+/**
+ * Determines if the converter contains ambiguous mappings of the same
+ * character or not.
+ * @param cnv the converter to be tested
+ * @return TRUE if the converter contains ambiguous mapping of the same
+ * character, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_isAmbiguous(const UConverter *cnv);
+
+/**
+ * Sets the converter to use fallback mappings or not.
+ * Regardless of this flag, the converter will always use
+ * fallbacks from Unicode Private Use code points, as well as
+ * reverse fallbacks (to Unicode).
+ * For details see ".ucm File Format"
+ * in the Conversion Data chapter of the ICU User Guide:
+ * http://www.icu-project.org/userguide/conversion-data.html#ucmformat
+ *
+ * @param cnv The converter to set the fallback mapping usage on.
+ * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback
+ * mapping, FALSE otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_usesFallback
+ */
+U_STABLE void U_EXPORT2
+ucnv_setFallback(UConverter *cnv, UBool usesFallback);
+
+/**
+ * Determines if the converter uses fallback mappings or not.
+ * This flag has restrictions, see ucnv_setFallback().
+ *
+ * @param cnv The converter to be tested
+ * @return TRUE if the converter uses fallback, FALSE otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_setFallback
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_usesFallback(const UConverter *cnv);
+
+/**
+ * Detects Unicode signature byte sequences at the start of the byte stream
+ * and returns the charset name of the indicated Unicode charset.
+ * NULL is returned when no Unicode signature is recognized.
+ * The number of bytes in the signature is output as well.
+ *
+ * The caller can ucnv_open() a converter using the charset name.
+ * The first code unit (UChar) from the start of the stream will be U+FEFF
+ * (the Unicode BOM/signature character) and can usually be ignored.
+ *
+ * For most Unicode charsets it is also possible to ignore the indicated
+ * number of initial stream bytes and start converting after them.
+ * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
+ * this will not work. Therefore, it is best to ignore the first output UChar
+ * instead of the input signature bytes.
+ * <p>
+ * Usage:
+ * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature
+ *
+ * @param source            The source string in which the signature should be detected.
+ * @param sourceLength      Length of the input string, or -1 if terminated with a NUL byte.
+ * @param signatureLength   A pointer to int32_t to receive the number of bytes that make up the signature
+ *                          of the detected UTF. 0 if not detected.
+ *                          Can be a NULL pointer.
+ * @param pErrorCode        ICU error code in/out parameter.
+ *                          Must fulfill U_SUCCESS before the function call.
+ * @return The name of the encoding detected. NULL if encoding is not detected.
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+ucnv_detectUnicodeSignature(const char* source,
+                            int32_t sourceLength,
+                            int32_t *signatureLength,
+                            UErrorCode *pErrorCode);
+
+/**
+ * Returns the number of UChars held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @param cnv       The converter in which the input is held
+ * @param status    ICU error code in/out parameter.
+ *                  Must fulfill U_SUCCESS before the function call.
+ * @return The number of UChars in the state. -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);
+
+/**
+ * Returns the number of chars held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @param cnv       The converter in which the input is held as internal state
+ * @param status    ICU error code in/out parameter.
+ *                  Must fulfill U_SUCCESS before the function call.
+ * @return The number of chars in the state. -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);
+
+/**
+ * Returns whether or not the charset of the converter has a fixed number of bytes
+ * per charset character.
+ * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS.
+ * Another example is UTF-32 which is always 4 bytes per character.
+ * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit
+ * but a UTF-32 converter encodes each code point with 4 bytes.
+ * Note: This method is not intended to be used to determine whether the charset has a
+ * fixed ratio of bytes to Unicode codes <i>units</i> for any particular Unicode encoding form.
+ * FALSE is returned with the UErrorCode if error occurs or cnv is NULL.
+ * @param cnv       The converter to be tested
+ * @param status    ICU error code in/out paramter
+ * @return TRUE if the converter is fixed-width
+ * @stable ICU 4.8
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status);
+
+#endif
+
+#endif
+/*_UCNV*/
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ucnv_err.h b/src/core/basetypes/icu-ucsdet/include/unicode/ucnv_err.h
new file mode 100644
index 00000000..e092e95f
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ucnv_err.h
@@ -0,0 +1,463 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+ *
+ *
+ *   ucnv_err.h:
+ */
+
+/**
+ * \file
+ * \brief C UConverter predefined error callbacks
+ *
+ *  <h2>Error Behaviour Functions</h2>
+ *  Defines some error behaviour functions called by ucnv_{from,to}Unicode
+ *  These are provided as part of ICU and many are stable, but they
+ *  can also be considered only as an example of what can be done with
+ *  callbacks.  You may of course write your own.
+ *
+ *  If you want to write your own, you may also find the functions from
+ *  ucnv_cb.h useful when writing your own callbacks.
+ *
+ *  These functions, although public, should NEVER be called directly.
+ *  They should be used as parameters to the ucnv_setFromUCallback
+ *  and ucnv_setToUCallback functions, to set the behaviour of a converter
+ *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
+ *
+ *  usage example:  'STOP' doesn't need any context, but newContext
+ *    could be set to something other than 'NULL' if needed. The available
+ *    contexts in this header can modify the default behavior of the callback.
+ *
+ *  \code
+ *  UErrorCode err = U_ZERO_ERROR;
+ *  UConverter *myConverter = ucnv_open("ibm-949", &err);
+ *  const void *oldContext;
+ *  UConverterFromUCallback oldAction;
+ *
+ *
+ *  if (U_SUCCESS(err))
+ *  {
+ *      ucnv_setFromUCallBack(myConverter,
+ *                       UCNV_FROM_U_CALLBACK_STOP,
+ *                       NULL,
+ *                       &oldAction,
+ *                       &oldContext,
+ *                       &status);
+ *  }
+ *  \endcode
+ *
+ *  The code above tells "myConverter" to stop when it encounters an
+ *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
+ *  and ucnv_setToUCallBack would need to be called in order to change
+ *  that behavior too.
+ *
+ *  Here is an example with a context:
+ *
+ *  \code
+ *  UErrorCode err = U_ZERO_ERROR;
+ *  UConverter *myConverter = ucnv_open("ibm-949", &err);
+ *  const void *oldContext;
+ *  UConverterFromUCallback oldAction;
+ *
+ *
+ *  if (U_SUCCESS(err))
+ *  {
+ *      ucnv_setToUCallBack(myConverter,
+ *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
+ *                       UCNV_SUB_STOP_ON_ILLEGAL,
+ *                       &oldAction,
+ *                       &oldContext,
+ *                       &status);
+ *  }
+ *  \endcode
+ *
+ *  The code above tells "myConverter" to stop when it encounters an
+ *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ *  Codepage -> Unicode. Any unmapped and legal characters will be
+ *  substituted to be the default substitution character.
+ */
+
+#ifndef UCNV_ERR_H
+#define UCNV_ERR_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+/** Forward declaring the UConverter structure. @stable ICU 2.0 */
+struct UConverter;
+
+/** @stable ICU 2.0 */
+typedef struct UConverter UConverter;
+
+/**
+ * FROM_U, TO_U context options for sub callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SUB_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U, TO_U context options for skip callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) 
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_ICU       NULL
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_JAVA      "J"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+ * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_C         "C"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_DEC   "D"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_HEX   "X"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_UNICODE   "U"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
+ * a backslash, 1..6 hex digits, and a space)
+ * @stable ICU 4.0
+ */
+#define UCNV_ESCAPE_CSS2   "S"
+
+/** 
+ * The process condition code to be used with the callbacks.  
+ * Codes which are greater than UCNV_IRREGULAR should be 
+ * passed on to any chained callbacks.
+ * @stable ICU 2.0
+ */
+typedef enum {
+    UCNV_UNASSIGNED = 0,  /**< The code point is unassigned.
+                             The error code U_INVALID_CHAR_FOUND will be set. */
+    UCNV_ILLEGAL = 1,     /**< The code point is illegal. For example, 
+                             \\x81\\x2E is illegal in SJIS because \\x2E
+                             is not a valid trail byte for the \\x81 
+                             lead byte.
+                             Also, starting with Unicode 3.0.1, non-shortest byte sequences
+                             in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
+                             are also illegal, not just irregular.
+                             The error code U_ILLEGAL_CHAR_FOUND will be set. */
+    UCNV_IRREGULAR = 2,   /**< The codepoint is not a regular sequence in 
+                             the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
+                             are irregular UTF-8 byte sequences for single surrogate
+                             code points.
+                             The error code U_INVALID_CHAR_FOUND will be set. */
+    UCNV_RESET = 3,       /**< The callback is called with this reason when a
+                             'reset' has occured. Callback should reset all
+                             state. */
+    UCNV_CLOSE = 4,        /**< Called when the converter is closed. The
+                             callback should release any allocated memory.*/
+    UCNV_CLONE = 5         /**< Called when ucnv_safeClone() is called on the
+                              converter. the pointer available as the
+                              'context' is an alias to the original converters'
+                              context pointer. If the context must be owned
+                              by the new converter, the callback must clone 
+                              the data and call ucnv_setFromUCallback 
+                              (or setToUCallback) with the correct pointer.
+                              @stable ICU 2.2
+                           */
+} UConverterCallbackReason;
+
+
+/**
+ * The structure for the fromUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+    uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
+    UBool flush;                /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0    */
+    UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0  */
+    const UChar *source;        /**< Pointer to the source source buffer. @stable ICU 2.0    */
+    const UChar *sourceLimit;   /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
+    char *target;               /**< Pointer to the target buffer. @stable ICU 2.0    */
+    const char *targetLimit;    /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
+    int32_t *offsets;           /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0  */
+} UConverterFromUnicodeArgs;
+
+
+/**
+ * The structure for the toUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+    uint16_t size;              /**< The size of this struct   @stable ICU 2.0 */
+    UBool flush;                /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0   */
+    UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+    const char *source;         /**< Pointer to the source source buffer. @stable ICU 2.0    */
+    const char *sourceLimit;    /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
+    UChar *target;              /**< Pointer to the target buffer. @stable ICU 2.0    */
+    const UChar *targetLimit;   /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
+    int32_t *offsets;           /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0  */
+} UConverterToUnicodeArgs;
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromUArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
+                  const void *context,
+                  UConverterToUnicodeArgs *toUArgs,
+                  const char* codeUnits,
+                  int32_t length,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSINGED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters. 
+ *
+ * @param context  The function currently recognizes the callback options:
+ *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ *                      returning the error code back to the caller immediately.
+ *                 NULL: Skips any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromUArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or 
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * current substitution string for the converter. This is the default
+ * callback.
+ *
+ * @param context The function currently recognizes the callback options:
+ *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ *                      returning the error code back to the caller immediately.
+ *                 NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromUArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal codepoints
+ *
+ * @param context The function currently recognizes the callback options:
+ *        <ul>
+ *        <li>UCNV_ESCAPE_ICU: Substitues the  ILLEGAL SEQUENCE with the hexadecimal 
+ *          representation in the format  %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). 
+ *          In the Event the converter doesn't support the characters {%,U}[A-F][0-9], 
+ *          it will  substitute  the illegal sequence with the substitution characters.
+ *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ *          %UD84D%UDC56</li>
+ *        <li>UCNV_ESCAPE_JAVA: Substitues the  ILLEGAL SEQUENCE with the hexadecimal 
+ *          representation in the format  \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). 
+ *          In the Event the converter doesn't support the characters {\,u}[A-F][0-9], 
+ *          it will  substitute  the illegal sequence with the substitution characters.
+ *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ *          \\uD84D\\uDC56</li>
+ *        <li>UCNV_ESCAPE_C: Substitues the  ILLEGAL SEQUENCE with the hexadecimal 
+ *          representation in the format  \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). 
+ *          In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], 
+ *          it will  substitute  the illegal sequence with the substitution characters.
+ *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ *          \\U00023456</li>
+ *        <li>UCNV_ESCAPE_XML_DEC: Substitues the  ILLEGAL SEQUENCE with the decimal 
+ *          representation in the format \htmlonly&amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly. 
+ *          In the Event the converter doesn't support the characters {&amp;,#}[0-9], 
+ *          it will  substitute  the illegal sequence with the substitution characters.
+ *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ *          &amp;#144470; and Zero padding is ignored.</li>
+ *        <li>UCNV_ESCAPE_XML_HEX:Substitues the  ILLEGAL SEQUENCE with the decimal 
+ *          representation in the format \htmlonly&amp;#xXXXX; e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly. 
+ *          In the Event the converter doesn't support the characters {&,#,x}[0-9], 
+ *          it will  substitute  the illegal sequence with the substitution characters.
+ *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ *          \htmlonly&amp;#x23456;\endhtmlonly</li>
+ *        </ul>
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromUArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSINGED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters. 
+ *
+ * @param context  The function currently recognizes the callback options:
+ *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ *                      returning the error code back to the caller immediately.
+ *                 NULL: Skips any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
+                  const void *context,
+                  UConverterToUnicodeArgs *toUArgs,
+                  const char* codeUnits,
+                  int32_t length,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or 
+ * UNASSIGNED_SEQUENCE depending on context parameter,  with the
+ * Unicode substitution character, U+FFFD.
+ *
+ * @param context  The function currently recognizes the callback options:
+ *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ *                      returning the error code back to the caller immediately.
+ *                 NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
+                  const void *context,
+                  UConverterToUnicodeArgs *toUArgs,
+                  const char* codeUnits,
+                  int32_t length,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal bytes
+ *  (in the format  %XNN, e.g. "%XFF%X0A%XC8%X03").
+ *
+ * @param context This function currently recognizes the callback options:
+ *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
+ *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
+                  const void *context,
+                  UConverterToUnicodeArgs *toUArgs,
+                  const char* codeUnits,
+                  int32_t length,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+#endif
+
+#endif
+
+/*UCNV_ERR_H*/ 
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uconfig.h b/src/core/basetypes/icu-ucsdet/include/unicode/uconfig.h
new file mode 100644
index 00000000..ed073b63
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uconfig.h
@@ -0,0 +1,430 @@
+/*  
+**********************************************************************
+*   Copyright (C) 2002-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  uconfig.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002sep19
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UCONFIG_H__
+#define __UCONFIG_H__
+
+
+/*!
+ * \file
+ * \brief User-configurable settings
+ *
+ * Miscellaneous switches:
+ *
+ * A number of macros affect a variety of minor aspects of ICU.
+ * Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
+ * and moved here to make them easier to find.
+ *
+ * Switches for excluding parts of ICU library code modules:
+ *
+ * Changing these macros allows building partial, smaller libraries for special purposes.
+ * By default, all modules are built.
+ * The switches are fairly coarse, controlling large modules.
+ * Basic services cannot be turned off.
+ *
+ * Building with any of these options does not guarantee that the
+ * ICU build process will completely work. It is recommended that
+ * the ICU libraries and data be built using the normal build.
+ * At that time you should remove the data used by those services.
+ * After building the ICU data library, you should rebuild the ICU
+ * libraries with these switches customized to your needs.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
+ * prior to determining default settings for uconfig variables.
+ *
+ * @internal ICU 4.0
+ */
+#if defined(UCONFIG_USE_LOCAL)
+#include "uconfig_local.h"
+#endif
+
+/**
+ * \def U_DEBUG
+ * Determines whether to include debugging code.
+ * Automatically set on Windows, but most compilers do not have
+ * related predefined macros.
+ * @internal
+ */
+#ifdef U_DEBUG
+    /* Use the predefined value. */
+#elif defined(_DEBUG)
+    /*
+     * _DEBUG is defined by Visual Studio debug compilation.
+     * Do *not* test for its NDEBUG macro: It is an orthogonal macro
+     * which disables assert().
+     */
+#   define U_DEBUG 1
+# else
+#   define U_DEBUG 0
+#endif
+
+/**
+ * Determines wheter to enable auto cleanup of libraries. 
+ * @internal
+ */
+#ifndef UCLN_NO_AUTO_CLEANUP
+#define UCLN_NO_AUTO_CLEANUP 1
+#endif
+
+/**
+ * \def U_DISABLE_RENAMING
+ * Determines whether to disable renaming or not.
+ * @internal
+ */
+#ifndef U_DISABLE_RENAMING
+#define U_DISABLE_RENAMING 0
+#endif
+
+/**
+ * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+ * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
+ * utypes.h includes those headers if this macro is defined to 0.
+ * Otherwise, each those headers must be included explicitly when using one of their macros.
+ * Defaults to 0 for backward compatibility, except inside ICU.
+ * @stable ICU 49
+ */
+#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+    /* Use the predefined value. */
+#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
+      defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
+      defined(U_TOOLUTIL_IMPLEMENTATION)
+#   define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
+#else
+#   define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
+#endif
+
+/**
+ * \def U_OVERRIDE_CXX_ALLOCATION
+ * Determines whether to override new and delete.
+ * ICU is normally built such that all of its C++ classes, via their UMemory base,
+ * override operators new and delete to use its internal, customizable,
+ * non-exception-throwing memory allocation functions. (Default value 1 for this macro.)
+ *
+ * This is especially important when the application and its libraries use multiple heaps.
+ * For example, on Windows, this allows the ICU DLL to be used by
+ * applications that statically link the C Runtime library.
+ *
+ * @stable ICU 2.2
+ */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+
+/**
+ * \def U_ENABLE_TRACING
+ * Determines whether to enable tracing.
+ * @internal
+ */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 0
+#endif
+
+/**
+ * \def U_ENABLE_DYLOAD
+ * Whether to enable Dynamic loading in ICU.
+ * @internal
+ */
+#ifndef U_ENABLE_DYLOAD
+#define U_ENABLE_DYLOAD 1
+#endif
+
+/**
+ * \def U_CHECK_DYLOAD
+ * Whether to test Dynamic loading as an OS capability.
+ * @internal
+ */
+#ifndef U_CHECK_DYLOAD
+#define U_CHECK_DYLOAD 1
+#endif
+
+
+/**
+ * \def U_DEFAULT_SHOW_DRAFT
+ * Do we allow ICU users to use the draft APIs by default?
+ * @internal
+ */
+#ifndef U_DEFAULT_SHOW_DRAFT
+#define U_DEFAULT_SHOW_DRAFT 1
+#endif
+
+/*===========================================================================*/
+/* Custom icu entry point renaming                                           */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_LIB_SUFFIX
+ * 1 if a custom library suffix is set.
+ * @internal
+ */
+#ifdef U_HAVE_LIB_SUFFIX
+    /* Use the predefined value. */
+#elif defined(U_LIB_SUFFIX_C_NAME)
+#   define U_HAVE_LIB_SUFFIX 1
+#endif
+
+/**
+ * \def U_LIB_SUFFIX_C_NAME_STRING
+ * Defines the library suffix as a string with C syntax.
+ * @internal
+ */
+#ifdef U_LIB_SUFFIX_C_NAME_STRING
+    /* Use the predefined value. */
+#elif defined(U_LIB_SUFFIX_C_NAME)
+#   define CONVERT_TO_STRING(s) #s
+#   define U_LIB_SUFFIX_C_NAME_STRING CONVERT_TO_STRING(U_LIB_SUFFIX_C_NAME)
+#else
+#   define U_LIB_SUFFIX_C_NAME_STRING ""
+#endif
+
+/* common/i18n library switches --------------------------------------------- */
+
+/**
+ * \def UCONFIG_ONLY_COLLATION
+ * This switch turns off modules that are not needed for collation.
+ *
+ * It does not turn off legacy conversion because that is necessary
+ * for ICU to work on EBCDIC platforms (for the default converter).
+ * If you want "only collation" and do not build for EBCDIC,
+ * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_ONLY_COLLATION
+#   define UCONFIG_ONLY_COLLATION 0
+#endif
+
+#if UCONFIG_ONLY_COLLATION
+    /* common library */
+#   define UCONFIG_NO_BREAK_ITERATION 1
+#   define UCONFIG_NO_IDNA 1
+
+    /* i18n library */
+#   if UCONFIG_NO_COLLATION
+#       error Contradictory collation switches in uconfig.h.
+#   endif
+#   define UCONFIG_NO_FORMATTING 1
+#   define UCONFIG_NO_TRANSLITERATION 1
+#   define UCONFIG_NO_REGULAR_EXPRESSIONS 1
+#endif
+
+/* common library switches -------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_FILE_IO
+ * This switch turns off all file access in the common library
+ * where file access is only used for data loading.
+ * ICU data must then be provided in the form of a data DLL (or with an
+ * equivalent way to link to the data residing in an executable,
+ * as in building a combined library with both the common library's code and
+ * the data), or via udata_setCommonData().
+ * Application data must be provided via udata_setAppData() or by using
+ * "open" functions that take pointers to data, for example ucol_openBinary().
+ *
+ * File access is not used at all in the i18n library.
+ *
+ * File access cannot be turned off for the icuio library or for the ICU
+ * test suites and ICU tools.
+ *
+ * @stable ICU 3.6
+ */
+#ifndef UCONFIG_NO_FILE_IO
+#   define UCONFIG_NO_FILE_IO 0
+#endif
+
+#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR) 
+#   error Contradictory file io switches in uconfig.h.
+#endif
+
+/**
+ * \def UCONFIG_NO_CONVERSION
+ * ICU will not completely build with this switch turned on.
+ * This switch turns off all converters.
+ *
+ * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
+ * in utypes.h if char* strings in your environment are always in UTF-8.
+ *
+ * @stable ICU 3.2
+ * @see U_CHARSET_IS_UTF8
+ */
+#ifndef UCONFIG_NO_CONVERSION
+#   define UCONFIG_NO_CONVERSION 0
+#endif
+
+#if UCONFIG_NO_CONVERSION
+#   define UCONFIG_NO_LEGACY_CONVERSION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_LEGACY_CONVERSION
+ * This switch turns off all converters except for
+ * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
+ * - US-ASCII
+ * - ISO-8859-1
+ *
+ * Turning off legacy conversion is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_LEGACY_CONVERSION
+#   define UCONFIG_NO_LEGACY_CONVERSION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_NORMALIZATION
+ * This switch turns off normalization.
+ * It implies turning off several other services as well, for example
+ * collation and IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_NORMALIZATION
+#   define UCONFIG_NO_NORMALIZATION 0
+#elif UCONFIG_NO_NORMALIZATION
+    /* common library */
+    /* ICU 50 CJK dictionary BreakIterator uses normalization */
+#   define UCONFIG_NO_BREAK_ITERATION 1
+    /* IDNA (UTS #46) is implemented via normalization */
+#   define UCONFIG_NO_IDNA 1
+
+    /* i18n library */
+#   if UCONFIG_ONLY_COLLATION
+#       error Contradictory collation switches in uconfig.h.
+#   endif
+#   define UCONFIG_NO_COLLATION 1
+#   define UCONFIG_NO_TRANSLITERATION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_BREAK_ITERATION
+ * This switch turns off break iteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_BREAK_ITERATION
+#   define UCONFIG_NO_BREAK_ITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_IDNA
+ * This switch turns off IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_IDNA
+#   define UCONFIG_NO_IDNA 0
+#endif
+
+/**
+ * \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+ * Determines the default UMessagePatternApostropheMode.
+ * See the documentation for that enum.
+ *
+ * @stable ICU 4.8
+ */
+#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+#   define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
+#endif
+
+/* i18n library switches ---------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_COLLATION
+ * This switch turns off collation and collation-based string search.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_COLLATION
+#   define UCONFIG_NO_COLLATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_FORMATTING
+ * This switch turns off formatting and calendar/timezone services.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_FORMATTING
+#   define UCONFIG_NO_FORMATTING 0
+#endif
+
+/**
+ * \def UCONFIG_NO_TRANSLITERATION
+ * This switch turns off transliteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_TRANSLITERATION
+#   define UCONFIG_NO_TRANSLITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_REGULAR_EXPRESSIONS
+ * This switch turns off regular expressions.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
+#   define UCONFIG_NO_REGULAR_EXPRESSIONS 0
+#endif
+
+/**
+ * \def UCONFIG_NO_SERVICE
+ * This switch turns off service registration.
+ *
+ * @stable ICU 3.2
+ */
+#ifndef UCONFIG_NO_SERVICE
+#   define UCONFIG_NO_SERVICE 0
+#endif
+
+/**
+ * \def UCONFIG_HAVE_PARSEALLINPUT
+ * This switch turns on the "parse all input" attribute. Binary incompatible.
+ *
+ * @internal
+ */
+#ifndef UCONFIG_HAVE_PARSEALLINPUT
+#   define UCONFIG_HAVE_PARSEALLINPUT 1
+#endif
+
+
+/**
+ * \def UCONFIG_FORMAT_FASTPATHS_49
+ * This switch turns on other formatting fastpaths. Binary incompatible in object DecimalFormat and DecimalFormatSymbols
+ *
+ * @internal
+ */
+#ifndef UCONFIG_FORMAT_FASTPATHS_49
+#   define UCONFIG_FORMAT_FASTPATHS_49 1
+#endif
+
+/**
+ * \def UCONFIG_NO_FILTERED_BREAK_ITERATION
+ * This switch turns off filtered break iteration code.
+ *
+ * @internal
+ */
+#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION
+#   define UCONFIG_NO_FILTERED_BREAK_ITERATION 0
+
+
+
+#endif
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ucsdet.h b/src/core/basetypes/icu-ucsdet/include/unicode/ucsdet.h
new file mode 100644
index 00000000..d3a297be
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ucsdet.h
@@ -0,0 +1,413 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2013, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ *   file name:  ucsdet.h
+ *   encoding:   US-ASCII
+ *   indentation:4
+ *
+ *   created on: 2005Aug04
+ *   created by: Andy Heninger
+ *
+ *   ICU Character Set Detection, API for C
+ *
+ *   Draft version 18 Oct 2005
+ *
+ */
+
+#ifndef __UCSDET_H
+#define __UCSDET_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/localpointer.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file 
+ * \brief C API: Charset Detection API
+ *
+ * This API provides a facility for detecting the
+ * charset or encoding of character data in an unknown text format.
+ * The input data can be from an array of bytes.
+ * <p>
+ * Character set detection is at best an imprecise operation.  The detection
+ * process will attempt to identify the charset that best matches the characteristics
+ * of the byte data, but the process is partly statistical in nature, and
+ * the results can not be guaranteed to always be correct.
+ * <p>
+ * For best accuracy in charset detection, the input data should be primarily
+ * in a single language, and a minimum of a few hundred bytes worth of plain text
+ * in the language are needed.  The detection process will attempt to
+ * ignore html or xml style markup that could otherwise obscure the content.
+ */
+ 
+
+struct UCharsetDetector;
+/**
+  * Structure representing a charset detector
+  * @stable ICU 3.6
+  */
+typedef struct UCharsetDetector UCharsetDetector;
+
+struct UCharsetMatch;
+/**
+  *  Opaque structure representing a match that was identified
+  *  from a charset detection operation.
+  *  @stable ICU 3.6
+  */
+typedef struct UCharsetMatch UCharsetMatch;
+
+/**
+  *  Open a charset detector.
+  *
+  *  @param status Any error conditions occurring during the open
+  *                operation are reported back in this variable.
+  *  @return the newly opened charset detector.
+  *  @stable ICU 3.6
+  */
+U_STABLE UCharsetDetector * U_EXPORT2
+ucsdet_open(UErrorCode   *status);
+
+/**
+  * Close a charset detector.  All storage and any other resources
+  *   owned by this charset detector will be released.  Failure to
+  *   close a charset detector when finished with it can result in
+  *   memory leaks in the application.
+  *
+  *  @param ucsd  The charset detector to be closed.
+  *  @stable ICU 3.6
+  */
+U_STABLE void U_EXPORT2
+ucsdet_close(UCharsetDetector *ucsd);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCharsetDetectorPointer
+ * "Smart pointer" class, closes a UCharsetDetector via ucsdet_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCharsetDetectorPointer, UCharsetDetector, ucsdet_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+  * Set the input byte data whose charset is to detected.
+  *
+  * Ownership of the input  text byte array remains with the caller.
+  * The input string must not be altered or deleted until the charset
+  * detector is either closed or reset to refer to different input text.
+  *
+  * @param ucsd   the charset detector to be used.
+  * @param textIn the input text of unknown encoding.   .
+  * @param len    the length of the input text, or -1 if the text
+  *               is NUL terminated.
+  * @param status any error conditions are reported back in this variable.
+  *
+  * @stable ICU 3.6
+  */
+U_STABLE void U_EXPORT2
+ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
+
+
+/** Set the declared encoding for charset detection.
+ *  The declared encoding of an input text is an encoding obtained
+ *  by the user from an http header or xml declaration or similar source that
+ *  can be provided as an additional hint to the charset detector.
+ *
+ *  How and whether the declared encoding will be used during the
+ *  detection process is TBD.
+ *
+ * @param ucsd      the charset detector to be used.
+ * @param encoding  an encoding for the current data obtained from
+ *                  a header or declaration or other source outside
+ *                  of the byte data itself.
+ * @param length    the length of the encoding name, or -1 if the name string
+ *                  is NUL terminated.
+ * @param status    any error conditions are reported back in this variable.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
+
+
+/**
+ * Return the charset that best matches the supplied input data.
+ * 
+ * Note though, that because the detection 
+ * only looks at the start of the input data,
+ * there is a possibility that the returned charset will fail to handle
+ * the full set of input data.
+ * <p>
+ * The returned UCharsetMatch object is owned by the UCharsetDetector.
+ * It will remain valid until the detector input is reset, or until
+ * the detector is closed.
+ * <p>
+ * The function will fail if
+ *  <ul>
+ *    <li>no charset appears to match the data.</li>
+ *    <li>no input text has been provided</li>
+ *  </ul>
+ *
+ * @param ucsd      the charset detector to be used.
+ * @param status    any error conditions are reported back in this variable.
+ * @return          a UCharsetMatch  representing the best matching charset,
+ *                  or NULL if no charset matches the byte data.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE const UCharsetMatch * U_EXPORT2
+ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status);
+    
+
+/**
+ *  Find all charset matches that appear to be consistent with the input,
+ *  returning an array of results.  The results are ordered with the
+ *  best quality match first.
+ *
+ *  Because the detection only looks at a limited amount of the
+ *  input byte data, some of the returned charsets may fail to handle
+ *  the all of input data.
+ *  <p>
+ *  The returned UCharsetMatch objects are owned by the UCharsetDetector.
+ *  They will remain valid until the detector is closed or modified
+ *  
+ * <p>
+ * Return an error if 
+ *  <ul>
+ *    <li>no charsets appear to match the input data.</li>
+ *    <li>no input text has been provided</li>
+ *  </ul>
+ * 
+ * @param ucsd          the charset detector to be used.
+ * @param matchesFound  pointer to a variable that will be set to the
+ *                      number of charsets identified that are consistent with
+ *                      the input data.  Output only.
+ * @param status        any error conditions are reported back in this variable.
+ * @return              A pointer to an array of pointers to UCharSetMatch objects.
+ *                      This array, and the UCharSetMatch instances to which it refers,
+ *                      are owned by the UCharsetDetector, and will remain valid until
+ *                      the detector is closed or modified.
+ * @stable ICU 3.6
+ */
+U_STABLE const UCharsetMatch ** U_EXPORT2
+ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
+
+
+
+/**
+ *  Get the name of the charset represented by a UCharsetMatch.
+ *
+ *  The storage for the returned name string is owned by the
+ *  UCharsetMatch, and will remain valid while the UCharsetMatch
+ *  is valid.
+ *
+ *  The name returned is suitable for use with the ICU conversion APIs.
+ *
+ *  @param ucsm    The charset match object.
+ *  @param status  Any error conditions are reported back in this variable.
+ *  @return        The name of the matching charset.
+ *
+ *  @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status);
+
+/**
+ *  Get a confidence number for the quality of the match of the byte
+ *  data with the charset.  Confidence numbers range from zero to 100,
+ *  with 100 representing complete confidence and zero representing
+ *  no confidence.
+ *
+ *  The confidence values are somewhat arbitrary.  They define an
+ *  an ordering within the results for any single detection operation
+ *  but are not generally comparable between the results for different input.
+ *
+ *  A confidence value of ten does have a general meaning - it is used
+ *  for charsets that can represent the input data, but for which there
+ *  is no other indication that suggests that the charset is the correct one.
+ *  Pure 7 bit ASCII data, for example, is compatible with a
+ *  great many charsets, most of which will appear as possible matches
+ *  with a confidence of 10.
+ *
+ *  @param ucsm    The charset match object.
+ *  @param status  Any error conditions are reported back in this variable.
+ *  @return        A confidence number for the charset match.
+ *
+ *  @stable ICU 3.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status);
+
+/**
+ *  Get the RFC 3066 code for the language of the input data.
+ *
+ *  The Charset Detection service is intended primarily for detecting
+ *  charsets, not language.  For some, but not all, charsets, a language is
+ *  identified as a byproduct of the detection process, and that is what
+ *  is returned by this function.
+ *
+ *  CAUTION:
+ *    1.  Language information is not available for input data encoded in
+ *        all charsets. In particular, no language is identified
+ *        for UTF-8 input data.
+ *
+ *    2.  Closely related languages may sometimes be confused.
+ *
+ *  If more accurate language detection is required, a linguistic
+ *  analysis package should be used.
+ *
+ *  The storage for the returned name string is owned by the
+ *  UCharsetMatch, and will remain valid while the UCharsetMatch
+ *  is valid.
+ *
+ *  @param ucsm    The charset match object.
+ *  @param status  Any error conditions are reported back in this variable.
+ *  @return        The RFC 3066 code for the language of the input data, or
+ *                 an empty string if the language could not be determined.
+ *
+ *  @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status);
+
+
+/**
+  *  Get the entire input text as a UChar string, placing it into
+  *  a caller-supplied buffer.  A terminating
+  *  NUL character will be appended to the buffer if space is available.
+  *
+  *  The number of UChars in the output string, not including the terminating
+  *  NUL, is returned. 
+  *
+  *  If the supplied buffer is smaller than required to hold the output,
+  *  the contents of the buffer are undefined.  The full output string length
+  *  (in UChars) is returned as always, and can be used to allocate a buffer
+  *  of the correct size.
+  *
+  *
+  * @param ucsm    The charset match object.
+  * @param buf     A UChar buffer to be filled with the converted text data.
+  * @param cap     The capacity of the buffer in UChars.
+  * @param status  Any error conditions are reported back in this variable.
+  * @return        The number of UChars in the output string.
+  *
+  * @stable ICU 3.6
+  */
+U_STABLE  int32_t U_EXPORT2
+ucsdet_getUChars(const UCharsetMatch *ucsm,
+                 UChar *buf, int32_t cap, UErrorCode *status);
+
+
+
+/**
+  *  Get an iterator over the set of all detectable charsets - 
+  *  over the charsets that are known to the charset detection
+  *  service.
+  *
+  *  The returned UEnumeration provides access to the names of
+  *  the charsets.
+  *
+  *  <p>
+  *  The state of the Charset detector that is passed in does not
+  *  affect the result of this function, but requiring a valid, open
+  *  charset detector as a parameter insures that the charset detection
+  *  service has been safely initialized and that the required detection
+  *  data is available.
+  *
+  *  <p>
+  *  <b>Note:</b> Multiple different charset encodings in a same family may use
+  *  a single shared name in this implementation. For example, this method returns
+  *  an array including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252"
+  *  (Windows Latin 1). However, actual detection result could be "windows-1252"
+  *  when the input data matches Latin 1 code points with any points only available
+  *  in "windows-1252".
+  *
+  *  @param ucsd a Charset detector.
+  *  @param status  Any error conditions are reported back in this variable.
+  *  @return an iterator providing access to the detectable charset names.
+  *  @stable ICU 3.6
+  */
+U_STABLE  UEnumeration * U_EXPORT2
+ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status);
+
+/**
+  *  Test whether input filtering is enabled for this charset detector.
+  *  Input filtering removes text that appears to be HTML or xml
+  *  markup from the input before applying the code page detection
+  *  heuristics.
+  *
+  *  @param ucsd  The charset detector to check.
+  *  @return TRUE if filtering is enabled.
+  *  @stable ICU 3.6
+  */
+
+U_STABLE  UBool U_EXPORT2
+ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
+
+
+/**
+ * Enable filtering of input text. If filtering is enabled,
+ * text within angle brackets ("<" and ">") will be removed
+ * before detection, which will remove most HTML or xml markup.
+ *
+ * @param ucsd   the charset detector to be modified.
+ * @param filter <code>true</code> to enable input text filtering.
+ * @return The previous setting.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE  UBool U_EXPORT2
+ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+  *  Get an iterator over the set of detectable charsets -
+  *  over the charsets that are enabled by the specified charset detector.
+  *
+  *  The returned UEnumeration provides access to the names of
+  *  the charsets.
+  *
+  *  @param ucsd a Charset detector.
+  *  @param status  Any error conditions are reported back in this variable.
+  *  @return an iterator providing access to the detectable charset names by
+  *  the specified charset detector.
+  *  @internal
+  */
+U_INTERNAL UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status);
+
+/**
+  * Enable or disable individual charset encoding.
+  * A name of charset encoding must be included in the names returned by
+  * {@link #getAllDetectableCharsets()}.
+  *
+  * @param ucsd a Charset detector.
+  * @param encoding encoding the name of charset encoding.
+  * @param enabled <code>TRUE</code> to enable, or <code>FALSE</code> to disable the
+  *   charset encoding.
+  * @param status receives the return status. When the name of charset encoding
+  *   is not supported, U_ILLEGAL_ARGUMENT_ERROR is set.
+  * @internal
+  */
+U_INTERNAL void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status);
+#endif  /* U_HIDE_INTERNAL_API */
+
+#endif
+#endif   /* __UCSDET_H */
+
+
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/udata.h b/src/core/basetypes/icu-ucsdet/include/unicode/udata.h
new file mode 100644
index 00000000..29e46630
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/udata.h
@@ -0,0 +1,430 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  udata.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999oct25
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UDATA_H__
+#define __UDATA_H__
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ * \brief C API: Data loading interface
+ *
+ * <h2>Information about data loading interface</h2>
+ *
+ * This API is used to find and efficiently load data for ICU and applications
+ * using ICU. It provides an abstract interface that specifies a data type and
+ * name to find and load the data. Normally this API is used by other ICU APIs
+ * to load required data out of the ICU data library, but it can be used to
+ * load data out of other places.
+ *
+ * See the User Guide Data Management chapter.
+ */
+ 
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Character used to separate package names from tree names 
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR '-'
+
+/**
+ * String used to separate package names from tree names 
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR_STRING "-"
+
+/**
+ * Character used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_CHAR '/'
+
+/**
+ * String used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_STRING "/"
+
+/**
+ * Alias for standard ICU data 
+ * @internal ICU 3.0
+ */
+#define U_ICUDATA_ALIAS "ICUDATA"
+
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * UDataInfo contains the properties about the requested data.
+ * This is meta data.
+ *
+ * <p>This structure may grow in the future, indicated by the
+ * <code>size</code> field.</p>
+ *
+ * <p>ICU data must be at least 8-aligned, and should be 16-aligned.
+ * The UDataInfo struct begins 4 bytes after the start of the data item,
+ * so it is 4-aligned.
+ *
+ * <p>The platform data property fields help determine if a data
+ * file can be efficiently used on a given machine.
+ * The particular fields are of importance only if the data
+ * is affected by the properties - if there is integer data
+ * with word sizes > 1 byte, char* text, or UChar* text.</p>
+ *
+ * <p>The implementation for the <code>udata_open[Choice]()</code>
+ * functions may reject data based on the value in <code>isBigEndian</code>.
+ * No other field is used by the <code>udata</code> API implementation.</p>
+ *
+ * <p>The <code>dataFormat</code> may be used to identify
+ * the kind of data, e.g. a converter table.</p>
+ *
+ * <p>The <code>formatVersion</code> field should be used to
+ * make sure that the format can be interpreted.
+ * It may be a good idea to check only for the one or two highest
+ * of the version elements to allow the data memory to
+ * get more or somewhat rearranged contents, for as long
+ * as the using code can still interpret the older contents.</p>
+ *
+ * <p>The <code>dataVersion</code> field is intended to be a
+ * common place to store the source version of the data;
+ * for data from the Unicode character database, this could
+ * reflect the Unicode version.</p>
+ *
+ * @stable ICU 2.0
+ */
+typedef struct {
+    /** sizeof(UDataInfo)
+     *  @stable ICU 2.0 */
+    uint16_t size;
+
+    /** unused, set to 0 
+     *  @stable ICU 2.0*/
+    uint16_t reservedWord;
+
+    /* platform data properties */
+    /** 0 for little-endian machine, 1 for big-endian
+     *  @stable ICU 2.0 */
+    uint8_t isBigEndian;
+
+    /** see U_CHARSET_FAMILY values in utypes.h 
+     *  @stable ICU 2.0*/
+    uint8_t charsetFamily;
+
+    /** sizeof(UChar), one of { 1, 2, 4 } 
+     *  @stable ICU 2.0*/
+    uint8_t sizeofUChar;
+
+    /** unused, set to 0 
+     *  @stable ICU 2.0*/
+    uint8_t reservedByte;
+
+    /** data format identifier 
+     *  @stable ICU 2.0*/
+    uint8_t dataFormat[4];
+
+    /** versions: [0] major [1] minor [2] milli [3] micro 
+     *  @stable ICU 2.0*/
+    uint8_t formatVersion[4];
+
+    /** versions: [0] major [1] minor [2] milli [3] micro 
+     *  @stable ICU 2.0*/
+    uint8_t dataVersion[4];
+} UDataInfo;
+
+/* API for reading data -----------------------------------------------------*/
+
+/**
+ * Forward declaration of the data memory type.
+ * @stable ICU 2.0
+ */
+typedef struct UDataMemory UDataMemory;
+
+/**
+ * Callback function for udata_openChoice().
+ * @param context parameter passed into <code>udata_openChoice()</code>.
+ * @param type The type of the data as passed into <code>udata_openChoice()</code>.
+ *             It may be <code>NULL</code>.
+ * @param name The name of the data as passed into <code>udata_openChoice()</code>.
+ * @param pInfo A pointer to the <code>UDataInfo</code> structure
+ *              of data that has been loaded and will be returned
+ *              by <code>udata_openChoice()</code> if this function
+ *              returns <code>TRUE</code>.
+ * @return TRUE if the current data memory is acceptable
+ * @stable ICU 2.0
+ */
+typedef UBool U_CALLCONV
+UDataMemoryIsAcceptable(void *context,
+                        const char *type, const char *name,
+                        const UDataInfo *pInfo);
+
+
+/**
+ * Convenience function.
+ * This function works the same as <code>udata_openChoice</code>
+ * except that any data that matches the type and name
+ * is assumed to be acceptable.
+ * @param path Specifies an absolute path and/or a basename for the
+ *             finding of the data in the file system.
+ *             <code>NULL</code> for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ *             For example, resource bundles are loaded with type "res",
+ *             conversion tables with type "cnv".
+ *             This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * @return A pointer (handle) to a data memory object, or <code>NULL</code>
+ *         if an error occurs. Call <code>udata_getMemory()</code>
+ *         to get a pointer to the actual data.
+ *
+ * @see udata_openChoice
+ * @stable ICU 2.0
+ */
+U_STABLE UDataMemory * U_EXPORT2
+udata_open(const char *path, const char *type, const char *name,
+           UErrorCode *pErrorCode);
+
+/**
+ * Data loading function.
+ * This function is used to find and load efficiently data for
+ * ICU and applications using ICU.
+ * It provides an abstract interface that allows to specify a data
+ * type and name to find and load the data.
+ *
+ * <p>The implementation depends on platform properties and user preferences
+ * and may involve loading shared libraries (DLLs), mapping
+ * files into memory, or fopen()/fread() files.
+ * It may also involve using static memory or database queries etc.
+ * Several or all data items may be combined into one entity
+ * (DLL, memory-mappable file).</p>
+ *
+ * <p>The data is always preceded by a header that includes
+ * a <code>UDataInfo</code> structure.
+ * The caller's <code>isAcceptable()</code> function is called to make
+ * sure that the data is useful. It may be called several times if it
+ * rejects the data and there is more than one location with data
+ * matching the type and name.</p>
+ *
+ * <p>If <code>path==NULL</code>, then ICU data is loaded.
+ * Otherwise, it is separated into a basename and a basename-less directory string.
+ * The basename is used as the data package name, and the directory is
+ * logically prepended to the ICU data directory string.</p>
+ *
+ * <p>For details about ICU data loading see the User Guide
+ * Data Management chapter. (http://icu-project.org/userguide/icudata.html)</p>
+ *
+ * @param path Specifies an absolute path and/or a basename for the
+ *             finding of the data in the file system.
+ *             <code>NULL</code> for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ *             For example, resource bundles are loaded with type "res",
+ *             conversion tables with type "cnv".
+ *             This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param isAcceptable This function is called to verify that loaded data
+ *                     is useful for the client code. If it returns FALSE
+ *                     for all data items, then <code>udata_openChoice()</code>
+ *                     will return with an error.
+ * @param context Arbitrary parameter to be passed into isAcceptable.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * @return A pointer (handle) to a data memory object, or <code>NULL</code>
+ *         if an error occurs. Call <code>udata_getMemory()</code>
+ *         to get a pointer to the actual data.
+ * @stable ICU 2.0
+ */
+U_STABLE UDataMemory * U_EXPORT2
+udata_openChoice(const char *path, const char *type, const char *name,
+                 UDataMemoryIsAcceptable *isAcceptable, void *context,
+                 UErrorCode *pErrorCode);
+
+/**
+ * Close the data memory.
+ * This function must be called to allow the system to
+ * release resources associated with this data memory.
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_close(UDataMemory *pData);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUDataMemoryPointer
+ * "Smart pointer" class, closes a UDataMemory via udata_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Get the pointer to the actual data inside the data memory.
+ * The data is read-only.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ *
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_STABLE const void * U_EXPORT2
+udata_getMemory(UDataMemory *pData);
+
+/**
+ * Get the information from the data memory header.
+ * This allows to get access to the header containing
+ * platform data properties etc. which is not part of
+ * the data itself and can therefore not be accessed
+ * via the pointer that <code>udata_getMemory()</code> returns.
+ *
+ * @param pData pointer to the data memory object
+ * @param pInfo pointer to a UDataInfo object;
+ *              its <code>size</code> field must be set correctly,
+ *              typically to <code>sizeof(UDataInfo)</code>.
+ *
+ * <code>*pInfo</code> will be filled with the UDataInfo structure
+ * in the data memory object. If this structure is smaller than
+ * <code>pInfo->size</code>, then the <code>size</code> will be
+ * adjusted and only part of the structure will be filled.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
+
+/**
+ * This function bypasses the normal ICU data loading process and
+ * allows you to force ICU's system data to come out of a user-specified
+ * area in memory.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ * See http://userguide.icu-project.org/icudata
+ *
+ * The format of this data is that of the icu common data file, as is
+ * generated by the pkgdata tool with mode=common or mode=dll.
+ * You can read in a whole common mode file and pass the address to the start of the
+ * data, or (with the appropriate link options) pass in the pointer to
+ * the data that has been loaded from a dll by the operating system,
+ * as shown in this code:
+ *
+ *       extern const char U_IMPORT U_ICUDATA_ENTRY_POINT [];
+ *        // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
+ *       UErrorCode  status = U_ZERO_ERROR;
+ *
+ *       udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
+ *
+ * It is important that the declaration be as above. The entry point
+ * must not be declared as an extern void*.
+ *
+ * Starting with ICU 4.4, it is possible to set several data packages,
+ * one per call to this function.
+ * udata_open() will look for data in the multiple data packages in the order
+ * in which they were set.
+ * The position of the linked-in or default-name ICU .data package in the
+ * search list depends on when the first data item is loaded that is not contained
+ * in the already explicitly set packages.
+ * If data was loaded implicitly before the first call to this function
+ * (for example, via opening a converter, constructing a UnicodeString
+ * from default-codepage data, using formatting or collation APIs, etc.),
+ * then the default data will be first in the list.
+ *
+ * This function has no effect on application (non ICU) data.  See udata_setAppData()
+ * for similar functionality for application data.
+ *
+ * @param data pointer to ICU common data
+ * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_setCommonData(const void *data, UErrorCode *err);
+
+
+/**
+ * This function bypasses the normal ICU data loading process for application-specific
+ * data and allows you to force the it to come out of a user-specified
+ * pointer.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ * See http://userguide.icu-project.org/icudata
+ *
+ * The format of this data is that of the icu common data file, like 'icudt26l.dat'
+ * or the corresponding shared library (DLL) file.
+ * The application must read in or otherwise construct an image of the data and then
+ * pass the address of it to this function.
+ *
+ *
+ * Warning:  setAppData will set a U_USING_DEFAULT_WARNING code if
+ *           data with the specifed path that has already been opened, or
+ *           if setAppData with the same path has already been called.
+ *           Any such calls to setAppData will have no effect.
+ *
+ *
+ * @param packageName the package name by which the application will refer
+ *             to (open) this data
+ * @param data pointer to the data
+ * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
+ * @see udata_setCommonData
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
+
+/**
+ * Possible settings for udata_setFileAccess()
+ * @see udata_setFileAccess
+ * @stable ICU 3.4
+ */
+typedef enum UDataFileAccess {
+    /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */
+    UDATA_FILES_FIRST,
+    /** An alias for the default access mode. @stable ICU 3.4 */
+    UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
+    /** ICU only loads data from packages, not from single files. @stable ICU 3.4 */
+    UDATA_ONLY_PACKAGES,
+    /** ICU loads data from packages first, and only from single files
+        if the data cannot be found in a package. @stable ICU 3.4 */
+    UDATA_PACKAGES_FIRST,
+    /** ICU does not access the file system for data loading. @stable ICU 3.4 */
+    UDATA_NO_FILES,
+    /** Number of real UDataFileAccess values. @stable ICU 3.4 */
+    UDATA_FILE_ACCESS_COUNT
+} UDataFileAccess;
+
+/**
+ * This function may be called to control how ICU loads data. It must be called
+ * before any ICU data is loaded, including application data loaded with 
+ * ures/ResourceBundle or udata APIs. This function is not multithread safe.  
+ * The results of calling it while other threads are loading data are undefined.
+ * @param access The type of file access to be used
+ * @param status Error code.
+ * @see UDataFileAccess
+ * @stable ICU 3.4 
+ */
+U_STABLE void U_EXPORT2
+udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
+
+U_CDECL_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uenum.h b/src/core/basetypes/icu-ucsdet/include/unicode/uenum.h
new file mode 100644
index 00000000..5408ec5a
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uenum.h
@@ -0,0 +1,206 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uenum.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:2
+*
+*   created on: 2002jul08
+*   created by: Vladimir Weinstein
+*/
+
+#ifndef __UENUM_H
+#define __UENUM_H
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/strenum.h"
+#endif
+
+/**
+ * \file
+ * \brief C API: String Enumeration 
+ */
+ 
+/**
+ * An enumeration object.
+ * For usage in C programs.
+ * @stable ICU 2.2
+ */
+struct UEnumeration;
+/** structure representing an enumeration object instance @stable ICU 2.2 */
+typedef struct UEnumeration UEnumeration;
+
+/**
+ * Disposes of resources in use by the iterator.  If en is NULL,
+ * does nothing.  After this call, any char* or UChar* pointer
+ * returned by uenum_unext() or uenum_next() is invalid.
+ * @param en UEnumeration structure pointer
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uenum_close(UEnumeration* en);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUEnumerationPointer
+ * "Smart pointer" class, closes a UEnumeration via uenum_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns the number of elements that the iterator traverses.  If
+ * the iterator is out-of-sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR.
+ * This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched (depending
+ * on the type of data being traversed). Use with caution and only 
+ * when necessary.
+ * @param en UEnumeration structure pointer
+ * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
+ *               iterator is out of sync.
+ * @return number of elements in the iterator
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+uenum_count(UEnumeration* en, UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list.  If there are
+ * no more elements, returns NULL.  If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned.  If the native service string is a char* string,
+ * it is converted to UChar* with the invariant converter.
+ * The result is terminated by (UChar)0.
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ *                     (not including the terminating \\0).
+ *                     If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ *               the iterator is out of sync with its service.
+ * @return a pointer to the string.  The string will be
+ *         zero-terminated.  The return pointer is owned by this iterator
+ *         and must not be deleted by the caller.  The pointer is valid
+ *         until the next call to any uenum_... method, including
+ *         uenum_next() or uenum_unext().  When all strings have been
+ *         traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_STABLE const UChar* U_EXPORT2
+uenum_unext(UEnumeration* en,
+            int32_t* resultLength,
+            UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list.  If there are
+ * no more elements, returns NULL.  If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned.  If the native service string is a UChar*
+ * string, it is converted to char* with the invariant converter.
+ * The result is terminated by (char)0.  If the conversion fails
+ * (because a character cannot be converted) then status is set to
+ * U_INVARIANT_CONVERSION_ERROR and the return value is undefined
+ * (but non-NULL).
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ *                     (not including the terminating \\0).
+ *                     If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ *               the iterator is out of sync with its service.  Set to
+ *               U_INVARIANT_CONVERSION_ERROR if the underlying native string is
+ *               UChar* and conversion to char* with the invariant converter
+ *               fails. This error pertains only to current string, so iteration
+ *               might be able to continue successfully.
+ * @return a pointer to the string.  The string will be
+ *         zero-terminated.  The return pointer is owned by this iterator
+ *         and must not be deleted by the caller.  The pointer is valid
+ *         until the next call to any uenum_... method, including
+ *         uenum_next() or uenum_unext().  When all strings have been
+ *         traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_STABLE const char* U_EXPORT2
+uenum_next(UEnumeration* en,
+           int32_t* resultLength,
+           UErrorCode* status);
+
+/**
+ * Resets the iterator to the current list of service IDs.  This
+ * re-establishes sync with the service and rewinds the iterator
+ * to start at the first element.
+ * @param en the iterator object
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ *               the iterator is out of sync with its service.  
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uenum_reset(UEnumeration* en, UErrorCode* status);
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * Given a StringEnumeration, wrap it in a UEnumeration.  The
+ * StringEnumeration is adopted; after this call, the caller must not
+ * delete it (regardless of error status).
+ * @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration.
+ * @param ec the error code.
+ * @return a UEnumeration wrapping the adopted StringEnumeration.
+ * @stable ICU 4.2
+ */
+U_STABLE UEnumeration* U_EXPORT2
+uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec);
+
+#endif
+
+/**
+ * Given an array of const UChar* strings, return a UEnumeration.  String pointers from 0..count-1 must not be null.
+ * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
+ * \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
+ * @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller.
+ * @param count length of the array
+ * @param ec error code
+ * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
+ * @see uenum_close
+ * @stable ICU 50
+ */
+U_STABLE UEnumeration* U_EXPORT2
+uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
+                                 UErrorCode* ec);
+
+/* Note:  next function is not hidden as draft, as it is used internally (it was formerly an internal function). */
+
+/**
+ * Given an array of const char* strings (invariant chars only), return a UEnumeration.  String pointers from 0..count-1 must not be null.
+ * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
+ * \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
+ * @param strings array of char* strings (each null terminated).  All storage is owned by the caller.
+ * @param count length of the array
+ * @param ec error code
+ * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory
+ * @see uenum_close
+ * @stable ICU 50
+ */
+U_STABLE UEnumeration* U_EXPORT2
+uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
+                                 UErrorCode* ec);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uiter.h b/src/core/basetypes/icu-ucsdet/include/unicode/uiter.h
new file mode 100644
index 00000000..0cdb8ffb
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uiter.h
@@ -0,0 +1,707 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2011 International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uiter.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002jan18
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UITER_H__
+#define __UITER_H__
+
+/**
+ * \file
+ * \brief C API: Unicode Character Iteration
+ *
+ * @see UCharIterator
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+    U_NAMESPACE_BEGIN
+
+    class CharacterIterator;
+    class Replaceable;
+
+    U_NAMESPACE_END
+#endif
+
+U_CDECL_BEGIN
+
+struct UCharIterator;
+typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+
+/**
+ * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
+ * @see UCharIteratorMove
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef enum UCharIteratorOrigin {
+    UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
+} UCharIteratorOrigin;
+
+/** Constants for UCharIterator. @stable ICU 2.6 */
+enum {
+    /**
+     * Constant value that may be returned by UCharIteratorMove
+     * indicating that the final UTF-16 index is not known, but that the move succeeded.
+     * This can occur when moving relative to limit or length, or
+     * when moving relative to the current index after a setState()
+     * when the current UTF-16 index is not known.
+     *
+     * It would be very inefficient to have to count from the beginning of the text
+     * just to get the current/limit/length index after moving relative to it.
+     * The actual index can be determined with getIndex(UITER_CURRENT)
+     * which will count the UChars if necessary.
+     *
+     * @stable ICU 2.6
+     */
+    UITER_UNKNOWN_INDEX=-2
+};
+
+
+/**
+ * Constant for UCharIterator getState() indicating an error or
+ * an unknown state.
+ * Returned by uiter_getState()/UCharIteratorGetState
+ * when an error occurs.
+ * Also, some UCharIterator implementations may not be able to return
+ * a valid state for each position. This will be clearly documented
+ * for each such iterator (none of the public ones here).
+ *
+ * @stable ICU 2.6
+ */
+#define UITER_NO_STATE ((uint32_t)0xffffffff)
+
+/**
+ * Function type declaration for UCharIterator.getIndex().
+ *
+ * Gets the current position, or the start or limit of the
+ * iteration range.
+ *
+ * This function may perform slowly for UITER_CURRENT after setState() was called,
+ * or for UITER_LENGTH, because an iterator implementation may have to count
+ * UChars if the underlying storage is not UTF-16.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param origin get the 0, start, limit, length, or current index
+ * @return the requested index, or U_SENTINEL in an error condition
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.move().
+ *
+ * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
+ *
+ * Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * Out of bounds movement will be pinned to the start or limit.
+ *
+ * This function may perform slowly for moving relative to UITER_LENGTH
+ * because an iterator implementation may have to count the rest of the
+ * UChars if the native storage is not UTF-16.
+ *
+ * When moving relative to the limit or length, or
+ * relative to the current position after setState() was called,
+ * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
+ * determination of the actual UTF-16 index.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ * See UITER_UNKNOWN_INDEX for details.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param delta can be positive, zero, or negative
+ * @param origin move relative to the 0, start, limit, length, or current index
+ * @return the new index, or U_SENTINEL on an error condition,
+ *         or UITER_UNKNOWN_INDEX when the index is not known.
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @see UITER_UNKNOWN_INDEX
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.hasNext().
+ *
+ * Check if current() and next() can still
+ * return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether current() and next() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.hasPrevious().
+ *
+ * Check if previous() can still return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether previous() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasPrevious(UCharIterator *iter);
+ 
+/**
+ * Function type declaration for UCharIterator.current().
+ *
+ * Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorCurrent(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.next().
+ *
+ * Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.previous().
+ *
+ * Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code unit (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.reservedFn().
+ * Reserved for future use.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param something some integer argument
+ * @return some integer
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorReserved(UCharIterator *iter, int32_t something);
+
+/**
+ * Function type declaration for UCharIterator.getState().
+ *
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * It is recommended that the state value be calculated to be as small as
+ * is feasible. For strings with limited lengths, fewer than 32 bits may
+ * be sufficient.
+ *
+ * This is used together with setState()/UCharIteratorSetState
+ * to save and restore the iterator position more efficiently than with
+ * getIndex()/move().
+ *
+ * The iterator state is defined as a uint32_t value because it is designed
+ * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
+ * of the character iterator.
+ *
+ * With some UCharIterator implementations (e.g., UTF-8),
+ * getting and setting the UTF-16 index with existing functions
+ * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
+ * relatively slow because the iterator has to "walk" from a known index
+ * to the requested one.
+ * This takes more time the farther it needs to go.
+ *
+ * An opaque state value allows an iterator implementation to provide
+ * an internal index (UTF-8: the source byte array index) for
+ * fast, constant-time restoration.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+typedef uint32_t U_CALLCONV
+UCharIteratorGetState(const UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.setState().
+ *
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * The iterator object need not be the same one as for which getState() was called,
+ * but it must be of the same type (set up using the same uiter_setXYZ function)
+ * and it must iterate over the same string
+ * (binary identical regardless of memory address).
+ * For more about the state word see UCharIteratorGetState.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ *              on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+typedef void U_CALLCONV
+UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+
+/**
+ * C API for code unit iteration.
+ * This can be used as a C wrapper around
+ * CharacterIterator, Replaceable, or implemented using simple strings, etc.
+ *
+ * There are two roles for using UCharIterator:
+ *
+ * A "provider" sets the necessary function pointers and controls the "protected"
+ * fields of the UCharIterator structure. A "provider" passes a UCharIterator
+ * into C APIs that need a UCharIterator as an abstract, flexible string interface.
+ *
+ * Implementations of such C APIs are "callers" of UCharIterator functions;
+ * they only use the "public" function pointers and never access the "protected"
+ * fields directly.
+ *
+ * The current() and next() functions only check the current index against the
+ * limit, and previous() only checks the current index against the start,
+ * to see if the iterator already reached the end of the iteration range.
+ *
+ * The assumption - in all iterators - is that the index is moved via the API,
+ * which means it won't go out of bounds, or the index is modified by
+ * user code that knows enough about the iterator implementation to set valid
+ * index values.
+ *
+ * UCharIterator functions return code unit values 0..0xffff,
+ * or U_SENTINEL if the iteration bounds are reached.
+ *
+ * @stable ICU 2.1
+ */
+struct UCharIterator {
+    /**
+     * (protected) Pointer to string or wrapped object or similar.
+     * Not used by caller.
+     * @stable ICU 2.1
+     */
+    const void *context;
+
+    /**
+     * (protected) Length of string or similar.
+     * Not used by caller.
+     * @stable ICU 2.1
+     */
+    int32_t length;
+
+    /**
+     * (protected) Start index or similar.
+     * Not used by caller.
+     * @stable ICU 2.1
+     */
+    int32_t start;
+
+    /**
+     * (protected) Current index or similar.
+     * Not used by caller.
+     * @stable ICU 2.1
+     */
+    int32_t index;
+
+    /**
+     * (protected) Limit index or similar.
+     * Not used by caller.
+     * @stable ICU 2.1
+     */
+    int32_t limit;
+
+    /**
+     * (protected) Used by UTF-8 iterators and possibly others.
+     * @stable ICU 2.1
+     */
+    int32_t reservedField;
+
+    /**
+     * (public) Returns the current position or the
+     * start or limit index of the iteration range.
+     *
+     * @see UCharIteratorGetIndex
+     * @stable ICU 2.1
+     */
+    UCharIteratorGetIndex *getIndex;
+
+    /**
+     * (public) Moves the current position relative to the start or limit of the
+     * iteration range, or relative to the current position itself.
+     * The movement is expressed in numbers of code units forward
+     * or backward by specifying a positive or negative delta.
+     *
+     * @see UCharIteratorMove
+     * @stable ICU 2.1
+     */
+    UCharIteratorMove *move;
+
+    /**
+     * (public) Check if current() and next() can still
+     * return another code unit.
+     *
+     * @see UCharIteratorHasNext
+     * @stable ICU 2.1
+     */
+    UCharIteratorHasNext *hasNext;
+
+    /**
+     * (public) Check if previous() can still return another code unit.
+     *
+     * @see UCharIteratorHasPrevious
+     * @stable ICU 2.1
+     */
+    UCharIteratorHasPrevious *hasPrevious;
+
+    /**
+     * (public) Return the code unit at the current position,
+     * or U_SENTINEL if there is none (index is at the limit).
+     *
+     * @see UCharIteratorCurrent
+     * @stable ICU 2.1
+     */
+    UCharIteratorCurrent *current;
+
+    /**
+     * (public) Return the code unit at the current index and increment
+     * the index (post-increment, like s[i++]),
+     * or return U_SENTINEL if there is none (index is at the limit).
+     *
+     * @see UCharIteratorNext
+     * @stable ICU 2.1
+     */
+    UCharIteratorNext *next;
+
+    /**
+     * (public) Decrement the index and return the code unit from there
+     * (pre-decrement, like s[--i]),
+     * or return U_SENTINEL if there is none (index is at the start).
+     *
+     * @see UCharIteratorPrevious
+     * @stable ICU 2.1
+     */
+    UCharIteratorPrevious *previous;
+
+    /**
+     * (public) Reserved for future use. Currently NULL.
+     *
+     * @see UCharIteratorReserved
+     * @stable ICU 2.1
+     */
+    UCharIteratorReserved *reservedFn;
+
+    /**
+     * (public) Return the state of the iterator, to be restored later with setState().
+     * This function pointer is NULL if the iterator does not implement it.
+     *
+     * @see UCharIteratorGet
+     * @stable ICU 2.6
+     */
+    UCharIteratorGetState *getState;
+
+    /**
+     * (public) Restore the iterator state from the state word from a call
+     * to getState().
+     * This function pointer is NULL if the iterator does not implement it.
+     *
+     * @see UCharIteratorSet
+     * @stable ICU 2.6
+     */
+    UCharIteratorSetState *setState;
+};
+
+/**
+ * Helper function for UCharIterator to get the code point
+ * at the current index.
+ *
+ * Return the code point that includes the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ * If the current code unit is a lead or trail surrogate,
+ * then the following or preceding surrogate is used to form
+ * the code point value.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point
+ *
+ * @see UCharIterator
+ * @see U16_GET
+ * @see UnicodeString::char32At()
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_current32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the next code point.
+ *
+ * Return the code point at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @see U16_NEXT
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_next32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the previous code point.
+ *
+ * Decrement the index and return the code point from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code point (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @see U16_PREV
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_previous32(UCharIterator *iter);
+
+/**
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * This is a convenience function that calls iter->getState(iter)
+ * if iter->getState is not NULL;
+ * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+U_STABLE uint32_t U_EXPORT2
+uiter_getState(const UCharIterator *iter);
+
+/**
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
+ * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ *              on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+/**
+ * Set up a UCharIterator to iterate over a string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the string s
+ * with iteration boundaries start=index=0 and length=limit=string length.
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length.
+ * The length field will be ignored.
+ *
+ * The string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s String to iterate over
+ * @param length Length of s, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-16BE string
+ * (byte vector with a big-endian pair of bytes per UChar).
+ *
+ * Everything works just like with a normal UChar iterator (uiter_setString),
+ * except that UChars are assembled from byte pairs,
+ * and that the length argument here indicates an even number of bytes.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-16BE string to iterate over
+ * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
+ *               (NUL means pair of 0 bytes at even index from s)
+ *
+ * @see UCharIterator
+ * @see uiter_setString
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-8 string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
+ * with UTF-8 iteration boundaries 0 and length.
+ * The implementation counts the UTF-16 index on the fly and
+ * lazily evaluates the UTF-16 length of the text.
+ *
+ * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
+ * When the reservedField is not 0, then it contains a supplementary code point
+ * and the UTF-16 index is between the two corresponding surrogates.
+ * At that point, the UTF-8 index is behind that code point.
+ *
+ * The UTF-8 string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() returns a state value consisting of
+ * - the current UTF-8 source byte index (bits 31..1)
+ * - a flag (bit 0) that indicates whether the UChar position is in the middle
+ *   of a surrogate pair
+ *   (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
+ *
+ * getState() cannot also encode the UTF-16 index in the state value.
+ * move(relative to limit or length), or
+ * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-8 string to iterate over
+ * @param length Length of s in bytes, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * Set up a UCharIterator to wrap around a C++ CharacterIterator.
+ *
+ * Sets the UCharIterator function pointers for iteration using the
+ * CharacterIterator charIter.
+ *
+ * The CharacterIterator pointer charIter is set into UCharIterator.context
+ * without copying or cloning the CharacterIterator object.
+ * The other "protected" UCharIterator fields are set to 0 and will be ignored.
+ * The iteration index and boundaries are controlled by the CharacterIterator.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param charIter CharacterIterator to wrap
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
+
+/**
+ * Set up a UCharIterator to iterate over a C++ Replaceable.
+ *
+ * Sets the UCharIterator function pointers for iteration over the
+ * Replaceable rep with iteration boundaries start=index=0 and
+ * length=limit=rep->length().
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length=rep->length().
+ * The length field will be ignored.
+ *
+ * The Replaceable pointer rep is set into UCharIterator.context without copying
+ * or cloning/reallocating the Replaceable object.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param rep Replaceable to iterate over
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
+
+#endif
+
+U_CDECL_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/umachine.h b/src/core/basetypes/icu-ucsdet/include/unicode/umachine.h
new file mode 100644
index 00000000..53215921
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/umachine.h
@@ -0,0 +1,356 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  umachine.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep13
+*   created by: Markus W. Scherer
+*
+*   This file defines basic types and constants for ICU to be
+*   platform-independent. umachine.h and utf.h are included into
+*   utypes.h to provide all the general definitions for ICU.
+*   All of these definitions used to be in utypes.h before
+*   the UTF-handling macros made this unmaintainable.
+*/
+
+#ifndef __UMACHINE_H__
+#define __UMACHINE_H__
+
+
+/**
+ * \file
+ * \brief Basic types and constants for UTF
+ *
+ * <h2> Basic types and constants for UTF </h2>
+ *   This file defines basic types and constants for utf.h to be
+ *   platform-independent. umachine.h and utf.h are included into
+ *   utypes.h to provide all the general definitions for ICU.
+ *   All of these definitions used to be in utypes.h before
+ *   the UTF-handling macros made this unmaintainable.
+ *
+ */
+/*==========================================================================*/
+/* Include platform-dependent definitions                                   */
+/* which are contained in the platform-specific file platform.h             */
+/*==========================================================================*/
+
+#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
+
+/*
+ * ANSI C headers:
+ * stddef.h defines wchar_t
+ */
+#include <stddef.h>
+
+/*==========================================================================*/
+/* For C wrappers, we use the symbol U_STABLE.                                */
+/* This works properly if the includer is C or C++.                         */
+/* Functions are declared   U_STABLE return-type U_EXPORT2 function-name()... */
+/*==========================================================================*/
+
+/**
+ * \def U_CFUNC
+ * This is used in a declaration of a library private ICU C function.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_BEGIN
+ * This is used to begin a declaration of a library private ICU C API.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_END
+ * This is used to end a declaration of a library private ICU C API
+ * @stable ICU 2.4
+ */
+
+#ifdef __cplusplus
+#   define U_CFUNC extern "C"
+#   define U_CDECL_BEGIN extern "C" {
+#   define U_CDECL_END   }
+#else
+#   define U_CFUNC extern
+#   define U_CDECL_BEGIN
+#   define U_CDECL_END
+#endif
+
+#ifndef U_ATTRIBUTE_DEPRECATED
+/**
+ * \def U_ATTRIBUTE_DEPRECATED
+ *  This is used for GCC specific attributes
+ * @internal
+ */
+#if U_GCC_MAJOR_MINOR >= 302
+#    define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
+/**
+ * \def U_ATTRIBUTE_DEPRECATED
+ * This is used for Visual C++ specific attributes 
+ * @internal
+ */
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
+#    define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
+#else
+#    define U_ATTRIBUTE_DEPRECATED
+#endif
+#endif
+
+/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
+#define U_CAPI U_CFUNC U_EXPORT
+/** This is used to declare a function as a stable public ICU C API*/
+#define U_STABLE U_CAPI
+/** This is used to declare a function as a draft public ICU C API  */
+#define U_DRAFT  U_CAPI
+/** This is used to declare a function as a deprecated public ICU C API  */
+#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
+/** This is used to declare a function as an obsolete public ICU C API  */
+#define U_OBSOLETE U_CAPI
+/** This is used to declare a function as an internal ICU C API  */
+#define U_INTERNAL U_CAPI
+
+/**
+ * \def U_OVERRIDE
+ * Defined to the C++11 "override" keyword if available.
+ * Denotes a class or member which is an override of the base class.
+ * May result in an error if it applied to something not an override.
+ * @internal
+ */
+
+/**
+ * \def U_FINAL
+ * Defined to the C++11 "final" keyword if available.
+ * Denotes a class or member which may not be overridden in subclasses.
+ * May result in an error if subclasses attempt to override.
+ * @internal
+ */
+
+#if defined(__cplusplus) && __cplusplus>=201103L
+/* C++11 */
+#ifndef U_OVERRIDE
+#define U_OVERRIDE override
+#endif
+#ifndef U_FINAL
+#define U_FINAL final
+#endif
+#else
+/* not C++11 - define to nothing */
+#ifndef U_OVERRIDE
+#define U_OVERRIDE
+#endif
+#ifndef U_FINAL
+#define U_FINAL
+#endif
+#endif
+
+/*==========================================================================*/
+/* limits for int32_t etc., like in POSIX inttypes.h                        */
+/*==========================================================================*/
+
+#ifndef INT8_MIN
+/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
+#   define INT8_MIN        ((int8_t)(-128))
+#endif
+#ifndef INT16_MIN
+/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
+#   define INT16_MIN       ((int16_t)(-32767-1))
+#endif
+#ifndef INT32_MIN
+/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
+#   define INT32_MIN       ((int32_t)(-2147483647-1))
+#endif
+
+#ifndef INT8_MAX
+/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
+#   define INT8_MAX        ((int8_t)(127))
+#endif
+#ifndef INT16_MAX
+/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
+#   define INT16_MAX       ((int16_t)(32767))
+#endif
+#ifndef INT32_MAX
+/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
+#   define INT32_MAX       ((int32_t)(2147483647))
+#endif
+
+#ifndef UINT8_MAX
+/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
+#   define UINT8_MAX       ((uint8_t)(255U))
+#endif
+#ifndef UINT16_MAX
+/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
+#   define UINT16_MAX      ((uint16_t)(65535U))
+#endif
+#ifndef UINT32_MAX
+/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
+#   define UINT32_MAX      ((uint32_t)(4294967295U))
+#endif
+
+#if defined(U_INT64_T_UNAVAILABLE)
+# error int64_t is required for decimal format and rule-based number format.
+#else
+# ifndef INT64_C
+/**
+ * Provides a platform independent way to specify a signed 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
+ * @stable ICU 2.8
+ */
+#   define INT64_C(c) c ## LL
+# endif
+# ifndef UINT64_C
+/**
+ * Provides a platform independent way to specify an unsigned 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
+ * @stable ICU 2.8
+ */
+#   define UINT64_C(c) c ## ULL
+# endif
+# ifndef U_INT64_MIN
+/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
+#     define U_INT64_MIN       ((int64_t)(INT64_C(-9223372036854775807)-1))
+# endif
+# ifndef U_INT64_MAX
+/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
+#     define U_INT64_MAX       ((int64_t)(INT64_C(9223372036854775807)))
+# endif
+# ifndef U_UINT64_MAX
+/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
+#     define U_UINT64_MAX      ((uint64_t)(UINT64_C(18446744073709551615)))
+# endif
+#endif
+
+/*==========================================================================*/
+/* Boolean data type                                                        */
+/*==========================================================================*/
+
+/** The ICU boolean type @stable ICU 2.0 */
+typedef int8_t UBool;
+
+#ifndef TRUE
+/** The TRUE value of a UBool @stable ICU 2.0 */
+#   define TRUE  1
+#endif
+#ifndef FALSE
+/** The FALSE value of a UBool @stable ICU 2.0 */
+#   define FALSE 0
+#endif
+
+
+/*==========================================================================*/
+/* Unicode data types                                                       */
+/*==========================================================================*/
+
+/* wchar_t-related definitions -------------------------------------------- */
+
+/*
+ * \def U_WCHAR_IS_UTF16
+ * Defined if wchar_t uses UTF-16.
+ *
+ * @stable ICU 2.0
+ */
+/*
+ * \def U_WCHAR_IS_UTF32
+ * Defined if wchar_t uses UTF-32.
+ *
+ * @stable ICU 2.0
+ */
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+#   ifdef __STDC_ISO_10646__
+#       if (U_SIZEOF_WCHAR_T==2)
+#           define U_WCHAR_IS_UTF16
+#       elif (U_SIZEOF_WCHAR_T==4)
+#           define  U_WCHAR_IS_UTF32
+#       endif
+#   elif defined __UCS2__
+#       if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
+#           define U_WCHAR_IS_UTF16
+#       endif
+#   elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
+#       if (U_SIZEOF_WCHAR_T==4)
+#           define U_WCHAR_IS_UTF32
+#       endif
+#   elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
+#       define U_WCHAR_IS_UTF32
+#   elif U_PLATFORM_HAS_WIN32_API
+#       define U_WCHAR_IS_UTF16
+#   endif
+#endif
+
+/* UChar and UChar32 definitions -------------------------------------------- */
+
+/** Number of bytes in a UChar. @stable ICU 2.0 */
+#define U_SIZEOF_UCHAR 2
+
+/**
+ * \var UChar
+ * Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t),
+ * or wchar_t if that is 16 bits wide; always assumed to be unsigned.
+ * If neither is available, then define UChar to be uint16_t.
+ *
+ * This makes the definition of UChar platform-dependent
+ * but allows direct string type compatibility with platforms with
+ * 16-bit wchar_t types.
+ *
+ * @stable ICU 4.4
+ */
+#if defined(UCHAR_TYPE)
+    typedef UCHAR_TYPE UChar;
+/* Not #elif U_HAVE_CHAR16_T -- because that is type-incompatible with pre-C++11 callers
+    typedef char16_t UChar;  */
+#elif U_SIZEOF_WCHAR_T==2
+    typedef wchar_t UChar;
+#elif defined(__CHAR16_TYPE__)
+    typedef __CHAR16_TYPE__ UChar;
+#else
+    typedef uint16_t UChar;
+#endif
+
+/**
+ * Define UChar32 as a type for single Unicode code points.
+ * UChar32 is a signed 32-bit integer (same as int32_t).
+ *
+ * The Unicode code point range is 0..0x10ffff.
+ * All other values (negative or >=0x110000) are illegal as Unicode code points.
+ * They may be used as sentinel values to indicate "done", "error"
+ * or similar non-code point conditions.
+ *
+ * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
+ * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
+ * or else to be uint32_t.
+ * That is, the definition of UChar32 was platform-dependent.
+ *
+ * @see U_SENTINEL
+ * @stable ICU 2.4
+ */
+typedef int32_t UChar32;
+
+/**
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ * 
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with U_SENTINEL.
+ *
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * @return -1
+ * @see UChar32
+ * @stable ICU 2.4
+ */
+#define U_SENTINEL (-1)
+
+#include "unicode/urename.h"
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/unistr.h b/src/core/basetypes/icu-ucsdet/include/unicode/unistr.h
new file mode 100644
index 00000000..c6e8b446
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/unistr.h
@@ -0,0 +1,4470 @@
+/*
+**********************************************************************
+*   Copyright (C) 1998-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File unistr.h
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   09/25/98    stephen     Creation.
+*   11/11/98    stephen     Changed per 11/9 code review.
+*   04/20/99    stephen     Overhauled per 4/16 code review.
+*   11/18/99    aliu        Made to inherit from Replaceable.  Added method
+*                           handleReplaceBetween(); other methods unchanged.
+*   06/25/01    grhoten     Remove dependency on iostream.
+******************************************************************************
+*/
+
+#ifndef UNISTR_H
+#define UNISTR_H
+
+/**
+ * \file 
+ * \brief C++ API: Unicode String 
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/rep.h"
+#include "unicode/std_string.h"
+#include "unicode/stringpiece.h"
+#include "unicode/bytestream.h"
+#include "unicode/ucasemap.h"
+
+struct UConverter;          // unicode/ucnv.h
+class  StringThreadTest;
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also ustring.h and unorm.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER  0x8000
+#endif
+
+#ifndef USTRING_H
+/**
+ * \ingroup ustring_ustrlen
+ */
+U_STABLE int32_t U_EXPORT2
+u_strlen(const UChar *s);
+#endif
+
+/**
+ * \def U_STRING_CASE_MAPPER_DEFINED
+ * @internal
+ */
+#ifndef U_STRING_CASE_MAPPER_DEFINED
+#define U_STRING_CASE_MAPPER_DEFINED
+
+/**
+ * Internal string case mapping function type.
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(const UCaseMap *csm,
+                  UChar *dest, int32_t destCapacity,
+                  const UChar *src, int32_t srcLength,
+                  UErrorCode *pErrorCode);
+
+#endif
+
+U_NAMESPACE_BEGIN
+
+class BreakIterator;        // unicode/brkiter.h
+class Locale;               // unicode/locid.h
+class StringCharacterIterator;
+class UnicodeStringAppendable;  // unicode/appendable.h
+
+/* The <iostream> include has been moved to unicode/ustream.h */
+
+/**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * @stable ICU 3.2
+ */
+#define US_INV icu::UnicodeString::kInvariant
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * <code>NUL</code>, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16)
+#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
+#else
+#   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
+#endif
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for improved performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * @stable ICU 2.0
+ */
+#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
+
+/**
+ * \def UNISTR_FROM_CHAR_EXPLICIT
+ * This can be defined to be empty or "explicit".
+ * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
+ * constructors are marked as explicit, preventing their inadvertent use.
+ * @stable ICU 49
+ */
+#ifndef UNISTR_FROM_CHAR_EXPLICIT
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+    // Auto-"explicit" in ICU library code.
+#   define UNISTR_FROM_CHAR_EXPLICIT explicit
+# else
+    // Empty by default for source code compatibility.
+#   define UNISTR_FROM_CHAR_EXPLICIT
+# endif
+#endif
+
+/**
+ * \def UNISTR_FROM_STRING_EXPLICIT
+ * This can be defined to be empty or "explicit".
+ * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
+ * constructors are marked as explicit, preventing their inadvertent use.
+ *
+ * In particular, this helps prevent accidentally depending on ICU conversion code
+ * by passing a string literal into an API with a const UnicodeString & parameter.
+ * @stable ICU 49
+ */
+#ifndef UNISTR_FROM_STRING_EXPLICIT
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+    // Auto-"explicit" in ICU library code.
+#   define UNISTR_FROM_STRING_EXPLICIT explicit
+# else
+    // Empty by default for source code compatibility.
+#   define UNISTR_FROM_STRING_EXPLICIT
+# endif
+#endif
+
+/**
+ * UnicodeString is a string class that stores Unicode characters directly and provides
+ * similar functionality as the Java String and StringBuffer classes.
+ * It is a concrete implementation of the abstract class Replaceable (for transliteration).
+ *
+ * The UnicodeString class is not suitable for subclassing.
+ *
+ * <p>For an overview of Unicode strings in C and C++ see the
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ *
+ * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
+ * A Unicode character may be stored with either one code unit
+ * (the most common case) or with a matched pair of special code units
+ * ("surrogates"). The data type for code units is UChar. 
+ * For single-character handling, a Unicode character code <em>point</em> is a value
+ * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
+ *
+ * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
+ * This is the same as with multi-byte char* strings in traditional string handling.
+ * Operations on partial strings typically do not test for code point boundaries.
+ * If necessary, the user needs to take care of such boundaries by testing for the code unit
+ * values or by using functions like
+ * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
+ * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
+ *
+ * UnicodeString methods are more lenient with regard to input parameter values
+ * than other ICU APIs. In particular:
+ * - If indexes are out of bounds for a UnicodeString object
+ *   (<0 or >length()) then they are "pinned" to the nearest boundary.
+ * - If primitive string pointer values (e.g., const UChar * or char *)
+ *   for input strings are NULL, then those input string parameters are treated
+ *   as if they pointed to an empty string.
+ *   However, this is <em>not</em> the case for char * parameters for charset names
+ *   or other IDs.
+ * - Most UnicodeString methods do not take a UErrorCode parameter because
+ *   there are usually very few opportunities for failure other than a shortage
+ *   of memory, error codes in low-level C++ string methods would be inconvenient,
+ *   and the error code as the last parameter (ICU convention) would prevent
+ *   the use of default parameter values.
+ *   Instead, such methods set the UnicodeString into a "bogus" state
+ *   (see isBogus()) if an error occurs.
+ *
+ * In string comparisons, two UnicodeString objects that are both "bogus"
+ * compare equal (to be transitive and prevent endless loops in sorting),
+ * and a "bogus" string compares less than any non-"bogus" one.
+ *
+ * Const UnicodeString methods are thread-safe. Multiple threads can use
+ * const methods on the same UnicodeString object simultaneously,
+ * but non-const methods must not be called concurrently (in multiple threads)
+ * with any other (const or non-const) methods.
+ *
+ * Similarly, const UnicodeString & parameters are thread-safe.
+ * One object may be passed in as such a parameter concurrently in multiple threads.
+ * This includes the const UnicodeString & parameters for
+ * copy construction, assignment, and cloning.
+ *
+ * <p>UnicodeString uses several storage methods.
+ * String contents can be stored inside the UnicodeString object itself,
+ * in an allocated and shared buffer, or in an outside buffer that is "aliased".
+ * Most of this is done transparently, but careful aliasing in particular provides
+ * significant performance improvements.
+ * Also, the internal buffer is accessible via special functions.
+ * For details see the
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ *
+ * @see utf.h
+ * @see CharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeString : public Replaceable
+{
+public:
+
+  /**
+   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+   * which constructs a Unicode string from an invariant-character char * string.
+   * Use the macro US_INV instead of the full qualification for this value.
+   *
+   * @see US_INV
+   * @stable ICU 3.2
+   */
+  enum EInvariant {
+    /**
+     * @see EInvariant
+     * @stable ICU 3.2
+     */
+    kInvariant
+  };
+
+  //========================================
+  // Read-only operations
+  //========================================
+
+  /* Comparison - bitwise only - for international comparison use collation */
+
+  /**
+   * Equality operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return TRUE if <TT>text</TT> contains the same characters as this one,
+   * FALSE otherwise.
+   * @stable ICU 2.0
+   */
+  inline UBool operator== (const UnicodeString& text) const;
+
+  /**
+   * Inequality operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return FALSE if <TT>text</TT> contains the same characters as this one,
+   * TRUE otherwise.
+   * @stable ICU 2.0
+   */
+  inline UBool operator!= (const UnicodeString& text) const;
+
+  /**
+   * Greater than operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return TRUE if the characters in this are bitwise
+   * greater than the characters in <code>text</code>, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool operator> (const UnicodeString& text) const;
+
+  /**
+   * Less than operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return TRUE if the characters in this are bitwise
+   * less than the characters in <code>text</code>, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool operator< (const UnicodeString& text) const;
+
+  /**
+   * Greater than or equal operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return TRUE if the characters in this are bitwise
+   * greater than or equal to the characters in <code>text</code>, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool operator>= (const UnicodeString& text) const;
+
+  /**
+   * Less than or equal operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return TRUE if the characters in this are bitwise
+   * less than or equal to the characters in <code>text</code>, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool operator<= (const UnicodeString& text) const;
+
+  /**
+   * Compare the characters bitwise in this UnicodeString to
+   * the characters in <code>text</code>.
+   * @param text The UnicodeString to compare to this one.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>text</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>text</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>text</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compare(const UnicodeString& text) const;
+
+  /**
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+   * in the <b>entire string</b> <TT>text</TT>.
+   * (The parameters "start" and "length" are not applied to the other text "text".)
+   * @param start the offset at which the compare operation begins
+   * @param length the number of characters of text to compare.
+   * @param text the other text to be compared against this string.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>text</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>text</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>text</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compare(int32_t start,
+         int32_t length,
+         const UnicodeString& text) const;
+
+  /**
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param start the offset at which the compare operation begins
+   * @param length the number of characters in this to compare.
+   * @param srcText the text to be compared
+   * @param srcStart the offset into <TT>srcText</TT> to start comparison
+   * @param srcLength the number of characters in <TT>src</TT> to compare
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcText</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcText</code>.
+   * @stable ICU 2.0
+   */
+   inline int8_t compare(int32_t start,
+         int32_t length,
+         const UnicodeString& srcText,
+         int32_t srcStart,
+         int32_t srcLength) const;
+
+  /**
+   * Compare the characters bitwise in this UnicodeString with the first
+   * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
+   * @param srcChars The characters to compare to this UnicodeString.
+   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcChars</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcChars</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compare(const UChar *srcChars,
+         int32_t srcLength) const;
+
+  /**
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the first
+   * <TT>length</TT> characters in <TT>srcChars</TT>
+   * @param start the offset at which the compare operation begins
+   * @param length the number of characters to compare.
+   * @param srcChars the characters to be compared
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcChars</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcChars</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compare(int32_t start,
+         int32_t length,
+         const UChar *srcChars) const;
+
+  /**
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+   * in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param start the offset at which the compare operation begins
+   * @param length the number of characters in this to compare
+   * @param srcChars the characters to be compared
+   * @param srcStart the offset into <TT>srcChars</TT> to start comparison
+   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcChars</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcChars</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compare(int32_t start,
+         int32_t length,
+         const UChar *srcChars,
+         int32_t srcStart,
+         int32_t srcLength) const;
+
+  /**
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>limit</TT>) with the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
+   * @param start the offset at which the compare operation begins
+   * @param limit the offset immediately following the compare operation
+   * @param srcText the text to be compared
+   * @param srcStart the offset into <TT>srcText</TT> to start comparison
+   * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcText</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcText</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compareBetween(int32_t start,
+            int32_t limit,
+            const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLimit) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param text Another string to compare this one to.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrder(int32_t start,
+                                      int32_t length,
+                                      const UnicodeString& srcText) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLength The number of code units from that string to compare.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+   inline int8_t compareCodePointOrder(int32_t start,
+                                       int32_t length,
+                                       const UnicodeString& srcText,
+                                       int32_t srcStart,
+                                       int32_t srcLength) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param srcChars A pointer to another string to compare this one to.
+   * @param srcLength The number of code units from that string to compare.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrder(const UChar *srcChars,
+                                      int32_t srcLength) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcChars A pointer to another string to compare this one to.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrder(int32_t start,
+                                      int32_t length,
+                                      const UChar *srcChars) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcChars A pointer to another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLength The number of code units from that string to compare.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrder(int32_t start,
+                                      int32_t length,
+                                      const UChar *srcChars,
+                                      int32_t srcStart,
+                                      int32_t srcLength) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param limit The offset after the last code unit from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLimit The offset after the last code unit from that string to compare.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrderBetween(int32_t start,
+                                             int32_t limit,
+                                             const UnicodeString& srcText,
+                                             int32_t srcStart,
+                                             int32_t srcLimit) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
+   *
+   * @param text Another string to compare this one to.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(int32_t start,
+         int32_t length,
+         const UnicodeString& srcText,
+         uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLength The number of code units from that string to compare.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(int32_t start,
+         int32_t length,
+         const UnicodeString& srcText,
+         int32_t srcStart,
+         int32_t srcLength,
+         uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+   *
+   * @param srcChars A pointer to another string to compare this one to.
+   * @param srcLength The number of code units from that string to compare.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(const UChar *srcChars,
+         int32_t srcLength,
+         uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcChars A pointer to another string to compare this one to.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(int32_t start,
+         int32_t length,
+         const UChar *srcChars,
+         uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcChars A pointer to another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLength The number of code units from that string to compare.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(int32_t start,
+         int32_t length,
+         const UChar *srcChars,
+         int32_t srcStart,
+         int32_t srcLength,
+         uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)).
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param limit The offset after the last code unit from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLimit The offset after the last code unit from that string to compare.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompareBetween(int32_t start,
+            int32_t limit,
+            const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLimit,
+            uint32_t options) const;
+
+  /**
+   * Determine if this starts with the characters in <TT>text</TT>
+   * @param text The text to match.
+   * @return TRUE if this starts with the characters in <TT>text</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool startsWith(const UnicodeString& text) const;
+
+  /**
+   * Determine if this starts with the characters in <TT>srcText</TT>
+   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param srcText The text to match.
+   * @param srcStart the offset into <TT>srcText</TT> to start matching
+   * @param srcLength the number of characters in <TT>srcText</TT> to match
+   * @return TRUE if this starts with the characters in <TT>text</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool startsWith(const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLength) const;
+
+  /**
+   * Determine if this starts with the characters in <TT>srcChars</TT>
+   * @param srcChars The characters to match.
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool startsWith(const UChar *srcChars,
+            int32_t srcLength) const;
+
+  /**
+   * Determine if this ends with the characters in <TT>srcChars</TT>
+   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param srcChars The characters to match.
+   * @param srcStart the offset into <TT>srcText</TT> to start matching
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @return TRUE if this ends with the characters in <TT>srcChars</TT>, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool startsWith(const UChar *srcChars,
+            int32_t srcStart,
+            int32_t srcLength) const;
+
+  /**
+   * Determine if this ends with the characters in <TT>text</TT>
+   * @param text The text to match.
+   * @return TRUE if this ends with the characters in <TT>text</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool endsWith(const UnicodeString& text) const;
+
+  /**
+   * Determine if this ends with the characters in <TT>srcText</TT>
+   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param srcText The text to match.
+   * @param srcStart the offset into <TT>srcText</TT> to start matching
+   * @param srcLength the number of characters in <TT>srcText</TT> to match
+   * @return TRUE if this ends with the characters in <TT>text</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool endsWith(const UnicodeString& srcText,
+          int32_t srcStart,
+          int32_t srcLength) const;
+
+  /**
+   * Determine if this ends with the characters in <TT>srcChars</TT>
+   * @param srcChars The characters to match.
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool endsWith(const UChar *srcChars,
+          int32_t srcLength) const;
+
+  /**
+   * Determine if this ends with the characters in <TT>srcChars</TT>
+   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param srcChars The characters to match.
+   * @param srcStart the offset into <TT>srcText</TT> to start matching
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool endsWith(const UChar *srcChars,
+          int32_t srcStart,
+          int32_t srcLength) const;
+
+
+  /* Searching - bitwise only */
+
+  /**
+   * Locate in this the first occurrence of the characters in <TT>text</TT>,
+   * using bitwise comparison.
+   * @param text The text to search for.
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UnicodeString& text) const;
+
+  /**
+   * Locate in this the first occurrence of the characters in <TT>text</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param text The text to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UnicodeString& text,
+              int32_t start) const;
+
+  /**
+   * Locate in this the first occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>text</TT>, using bitwise comparison.
+   * @param text The text to search for.
+   * @param start The offset at which searching will start.
+   * @param length The number of characters to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UnicodeString& text,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the first occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   *  in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
+   * @param srcText The text to search for.
+   * @param srcStart the offset into <TT>srcText</TT> at which
+   * to start matching
+   * @param srcLength the number of characters in <TT>srcText</TT> to match
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the first occurrence of the characters in
+   * <TT>srcChars</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @param start the offset into this at which to start matching
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UChar *srcChars,
+              int32_t srcLength,
+              int32_t start) const;
+
+  /**
+   * Locate in this the first occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcChars</TT>, using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * @param start The offset at which searching will start.
+   * @param length The number of characters to search
+   * @return The offset into this of the start of <TT>srcChars</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UChar *srcChars,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the first occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcStart the offset into <TT>srcChars</TT> at which
+   * to start matching
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  int32_t indexOf(const UChar *srcChars,
+              int32_t srcStart,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the first occurrence of the BMP code point <code>c</code>,
+   * using bitwise comparison.
+   * @param c The code unit to search for.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar c) const;
+
+  /**
+   * Locate in this the first occurrence of the code point <TT>c</TT>,
+   * using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar32 c) const;
+
+  /**
+   * Locate in this the first occurrence of the BMP code point <code>c</code>,
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param c The code unit to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar c,
+              int32_t start) const;
+
+  /**
+   * Locate in this the first occurrence of the code point <TT>c</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar32 c,
+              int32_t start) const;
+
+  /**
+   * Locate in this the first occurrence of the BMP code point <code>c</code>
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
+   * @param c The code unit to search for.
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar c,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the first occurrence of the code point <TT>c</TT>
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar32 c,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence of the characters in <TT>text</TT>,
+   * using bitwise comparison.
+   * @param text The text to search for.
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UnicodeString& text) const;
+
+  /**
+   * Locate in this the last occurrence of the characters in <TT>text</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param text The text to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UnicodeString& text,
+              int32_t start) const;
+
+  /**
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>text</TT>, using bitwise comparison.
+   * @param text The text to search for.
+   * @param start The offset at which searching will start.
+   * @param length The number of characters to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UnicodeString& text,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
+   * @param srcText The text to search for.
+   * @param srcStart the offset into <TT>srcText</TT> at which
+   * to start matching
+   * @param srcLength the number of characters in <TT>srcText</TT> to match
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @param start the offset into this at which to start matching
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UChar *srcChars,
+              int32_t srcLength,
+              int32_t start) const;
+
+  /**
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcChars</TT>, using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * @param start The offset at which searching will start.
+   * @param length The number of characters to search
+   * @return The offset into this of the start of <TT>srcChars</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UChar *srcChars,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcStart the offset into <TT>srcChars</TT> at which
+   * to start matching
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  int32_t lastIndexOf(const UChar *srcChars,
+              int32_t srcStart,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence of the BMP code point <code>c</code>,
+   * using bitwise comparison.
+   * @param c The code unit to search for.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar c) const;
+
+  /**
+   * Locate in this the last occurrence of the code point <TT>c</TT>,
+   * using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar32 c) const;
+
+  /**
+   * Locate in this the last occurrence of the BMP code point <code>c</code>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param c The code unit to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar c,
+              int32_t start) const;
+
+  /**
+   * Locate in this the last occurrence of the code point <TT>c</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar32 c,
+              int32_t start) const;
+
+  /**
+   * Locate in this the last occurrence of the BMP code point <code>c</code>
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
+   * @param c The code unit to search for.
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar c,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence of the code point <TT>c</TT>
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar32 c,
+              int32_t start,
+              int32_t length) const;
+
+
+  /* Character access */
+
+  /**
+   * Return the code unit at offset <tt>offset</tt>.
+   * If the offset is not valid (0..length()-1) then U+ffff is returned.
+   * @param offset a valid offset into the text
+   * @return the code unit at offset <tt>offset</tt>
+   *         or 0xffff if the offset is not valid for this string
+   * @stable ICU 2.0
+   */
+  inline UChar charAt(int32_t offset) const;
+
+  /**
+   * Return the code unit at offset <tt>offset</tt>.
+   * If the offset is not valid (0..length()-1) then U+ffff is returned.
+   * @param offset a valid offset into the text
+   * @return the code unit at offset <tt>offset</tt>
+   * @stable ICU 2.0
+   */
+  inline UChar operator[] (int32_t offset) const;
+
+  /**
+   * Return the code point that contains the code unit
+   * at offset <tt>offset</tt>.
+   * If the offset is not valid (0..length()-1) then U+ffff is returned.
+   * @param offset a valid offset into the text
+   * that indicates the text offset of any of the code units
+   * that will be assembled into a code point (21-bit value) and returned
+   * @return the code point of text at <tt>offset</tt>
+   *         or 0xffff if the offset is not valid for this string
+   * @stable ICU 2.0
+   */
+  UChar32 char32At(int32_t offset) const;
+
+  /**
+   * Adjust a random-access offset so that
+   * it points to the beginning of a Unicode character.
+   * The offset that is passed in points to
+   * any code unit of a code point,
+   * while the returned offset will point to the first code unit
+   * of the same code point.
+   * In UTF-16, if the input offset points to a second surrogate
+   * of a surrogate pair, then the returned offset will point
+   * to the first surrogate.
+   * @param offset a valid offset into one code point of the text
+   * @return offset of the first code unit of the same code point
+   * @see U16_SET_CP_START
+   * @stable ICU 2.0
+   */
+  int32_t getChar32Start(int32_t offset) const;
+
+  /**
+   * Adjust a random-access offset so that
+   * it points behind a Unicode character.
+   * The offset that is passed in points behind
+   * any code unit of a code point,
+   * while the returned offset will point behind the last code unit
+   * of the same code point.
+   * In UTF-16, if the input offset points behind the first surrogate
+   * (i.e., to the second surrogate)
+   * of a surrogate pair, then the returned offset will point
+   * behind the second surrogate (i.e., to the first surrogate).
+   * @param offset a valid offset after any code unit of a code point of the text
+   * @return offset of the first code unit after the same code point
+   * @see U16_SET_CP_LIMIT
+   * @stable ICU 2.0
+   */
+  int32_t getChar32Limit(int32_t offset) const;
+
+  /**
+   * Move the code unit index along the string by delta code points.
+   * Interpret the input index as a code unit-based offset into the string,
+   * move the index forward or backward by delta code points, and
+   * return the resulting index.
+   * The input index should point to the first code unit of a code point,
+   * if there is more than one.
+   *
+   * Both input and output indexes are code unit-based as for all
+   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
+   * If delta<0 then the index is moved backward (toward the start of the string).
+   * If delta>0 then the index is moved forward (toward the end of the string).
+   *
+   * This behaves like CharacterIterator::move32(delta, kCurrent).
+   *
+   * Behavior for out-of-bounds indexes:
+   * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
+   * if the input index<0 then it is pinned to 0;
+   * if it is index>length() then it is pinned to length().
+   * Afterwards, the index is moved by <code>delta</code> code points
+   * forward or backward,
+   * but no further backward than to 0 and no further forward than to length().
+   * The resulting index return value will be in between 0 and length(), inclusively.
+   *
+   * Examples:
+   * <pre>
+   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
+   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
+   *
+   * // initial index: position of U+10000
+   * int32_t index=1;
+   *
+   * // the following examples will all result in index==4, position of U+10ffff
+   *
+   * // skip 2 code points from some position in the string
+   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
+   *
+   * // go to the 3rd code point from the start of s (0-based)
+   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
+   *
+   * // go to the next-to-last code point of s
+   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
+   * </pre>
+   *
+   * @param index input code unit index
+   * @param delta (signed) code point count to move the index forward or backward
+   *        in the string
+   * @return the resulting code unit index
+   * @stable ICU 2.0
+   */
+  int32_t moveIndex32(int32_t index, int32_t delta) const;
+
+  /* Substring extraction */
+
+  /**
+   * Copy the characters in the range
+   * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
+   * beginning at <tt>dstStart</tt>.
+   * If the string aliases to <code>dst</code> itself as an external buffer,
+   * then extract() will not copy the contents.
+   *
+   * @param start offset of first character which will be copied into the array
+   * @param length the number of characters to extract
+   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
+   * must be at least (<tt>dstStart + length</tt>).
+   * @param dstStart the offset in <TT>dst</TT> where the first character
+   * will be extracted
+   * @stable ICU 2.0
+   */
+  inline void extract(int32_t start,
+           int32_t length,
+           UChar *dst,
+           int32_t dstStart = 0) const;
+
+  /**
+   * Copy the contents of the string into dest.
+   * This is a convenience function that
+   * checks if there is enough space in dest,
+   * extracts the entire string if possible,
+   * and NUL-terminates dest if possible.
+   *
+   * If the string fits into dest but cannot be NUL-terminated
+   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+   * If the string itself does not fit into dest
+   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+   *
+   * If the string aliases to <code>dest</code> itself as an external buffer,
+   * then extract() will not copy the contents.
+   *
+   * @param dest Destination string buffer.
+   * @param destCapacity Number of UChars available at dest.
+   * @param errorCode ICU error code.
+   * @return length()
+   * @stable ICU 2.0
+   */
+  int32_t
+  extract(UChar *dest, int32_t destCapacity,
+          UErrorCode &errorCode) const;
+
+  /**
+   * Copy the characters in the range
+   * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
+   * <tt>target</tt>.
+   * @param start offset of first character which will be copied
+   * @param length the number of characters to extract
+   * @param target UnicodeString into which to copy characters.
+   * @return A reference to <TT>target</TT>
+   * @stable ICU 2.0
+   */
+  inline void extract(int32_t start,
+           int32_t length,
+           UnicodeString& target) const;
+
+  /**
+   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
+   * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
+   * @param start offset of first character which will be copied into the array
+   * @param limit offset immediately following the last character to be copied
+   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
+   * must be at least (<tt>dstStart + (limit - start)</tt>).
+   * @param dstStart the offset in <TT>dst</TT> where the first character
+   * will be extracted
+   * @stable ICU 2.0
+   */
+  inline void extractBetween(int32_t start,
+              int32_t limit,
+              UChar *dst,
+              int32_t dstStart = 0) const;
+
+  /**
+   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
+   * into the UnicodeString <tt>target</tt>.  Replaceable API.
+   * @param start offset of first character which will be copied
+   * @param limit offset immediately following the last character to be copied
+   * @param target UnicodeString into which to copy characters.
+   * @return A reference to <TT>target</TT>
+   * @stable ICU 2.0
+   */
+  virtual void extractBetween(int32_t start,
+              int32_t limit,
+              UnicodeString& target) const;
+
+  /**
+   * Copy the characters in the range 
+   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.
+   * All characters must be invariant (see utypes.h).
+   * Use US_INV as the last, signature-distinguishing parameter.
+   *
+   * This function does not write any more than <code>targetLength</code>
+   * characters but returns the length of the entire output string
+   * so that one can allocate a larger buffer and call the function again
+   * if necessary.
+   * The output string is NUL-terminated if possible.
+   *
+   * @param start offset of first character which will be copied
+   * @param startLength the number of characters to extract
+   * @param target the target buffer for extraction, can be NULL
+   *               if targetLength is 0
+   * @param targetCapacity the length of the target buffer
+   * @param inv Signature-distinguishing paramater, use US_INV.
+   * @return the output string length, not including the terminating NUL
+   * @stable ICU 3.2
+   */
+  int32_t extract(int32_t start,
+           int32_t startLength,
+           char *target,
+           int32_t targetCapacity,
+           enum EInvariant inv) const;
+
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+  /**
+   * Copy the characters in the range
+   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
+   * in the platform's default codepage.
+   * This function does not write any more than <code>targetLength</code>
+   * characters but returns the length of the entire output string
+   * so that one can allocate a larger buffer and call the function again
+   * if necessary.
+   * The output string is NUL-terminated if possible.
+   *
+   * @param start offset of first character which will be copied
+   * @param startLength the number of characters to extract
+   * @param target the target buffer for extraction
+   * @param targetLength the length of the target buffer
+   * If <TT>target</TT> is NULL, then the number of bytes required for
+   * <TT>target</TT> is returned.
+   * @return the output string length, not including the terminating NUL
+   * @stable ICU 2.0
+   */
+  int32_t extract(int32_t start,
+           int32_t startLength,
+           char *target,
+           uint32_t targetLength) const;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+  /**
+   * Copy the characters in the range
+   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
+   * in a specified codepage.
+   * The output string is NUL-terminated.
+   *
+   * Recommendation: For invariant-character strings use
+   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
+   * @param start offset of first character which will be copied
+   * @param startLength the number of characters to extract
+   * @param target the target buffer for extraction
+   * @param codepage the desired codepage for the characters.  0 has
+   * the special meaning of the default codepage
+   * If <code>codepage</code> is an empty string (<code>""</code>),
+   * then a simple conversion is performed on the codepage-invariant
+   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * If <TT>target</TT> is NULL, then the number of bytes required for
+   * <TT>target</TT> is returned. It is assumed that the target is big enough
+   * to fit all of the characters.
+   * @return the output string length, not including the terminating NUL
+   * @stable ICU 2.0
+   */
+  inline int32_t extract(int32_t start,
+                 int32_t startLength,
+                 char *target,
+                 const char *codepage = 0) const;
+
+  /**
+   * Copy the characters in the range
+   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
+   * in a specified codepage.
+   * This function does not write any more than <code>targetLength</code>
+   * characters but returns the length of the entire output string
+   * so that one can allocate a larger buffer and call the function again
+   * if necessary.
+   * The output string is NUL-terminated if possible.
+   *
+   * Recommendation: For invariant-character strings use
+   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
+   * @param start offset of first character which will be copied
+   * @param startLength the number of characters to extract
+   * @param target the target buffer for extraction
+   * @param targetLength the length of the target buffer
+   * @param codepage the desired codepage for the characters.  0 has
+   * the special meaning of the default codepage
+   * If <code>codepage</code> is an empty string (<code>""</code>),
+   * then a simple conversion is performed on the codepage-invariant
+   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * If <TT>target</TT> is NULL, then the number of bytes required for
+   * <TT>target</TT> is returned.
+   * @return the output string length, not including the terminating NUL
+   * @stable ICU 2.0
+   */
+  int32_t extract(int32_t start,
+           int32_t startLength,
+           char *target,
+           uint32_t targetLength,
+           const char *codepage) const;
+
+  /**
+   * Convert the UnicodeString into a codepage string using an existing UConverter.
+   * The output string is NUL-terminated if possible.
+   *
+   * This function avoids the overhead of opening and closing a converter if
+   * multiple strings are extracted.
+   *
+   * @param dest destination string buffer, can be NULL if destCapacity==0
+   * @param destCapacity the number of chars available at dest
+   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
+   *        or NULL for the default converter
+   * @param errorCode normal ICU error code
+   * @return the length of the output string, not counting the terminating NUL;
+   *         if the length is greater than destCapacity, then the string will not fit
+   *         and a buffer of the indicated length would need to be passed in
+   * @stable ICU 2.0
+   */
+  int32_t extract(char *dest, int32_t destCapacity,
+                  UConverter *cnv,
+                  UErrorCode &errorCode) const;
+
+#endif
+
+  /**
+   * Create a temporary substring for the specified range.
+   * Unlike the substring constructor and setTo() functions,
+   * the object returned here will be a read-only alias (using getBuffer())
+   * rather than copying the text.
+   * As a result, this substring operation is much faster but requires
+   * that the original string not be modified or deleted during the lifetime
+   * of the returned substring object.
+   * @param start offset of the first character visible in the substring
+   * @param length length of the substring
+   * @return a read-only alias UnicodeString object for the substring
+   * @stable ICU 4.4
+   */
+  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
+
+  /**
+   * Create a temporary substring for the specified range.
+   * Same as tempSubString(start, length) except that the substring range
+   * is specified as a (start, limit) pair (with an exclusive limit index)
+   * rather than a (start, length) pair.
+   * @param start offset of the first character visible in the substring
+   * @param limit offset immediately following the last character visible in the substring
+   * @return a read-only alias UnicodeString object for the substring
+   * @stable ICU 4.4
+   */
+  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
+
+  /**
+   * Convert the UnicodeString to UTF-8 and write the result
+   * to a ByteSink. This is called by toUTF8String().
+   * Unpaired surrogates are replaced with U+FFFD.
+   * Calls u_strToUTF8WithSub().
+   *
+   * @param sink A ByteSink to which the UTF-8 version of the string is written.
+   *             sink.Flush() is called at the end.
+   * @stable ICU 4.2
+   * @see toUTF8String
+   */
+  void toUTF8(ByteSink &sink) const;
+
+#if U_HAVE_STD_STRING
+
+  /**
+   * Convert the UnicodeString to UTF-8 and append the result
+   * to a standard string.
+   * Unpaired surrogates are replaced with U+FFFD.
+   * Calls toUTF8().
+   *
+   * @param result A standard string (or a compatible object)
+   *        to which the UTF-8 version of the string is appended.
+   * @return The string object.
+   * @stable ICU 4.2
+   * @see toUTF8
+   */
+  template<typename StringClass>
+  StringClass &toUTF8String(StringClass &result) const {
+    StringByteSink<StringClass> sbs(&result);
+    toUTF8(sbs);
+    return result;
+  }
+
+#endif
+
+  /**
+   * Convert the UnicodeString to UTF-32.
+   * Unpaired surrogates are replaced with U+FFFD.
+   * Calls u_strToUTF32WithSub().
+   *
+   * @param utf32 destination string buffer, can be NULL if capacity==0
+   * @param capacity the number of UChar32s available at utf32
+   * @param errorCode Standard ICU error code. Its input value must
+   *                  pass the U_SUCCESS() test, or else the function returns
+   *                  immediately. Check for U_FAILURE() on output or use with
+   *                  function chaining. (See User Guide for details.)
+   * @return The length of the UTF-32 string.
+   * @see fromUTF32
+   * @stable ICU 4.2
+   */
+  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
+
+  /* Length operations */
+
+  /**
+   * Return the length of the UnicodeString object.
+   * The length is the number of UChar code units are in the UnicodeString.
+   * If you want the number of code points, please use countChar32().
+   * @return the length of the UnicodeString object
+   * @see countChar32
+   * @stable ICU 2.0
+   */
+  inline int32_t length(void) const;
+
+  /**
+   * Count Unicode code points in the length UChar code units of the string.
+   * A code point may occupy either one or two UChar code units.
+   * Counting code points involves reading all code units.
+   *
+   * This functions is basically the inverse of moveIndex32().
+   *
+   * @param start the index of the first code unit to check
+   * @param length the number of UChar code units to check
+   * @return the number of code points in the specified code units
+   * @see length
+   * @stable ICU 2.0
+   */
+  int32_t
+  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
+
+  /**
+   * Check if the length UChar code units of the string
+   * contain more Unicode code points than a certain number.
+   * This is more efficient than counting all code points in this part of the string
+   * and comparing that number with a threshold.
+   * This function may not need to scan the string at all if the length
+   * falls within a certain range, and
+   * never needs to count more than 'number+1' code points.
+   * Logically equivalent to (countChar32(start, length)>number).
+   * A Unicode code point may occupy either one or two UChar code units.
+   *
+   * @param start the index of the first code unit to check (0 for the entire string)
+   * @param length the number of UChar code units to check
+   *               (use INT32_MAX for the entire string; remember that start/length
+   *                values are pinned)
+   * @param number The number of code points in the (sub)string is compared against
+   *               the 'number' parameter.
+   * @return Boolean value for whether the string contains more Unicode code points
+   *         than 'number'. Same as (u_countChar32(s, length)>number).
+   * @see countChar32
+   * @see u_strHasMoreChar32Than
+   * @stable ICU 2.4
+   */
+  UBool
+  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
+
+  /**
+   * Determine if this string is empty.
+   * @return TRUE if this string contains 0 characters, FALSE otherwise.
+   * @stable ICU 2.0
+   */
+  inline UBool isEmpty(void) const;
+
+  /**
+   * Return the capacity of the internal buffer of the UnicodeString object.
+   * This is useful together with the getBuffer functions.
+   * See there for details.
+   *
+   * @return the number of UChars available in the internal buffer
+   * @see getBuffer
+   * @stable ICU 2.0
+   */
+  inline int32_t getCapacity(void) const;
+
+  /* Other operations */
+
+  /**
+   * Generate a hash code for this object.
+   * @return The hash code of this UnicodeString.
+   * @stable ICU 2.0
+   */
+  inline int32_t hashCode(void) const;
+
+  /**
+   * Determine if this object contains a valid string.
+   * A bogus string has no value. It is different from an empty string,
+   * although in both cases isEmpty() returns TRUE and length() returns 0.
+   * setToBogus() and isBogus() can be used to indicate that no string value is available.
+   * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
+   * length() returns 0.
+   *
+   * @return TRUE if the string is bogus/invalid, FALSE otherwise
+   * @see setToBogus()
+   * @stable ICU 2.0
+   */
+  inline UBool isBogus(void) const;
+
+
+  //========================================
+  // Write operations
+  //========================================
+
+  /* Assignment operations */
+
+  /**
+   * Assignment operator.  Replace the characters in this UnicodeString
+   * with the characters from <TT>srcText</TT>.
+   * @param srcText The text containing the characters to replace
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString &operator=(const UnicodeString &srcText);
+
+  /**
+   * Almost the same as the assignment operator.
+   * Replace the characters in this UnicodeString
+   * with the characters from <code>srcText</code>.
+   *
+   * This function works the same as the assignment operator
+   * for all strings except for ones that are readonly aliases.
+   *
+   * Starting with ICU 2.4, the assignment operator and the copy constructor
+   * allocate a new buffer and copy the buffer contents even for readonly aliases.
+   * This function implements the old, more efficient but less safe behavior
+   * of making this string also a readonly alias to the same buffer.
+   *
+   * The fastCopyFrom function must be used only if it is known that the lifetime of
+   * this UnicodeString does not exceed the lifetime of the aliased buffer
+   * including its contents, for example for strings from resource bundles
+   * or aliases to string constants.
+   *
+   * @param src The text containing the characters to replace.
+   * @return a reference to this
+   * @stable ICU 2.4
+   */
+  UnicodeString &fastCopyFrom(const UnicodeString &src);
+
+  /**
+   * Assignment operator.  Replace the characters in this UnicodeString
+   * with the code unit <TT>ch</TT>.
+   * @param ch the code unit to replace
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& operator= (UChar ch);
+
+  /**
+   * Assignment operator.  Replace the characters in this UnicodeString
+   * with the code point <TT>ch</TT>.
+   * @param ch the code point to replace
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& operator= (UChar32 ch);
+
+  /**
+   * Set the text in the UnicodeString object to the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
+   * <TT>srcText</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
+   * will be obtained
+   * @return a reference to this
+   * @stable ICU 2.2
+   */
+  inline UnicodeString& setTo(const UnicodeString& srcText,
+               int32_t srcStart);
+
+  /**
+   * Set the text in the UnicodeString object to the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * <TT>srcText</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcText</TT> in the
+   * replace string.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& setTo(const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength);
+
+  /**
+   * Set the text in the UnicodeString object to the characters in
+   * <TT>srcText</TT>.
+   * <TT>srcText</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& setTo(const UnicodeString& srcText);
+
+  /**
+   * Set the characters in the UnicodeString object to the characters
+   * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
+   * @param srcChars the source for the new characters
+   * @param srcLength the number of Unicode characters in srcChars.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& setTo(const UChar *srcChars,
+               int32_t srcLength);
+
+  /**
+   * Set the characters in the UnicodeString object to the code unit
+   * <TT>srcChar</TT>.
+   * @param srcChar the code unit which becomes the UnicodeString's character
+   * content
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& setTo(UChar srcChar);
+
+  /**
+   * Set the characters in the UnicodeString object to the code point
+   * <TT>srcChar</TT>.
+   * @param srcChar the code point which becomes the UnicodeString's character
+   * content
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& setTo(UChar32 srcChar);
+
+  /**
+   * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
+   * The text will be used for the UnicodeString object, but
+   * it will not be released when the UnicodeString is destroyed.
+   * This has copy-on-write semantics:
+   * When the string is modified, then the buffer is first copied into
+   * newly allocated memory.
+   * The aliased buffer is never modified.
+   *
+   * In an assignment to another UnicodeString, when using the copy constructor
+   * or the assignment operator, the text will be copied.
+   * When using fastCopyFrom(), the text will be aliased again,
+   * so that both strings then alias the same readonly-text.
+   *
+   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
+   *                     This must be true if <code>textLength==-1</code>.
+   * @param text The characters to alias for the UnicodeString.
+   * @param textLength The number of Unicode characters in <code>text</code> to alias.
+   *                   If -1, then this constructor will determine the length
+   *                   by calling <code>u_strlen()</code>.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString &setTo(UBool isTerminated,
+                       const UChar *text,
+                       int32_t textLength);
+
+  /**
+   * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
+   * The text will be used for the UnicodeString object, but
+   * it will not be released when the UnicodeString is destroyed.
+   * This has write-through semantics:
+   * For as long as the capacity of the buffer is sufficient, write operations
+   * will directly affect the buffer. When more capacity is necessary, then
+   * a new buffer will be allocated and the contents copied as with regularly
+   * constructed strings.
+   * In an assignment to another UnicodeString, the buffer will be copied.
+   * The extract(UChar *dst) function detects whether the dst pointer is the same
+   * as the string buffer itself and will in this case not copy the contents.
+   *
+   * @param buffer The characters to alias for the UnicodeString.
+   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
+   * @param buffCapacity The size of <code>buffer</code> in UChars.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString &setTo(UChar *buffer,
+                       int32_t buffLength,
+                       int32_t buffCapacity);
+
+  /**
+   * Make this UnicodeString object invalid.
+   * The string will test TRUE with isBogus().
+   *
+   * A bogus string has no value. It is different from an empty string.
+   * It can be used to indicate that no string value is available.
+   * getBuffer() and getTerminatedBuffer() return NULL, and
+   * length() returns 0.
+   *
+   * This utility function is used throughout the UnicodeString
+   * implementation to indicate that a UnicodeString operation failed,
+   * and may be used in other functions,
+   * especially but not exclusively when such functions do not
+   * take a UErrorCode for simplicity.
+   *
+   * The following methods, and no others, will clear a string object's bogus flag:
+   * - remove()
+   * - remove(0, INT32_MAX)
+   * - truncate(0)
+   * - operator=() (assignment operator)
+   * - setTo(...)
+   *
+   * The simplest ways to turn a bogus string into an empty one
+   * is to use the remove() function.
+   * Examples for other functions that are equivalent to "set to empty string":
+   * \code
+   * if(s.isBogus()) {
+   *   s.remove();           // set to an empty string (remove all), or
+   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
+   *   s.truncate(0);        // set to an empty string (complete truncation), or
+   *   s=UnicodeString();    // assign an empty string, or
+   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
+   *   static const UChar nul=0;
+   *   s.setTo(&nul, 0);     // set to an empty C Unicode string
+   * }
+   * \endcode
+   *
+   * @see isBogus()
+   * @stable ICU 2.0
+   */
+  void setToBogus();
+
+  /**
+   * Set the character at the specified offset to the specified character.
+   * @param offset A valid offset into the text of the character to set
+   * @param ch The new character
+   * @return A reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& setCharAt(int32_t offset,
+               UChar ch);
+
+
+  /* Append operations */
+
+  /**
+   * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
+   * object.
+   * @param ch the code unit to be appended
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+ inline  UnicodeString& operator+= (UChar ch);
+
+  /**
+   * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
+   * object.
+   * @param ch the code point to be appended
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+ inline  UnicodeString& operator+= (UChar32 ch);
+
+  /**
+   * Append operator. Append the characters in <TT>srcText</TT> to the
+   * UnicodeString object. <TT>srcText</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& operator+= (const UnicodeString& srcText);
+
+  /**
+   * Append the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
+   * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT>
+   * is not modified.
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcText</TT> in
+   * the append string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLength);
+
+  /**
+   * Append the characters in <TT>srcText</TT> to the UnicodeString object.
+   * <TT>srcText</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(const UnicodeString& srcText);
+
+  /**
+   * Append the characters in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
+   * object at offset
+   * <TT>start</TT>. <TT>srcChars</TT> is not modified.
+   * @param srcChars the source for the new characters
+   * @param srcStart the offset into <TT>srcChars</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcChars</TT> in
+   *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(const UChar *srcChars,
+            int32_t srcStart,
+            int32_t srcLength);
+
+  /**
+   * Append the characters in <TT>srcChars</TT> to the UnicodeString object
+   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+   * @param srcChars the source for the new characters
+   * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
+   *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(const UChar *srcChars,
+            int32_t srcLength);
+
+  /**
+   * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
+   * @param srcChar the code unit to append
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(UChar srcChar);
+
+  /**
+   * Append the code point <TT>srcChar</TT> to the UnicodeString object.
+   * @param srcChar the code point to append
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& append(UChar32 srcChar);
+
+
+  /* Insert operations */
+
+  /**
+   * Insert the characters in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
+   * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
+   * @param start the offset where the insertion begins
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcText</TT> in
+   * the insert string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLength);
+
+  /**
+   * Insert the characters in <TT>srcText</TT> into the UnicodeString object
+   * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
+   * @param start the offset where the insertion begins
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            const UnicodeString& srcText);
+
+  /**
+   * Insert the characters in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
+   *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+   * @param start the offset at which the insertion begins
+   * @param srcChars the source for the new characters
+   * @param srcStart the offset into <TT>srcChars</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * in the insert string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            const UChar *srcChars,
+            int32_t srcStart,
+            int32_t srcLength);
+
+  /**
+   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
+   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+   * @param start the offset where the insertion begins
+   * @param srcChars the source for the new characters
+   * @param srcLength the number of Unicode characters in srcChars.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            const UChar *srcChars,
+            int32_t srcLength);
+
+  /**
+   * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
+   * offset <TT>start</TT>.
+   * @param start the offset at which the insertion occurs
+   * @param srcChar the code unit to insert
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            UChar srcChar);
+
+  /**
+   * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
+   * offset <TT>start</TT>.
+   * @param start the offset at which the insertion occurs
+   * @param srcChar the code point to insert
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            UChar32 srcChar);
+
+
+  /* Replace operations */
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+   * <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * <TT>srcText</TT> is not modified.
+   * @param start the offset at which the replace operation begins
+   * @param length the number of characters to replace. The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcText</TT> in
+   * the replace string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& replace(int32_t start,
+             int32_t length,
+             const UnicodeString& srcText,
+             int32_t srcStart,
+             int32_t srcLength);
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>)
+   * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
+   *  not modified.
+   * @param start the offset at which the replace operation begins
+   * @param length the number of characters to replace. The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& replace(int32_t start,
+             int32_t length,
+             const UnicodeString& srcText);
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+   * <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
+   * is not modified.
+   * @param start the offset at which the replace operation begins
+   * @param length the number of characters to replace.  The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcChars the source for the new characters
+   * @param srcStart the offset into <TT>srcChars</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * in the replace string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& replace(int32_t start,
+             int32_t length,
+             const UChar *srcChars,
+             int32_t srcStart,
+             int32_t srcLength);
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+   * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
+   * @param start the offset at which the replace operation begins
+   * @param length number of characters to replace.  The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcChars the source for the new characters
+   * @param srcLength the number of Unicode characters in srcChars
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& replace(int32_t start,
+             int32_t length,
+             const UChar *srcChars,
+             int32_t srcLength);
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
+   * <TT>srcChar</TT>.
+   * @param start the offset at which the replace operation begins
+   * @param length the number of characters to replace.  The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcChar the new code unit
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& replace(int32_t start,
+             int32_t length,
+             UChar srcChar);
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the code point
+   * <TT>srcChar</TT>.
+   * @param start the offset at which the replace operation begins
+   * @param length the number of characters to replace.  The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcChar the new code point
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
+
+  /**
+   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
+   * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
+   * @param start the offset at which the replace operation begins
+   * @param limit the offset immediately following the replace range
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& replaceBetween(int32_t start,
+                int32_t limit,
+                const UnicodeString& srcText);
+
+  /**
+   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
+   * with the characters in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
+   * @param start the offset at which the replace operation begins
+   * @param limit the offset immediately following the replace range
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcChars</TT> where new characters
+   * will be obtained
+   * @param srcLimit the offset immediately following the range to copy
+   * in <TT>srcText</TT>
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& replaceBetween(int32_t start,
+                int32_t limit,
+                const UnicodeString& srcText,
+                int32_t srcStart,
+                int32_t srcLimit);
+
+  /**
+   * Replace a substring of this object with the given text.
+   * @param start the beginning index, inclusive; <code>0 <= start
+   * <= limit</code>.
+   * @param limit the ending index, exclusive; <code>start <= limit
+   * <= length()</code>.
+   * @param text the text to replace characters <code>start</code>
+   * to <code>limit - 1</code>
+   * @stable ICU 2.0
+   */
+  virtual void handleReplaceBetween(int32_t start,
+                                    int32_t limit,
+                                    const UnicodeString& text);
+
+  /**
+   * Replaceable API
+   * @return TRUE if it has MetaData
+   * @stable ICU 2.4
+   */
+  virtual UBool hasMetaData() const;
+
+  /**
+   * Copy a substring of this object, retaining attribute (out-of-band)
+   * information.  This method is used to duplicate or reorder substrings.
+   * The destination index must not overlap the source range.
+   *
+   * @param start the beginning index, inclusive; <code>0 <= start <=
+   * limit</code>.
+   * @param limit the ending index, exclusive; <code>start <= limit <=
+   * length()</code>.
+   * @param dest the destination index.  The characters from
+   * <code>start..limit-1</code> will be copied to <code>dest</code>.
+   * Implementations of this method may assume that <code>dest <= start ||
+   * dest >= limit</code>.
+   * @stable ICU 2.0
+   */
+  virtual void copy(int32_t start, int32_t limit, int32_t dest);
+
+  /* Search and replace operations */
+
+  /**
+   * Replace all occurrences of characters in oldText with the characters
+   * in newText
+   * @param oldText the text containing the search text
+   * @param newText the text containing the replacement text
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
+                const UnicodeString& newText);
+
+  /**
+   * Replace all occurrences of characters in oldText with characters
+   * in newText
+   * in the range [<TT>start</TT>, <TT>start + length</TT>).
+   * @param start the start of the range in which replace will performed
+   * @param length the length of the range in which replace will be performed
+   * @param oldText the text containing the search text
+   * @param newText the text containing the replacement text
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& findAndReplace(int32_t start,
+                int32_t length,
+                const UnicodeString& oldText,
+                const UnicodeString& newText);
+
+  /**
+   * Replace all occurrences of characters in oldText in the range
+   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
+   * in newText in the range
+   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
+   * in the range [<TT>start</TT>, <TT>start + length</TT>).
+   * @param start the start of the range in which replace will performed
+   * @param length the length of the range in which replace will be performed
+   * @param oldText the text containing the search text
+   * @param oldStart the start of the search range in <TT>oldText</TT>
+   * @param oldLength the length of the search range in <TT>oldText</TT>
+   * @param newText the text containing the replacement text
+   * @param newStart the start of the replacement range in <TT>newText</TT>
+   * @param newLength the length of the replacement range in <TT>newText</TT>
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& findAndReplace(int32_t start,
+                int32_t length,
+                const UnicodeString& oldText,
+                int32_t oldStart,
+                int32_t oldLength,
+                const UnicodeString& newText,
+                int32_t newStart,
+                int32_t newLength);
+
+
+  /* Remove operations */
+
+  /**
+   * Remove all characters from the UnicodeString object.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& remove(void);
+
+  /**
+   * Remove the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
+   * @param start the offset of the first character to remove
+   * @param length the number of characters to remove
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& remove(int32_t start,
+                               int32_t length = (int32_t)INT32_MAX);
+
+  /**
+   * Remove the characters in the range
+   * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
+   * @param start the offset of the first character to remove
+   * @param limit the offset immediately following the range to remove
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& removeBetween(int32_t start,
+                                      int32_t limit = (int32_t)INT32_MAX);
+
+  /**
+   * Retain only the characters in the range
+   * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
+   * Removes characters before <code>start</code> and at and after <code>limit</code>.
+   * @param start the offset of the first character to retain
+   * @param limit the offset immediately following the range to retain
+   * @return a reference to this
+   * @stable ICU 4.4
+   */
+  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
+
+  /* Length operations */
+
+  /**
+   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
+   * If the length of this UnicodeString is less than targetLength,
+   * length() - targetLength copies of padChar will be added to the
+   * beginning of this UnicodeString.
+   * @param targetLength the desired length of the string
+   * @param padChar the character to use for padding. Defaults to
+   * space (U+0020)
+   * @return TRUE if the text was padded, FALSE otherwise.
+   * @stable ICU 2.0
+   */
+  UBool padLeading(int32_t targetLength,
+                    UChar padChar = 0x0020);
+
+  /**
+   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
+   * If the length of this UnicodeString is less than targetLength,
+   * length() - targetLength copies of padChar will be added to the
+   * end of this UnicodeString.
+   * @param targetLength the desired length of the string
+   * @param padChar the character to use for padding. Defaults to
+   * space (U+0020)
+   * @return TRUE if the text was padded, FALSE otherwise.
+   * @stable ICU 2.0
+   */
+  UBool padTrailing(int32_t targetLength,
+                     UChar padChar = 0x0020);
+
+  /**
+   * Truncate this UnicodeString to the <TT>targetLength</TT>.
+   * @param targetLength the desired length of this UnicodeString.
+   * @return TRUE if the text was truncated, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool truncate(int32_t targetLength);
+
+  /**
+   * Trims leading and trailing whitespace from this UnicodeString.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& trim(void);
+
+
+  /* Miscellaneous operations */
+
+  /**
+   * Reverse this UnicodeString in place.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& reverse(void);
+
+  /**
+   * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
+   * this UnicodeString.
+   * @param start the start of the range to reverse
+   * @param length the number of characters to to reverse
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& reverse(int32_t start,
+             int32_t length);
+
+  /**
+   * Convert the characters in this to UPPER CASE following the conventions of
+   * the default locale.
+   * @return A reference to this.
+   * @stable ICU 2.0
+   */
+  UnicodeString& toUpper(void);
+
+  /**
+   * Convert the characters in this to UPPER CASE following the conventions of
+   * a specific locale.
+   * @param locale The locale containing the conventions to use.
+   * @return A reference to this.
+   * @stable ICU 2.0
+   */
+  UnicodeString& toUpper(const Locale& locale);
+
+  /**
+   * Convert the characters in this to lower case following the conventions of
+   * the default locale.
+   * @return A reference to this.
+   * @stable ICU 2.0
+   */
+  UnicodeString& toLower(void);
+
+  /**
+   * Convert the characters in this to lower case following the conventions of
+   * a specific locale.
+   * @param locale The locale containing the conventions to use.
+   * @return A reference to this.
+   * @stable ICU 2.0
+   */
+  UnicodeString& toLower(const Locale& locale);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+  /**
+   * Titlecase this string, convenience function using the default locale.
+   *
+   * Casing is locale-dependent and context-sensitive.
+   * Titlecasing uses a break iterator to find the first characters of words
+   * that are to be titlecased. It titlecases those characters and lowercases
+   * all others.
+   *
+   * The titlecase break iterator can be provided to customize for arbitrary
+   * styles, using rules and dictionaries beyond the standard iterators.
+   * It may be more efficient to always provide an iterator to avoid
+   * opening and closing one for each string.
+   * The standard titlecase iterator for the root locale implements the
+   * algorithm of Unicode TR 21.
+   *
+   * This function uses only the setText(), first() and next() methods of the
+   * provided break iterator.
+   *
+   * @param titleIter A break iterator to find the first characters of words
+   *                  that are to be titlecased.
+   *                  If none is provided (0), then a standard titlecase
+   *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
+   * @return A reference to this.
+   * @stable ICU 2.1
+   */
+  UnicodeString &toTitle(BreakIterator *titleIter);
+
+  /**
+   * Titlecase this string.
+   *
+   * Casing is locale-dependent and context-sensitive.
+   * Titlecasing uses a break iterator to find the first characters of words
+   * that are to be titlecased. It titlecases those characters and lowercases
+   * all others.
+   *
+   * The titlecase break iterator can be provided to customize for arbitrary
+   * styles, using rules and dictionaries beyond the standard iterators.
+   * It may be more efficient to always provide an iterator to avoid
+   * opening and closing one for each string.
+   * The standard titlecase iterator for the root locale implements the
+   * algorithm of Unicode TR 21.
+   *
+   * This function uses only the setText(), first() and next() methods of the
+   * provided break iterator.
+   *
+   * @param titleIter A break iterator to find the first characters of words
+   *                  that are to be titlecased.
+   *                  If none is provided (0), then a standard titlecase
+   *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
+   * @param locale    The locale to consider.
+   * @return A reference to this.
+   * @stable ICU 2.1
+   */
+  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
+
+  /**
+   * Titlecase this string, with options.
+   *
+   * Casing is locale-dependent and context-sensitive.
+   * Titlecasing uses a break iterator to find the first characters of words
+   * that are to be titlecased. It titlecases those characters and lowercases
+   * all others. (This can be modified with options.)
+   *
+   * The titlecase break iterator can be provided to customize for arbitrary
+   * styles, using rules and dictionaries beyond the standard iterators.
+   * It may be more efficient to always provide an iterator to avoid
+   * opening and closing one for each string.
+   * The standard titlecase iterator for the root locale implements the
+   * algorithm of Unicode TR 21.
+   *
+   * This function uses only the setText(), first() and next() methods of the
+   * provided break iterator.
+   *
+   * @param titleIter A break iterator to find the first characters of words
+   *                  that are to be titlecased.
+   *                  If none is provided (0), then a standard titlecase
+   *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
+   * @param locale    The locale to consider.
+   * @param options Options bit set, see ucasemap_open().
+   * @return A reference to this.
+   * @see U_TITLECASE_NO_LOWERCASE
+   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+   * @see ucasemap_open
+   * @stable ICU 3.8
+   */
+  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
+
+#endif
+
+  /**
+   * Case-folds the characters in this string.
+   *
+   * Case-folding is locale-independent and not context-sensitive,
+   * but there is an option for whether to include or exclude mappings for dotted I
+   * and dotless i that are marked with 'T' in CaseFolding.txt.
+   *
+   * The result may be longer or shorter than the original.
+   *
+   * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   * @return A reference to this.
+   * @stable ICU 2.0
+   */
+  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
+
+  //========================================
+  // Access to the internal buffer
+  //========================================
+
+  /**
+   * Get a read/write pointer to the internal buffer.
+   * The buffer is guaranteed to be large enough for at least minCapacity UChars,
+   * writable, and is still owned by the UnicodeString object.
+   * Calls to getBuffer(minCapacity) must not be nested, and
+   * must be matched with calls to releaseBuffer(newLength).
+   * If the string buffer was read-only or shared,
+   * then it will be reallocated and copied.
+   *
+   * An attempted nested call will return 0, and will not further modify the
+   * state of the UnicodeString object.
+   * It also returns 0 if the string is bogus.
+   *
+   * The actual capacity of the string buffer may be larger than minCapacity.
+   * getCapacity() returns the actual capacity.
+   * For many operations, the full capacity should be used to avoid reallocations.
+   *
+   * While the buffer is "open" between getBuffer(minCapacity)
+   * and releaseBuffer(newLength), the following applies:
+   * - The string length is set to 0.
+   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
+   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
+   * - You can read from and write to the returned buffer.
+   * - The previous string contents will still be in the buffer;
+   *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
+   *   If the length() was greater than minCapacity, then any contents after minCapacity
+   *   may be lost.
+   *   The buffer contents is not NUL-terminated by getBuffer().
+   *   If length()<getCapacity() then you can terminate it by writing a NUL
+   *   at index length().
+   * - You must call releaseBuffer(newLength) before and in order to
+   *   return to normal UnicodeString operation.
+   *
+   * @param minCapacity the minimum number of UChars that are to be available
+   *        in the buffer, starting at the returned pointer;
+   *        default to the current string capacity if minCapacity==-1
+   * @return a writable pointer to the internal string buffer,
+   *         or 0 if an error occurs (nested calls, out of memory)
+   *
+   * @see releaseBuffer
+   * @see getTerminatedBuffer()
+   * @stable ICU 2.0
+   */
+  UChar *getBuffer(int32_t minCapacity);
+
+  /**
+   * Release a read/write buffer on a UnicodeString object with an
+   * "open" getBuffer(minCapacity).
+   * This function must be called in a matched pair with getBuffer(minCapacity).
+   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
+   *
+   * It will set the string length to newLength, at most to the current capacity.
+   * If newLength==-1 then it will set the length according to the
+   * first NUL in the buffer, or to the capacity if there is no NUL.
+   *
+   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
+   *
+   * @param newLength the new length of the UnicodeString object;
+   *        defaults to the current capacity if newLength is greater than that;
+   *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
+   *        the current capacity of the string
+   *
+   * @see getBuffer(int32_t minCapacity)
+   * @stable ICU 2.0
+   */
+  void releaseBuffer(int32_t newLength=-1);
+
+  /**
+   * Get a read-only pointer to the internal buffer.
+   * This can be called at any time on a valid UnicodeString.
+   *
+   * It returns 0 if the string is bogus, or
+   * during an "open" getBuffer(minCapacity).
+   *
+   * It can be called as many times as desired.
+   * The pointer that it returns will remain valid until the UnicodeString object is modified,
+   * at which time the pointer is semantically invalidated and must not be used any more.
+   *
+   * The capacity of the buffer can be determined with getCapacity().
+   * The part after length() may or may not be initialized and valid,
+   * depending on the history of the UnicodeString object.
+   *
+   * The buffer contents is (probably) not NUL-terminated.
+   * You can check if it is with
+   * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
+   * (See getTerminatedBuffer().)
+   *
+   * The buffer may reside in read-only memory. Its contents must not
+   * be modified.
+   *
+   * @return a read-only pointer to the internal string buffer,
+   *         or 0 if the string is empty or bogus
+   *
+   * @see getBuffer(int32_t minCapacity)
+   * @see getTerminatedBuffer()
+   * @stable ICU 2.0
+   */
+  inline const UChar *getBuffer() const;
+
+  /**
+   * Get a read-only pointer to the internal buffer,
+   * making sure that it is NUL-terminated.
+   * This can be called at any time on a valid UnicodeString.
+   *
+   * It returns 0 if the string is bogus, or
+   * during an "open" getBuffer(minCapacity), or if the buffer cannot
+   * be NUL-terminated (because memory allocation failed).
+   *
+   * It can be called as many times as desired.
+   * The pointer that it returns will remain valid until the UnicodeString object is modified,
+   * at which time the pointer is semantically invalidated and must not be used any more.
+   *
+   * The capacity of the buffer can be determined with getCapacity().
+   * The part after length()+1 may or may not be initialized and valid,
+   * depending on the history of the UnicodeString object.
+   *
+   * The buffer contents is guaranteed to be NUL-terminated.
+   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
+   * is written.
+   * For this reason, this function is not const, unlike getBuffer().
+   * Note that a UnicodeString may also contain NUL characters as part of its contents.
+   *
+   * The buffer may reside in read-only memory. Its contents must not
+   * be modified.
+   *
+   * @return a read-only pointer to the internal string buffer,
+   *         or 0 if the string is empty or bogus
+   *
+   * @see getBuffer(int32_t minCapacity)
+   * @see getBuffer()
+   * @stable ICU 2.2
+   */
+  const UChar *getTerminatedBuffer();
+
+  //========================================
+  // Constructors
+  //========================================
+
+  /** Construct an empty UnicodeString.
+   * @stable ICU 2.0
+   */
+  inline UnicodeString();
+
+  /**
+   * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
+   * @param capacity the number of UChars this UnicodeString should hold
+   * before a resize is necessary; if count is greater than 0 and count
+   * code points c take up more space than capacity, then capacity is adjusted
+   * accordingly.
+   * @param c is used to initially fill the string
+   * @param count specifies how many code points c are to be written in the
+   *              string
+   * @stable ICU 2.0
+   */
+  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
+
+  /**
+   * Single UChar (code unit) constructor.
+   *
+   * It is recommended to mark this constructor "explicit" by
+   * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
+   * on the compiler command line or similar.
+   * @param ch the character to place in the UnicodeString
+   * @stable ICU 2.0
+   */
+  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
+
+  /**
+   * Single UChar32 (code point) constructor.
+   *
+   * It is recommended to mark this constructor "explicit" by
+   * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
+   * on the compiler command line or similar.
+   * @param ch the character to place in the UnicodeString
+   * @stable ICU 2.0
+   */
+  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
+
+  /**
+   * UChar* constructor.
+   *
+   * It is recommended to mark this constructor "explicit" by
+   * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
+   * on the compiler command line or similar.
+   * @param text The characters to place in the UnicodeString.  <TT>text</TT>
+   * must be NULL (U+0000) terminated.
+   * @stable ICU 2.0
+   */
+  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
+
+  /**
+   * UChar* constructor.
+   * @param text The characters to place in the UnicodeString.
+   * @param textLength The number of Unicode characters in <TT>text</TT>
+   * to copy.
+   * @stable ICU 2.0
+   */
+  UnicodeString(const UChar *text,
+        int32_t textLength);
+
+  /**
+   * Readonly-aliasing UChar* constructor.
+   * The text will be used for the UnicodeString object, but
+   * it will not be released when the UnicodeString is destroyed.
+   * This has copy-on-write semantics:
+   * When the string is modified, then the buffer is first copied into
+   * newly allocated memory.
+   * The aliased buffer is never modified.
+   *
+   * In an assignment to another UnicodeString, when using the copy constructor
+   * or the assignment operator, the text will be copied.
+   * When using fastCopyFrom(), the text will be aliased again,
+   * so that both strings then alias the same readonly-text.
+   *
+   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
+   *                     This must be true if <code>textLength==-1</code>.
+   * @param text The characters to alias for the UnicodeString.
+   * @param textLength The number of Unicode characters in <code>text</code> to alias.
+   *                   If -1, then this constructor will determine the length
+   *                   by calling <code>u_strlen()</code>.
+   * @stable ICU 2.0
+   */
+  UnicodeString(UBool isTerminated,
+                const UChar *text,
+                int32_t textLength);
+
+  /**
+   * Writable-aliasing UChar* constructor.
+   * The text will be used for the UnicodeString object, but
+   * it will not be released when the UnicodeString is destroyed.
+   * This has write-through semantics:
+   * For as long as the capacity of the buffer is sufficient, write operations
+   * will directly affect the buffer. When more capacity is necessary, then
+   * a new buffer will be allocated and the contents copied as with regularly
+   * constructed strings.
+   * In an assignment to another UnicodeString, the buffer will be copied.
+   * The extract(UChar *dst) function detects whether the dst pointer is the same
+   * as the string buffer itself and will in this case not copy the contents.
+   *
+   * @param buffer The characters to alias for the UnicodeString.
+   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
+   * @param buffCapacity The size of <code>buffer</code> in UChars.
+   * @stable ICU 2.0
+   */
+  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
+
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+  /**
+   * char* constructor.
+   * Uses the default converter (and thus depends on the ICU conversion code)
+   * unless U_CHARSET_IS_UTF8 is set to 1.
+   *
+   * For ASCII (really "invariant character") strings it is more efficient to use
+   * the constructor that takes a US_INV (for its enum EInvariant).
+   * For ASCII (invariant-character) string literals, see UNICODE_STRING and
+   * UNICODE_STRING_SIMPLE.
+   *
+   * It is recommended to mark this constructor "explicit" by
+   * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
+   * on the compiler command line or similar.
+   * @param codepageData an array of bytes, null-terminated,
+   *                     in the platform's default codepage.
+   * @stable ICU 2.0
+   * @see UNICODE_STRING
+   * @see UNICODE_STRING_SIMPLE
+   */
+  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
+
+  /**
+   * char* constructor.
+   * Uses the default converter (and thus depends on the ICU conversion code)
+   * unless U_CHARSET_IS_UTF8 is set to 1.
+   * @param codepageData an array of bytes in the platform's default codepage.
+   * @param dataLength The number of bytes in <TT>codepageData</TT>.
+   * @stable ICU 2.0
+   */
+  UnicodeString(const char *codepageData, int32_t dataLength);
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+  /**
+   * char* constructor.
+   * @param codepageData an array of bytes, null-terminated
+   * @param codepage the encoding of <TT>codepageData</TT>.  The special
+   * value 0 for <TT>codepage</TT> indicates that the text is in the
+   * platform's default codepage.
+   *
+   * If <code>codepage</code> is an empty string (<code>""</code>),
+   * then a simple conversion is performed on the codepage-invariant
+   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * Recommendation: For invariant-character strings use the constructor
+   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
+   * @stable ICU 2.0
+   */
+  UnicodeString(const char *codepageData, const char *codepage);
+
+  /**
+   * char* constructor.
+   * @param codepageData an array of bytes.
+   * @param dataLength The number of bytes in <TT>codepageData</TT>.
+   * @param codepage the encoding of <TT>codepageData</TT>.  The special
+   * value 0 for <TT>codepage</TT> indicates that the text is in the
+   * platform's default codepage.
+   * If <code>codepage</code> is an empty string (<code>""</code>),
+   * then a simple conversion is performed on the codepage-invariant
+   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * Recommendation: For invariant-character strings use the constructor
+   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
+   * @stable ICU 2.0
+   */
+  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
+
+  /**
+   * char * / UConverter constructor.
+   * This constructor uses an existing UConverter object to
+   * convert the codepage string to Unicode and construct a UnicodeString
+   * from that.
+   *
+   * The converter is reset at first.
+   * If the error code indicates a failure before this constructor is called,
+   * or if an error occurs during conversion or construction,
+   * then the string will be bogus.
+   *
+   * This function avoids the overhead of opening and closing a converter if
+   * multiple strings are constructed.
+   *
+   * @param src input codepage string
+   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
+   * @param cnv converter object (ucnv_resetToUnicode() will be called),
+   *        can be NULL for the default converter
+   * @param errorCode normal ICU error code
+   * @stable ICU 2.0
+   */
+  UnicodeString(
+        const char *src, int32_t srcLength,
+        UConverter *cnv,
+        UErrorCode &errorCode);
+
+#endif
+
+  /**
+   * Constructs a Unicode string from an invariant-character char * string.
+   * About invariant characters see utypes.h.
+   * This constructor has no runtime dependency on conversion code and is
+   * therefore recommended over ones taking a charset name string
+   * (where the empty string "" indicates invariant-character conversion).
+   *
+   * Use the macro US_INV as the third, signature-distinguishing parameter.
+   *
+   * For example:
+   * \code
+   * void fn(const char *s) {
+   *   UnicodeString ustr(s, -1, US_INV);
+   *   // use ustr ...
+   * }
+   * \endcode
+   *
+   * @param src String using only invariant characters.
+   * @param length Length of src, or -1 if NUL-terminated.
+   * @param inv Signature-distinguishing paramater, use US_INV.
+   *
+   * @see US_INV
+   * @stable ICU 3.2
+   */
+  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
+
+
+  /**
+   * Copy constructor.
+   * @param that The UnicodeString object to copy.
+   * @stable ICU 2.0
+   */
+  UnicodeString(const UnicodeString& that);
+
+  /**
+   * 'Substring' constructor from tail of source string.
+   * @param src The UnicodeString object to copy.
+   * @param srcStart The offset into <tt>src</tt> at which to start copying.
+   * @stable ICU 2.2
+   */
+  UnicodeString(const UnicodeString& src, int32_t srcStart);
+
+  /**
+   * 'Substring' constructor from subrange of source string.
+   * @param src The UnicodeString object to copy.
+   * @param srcStart The offset into <tt>src</tt> at which to start copying.
+   * @param srcLength The number of characters from <tt>src</tt> to copy.
+   * @stable ICU 2.2
+   */
+  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
+
+  /**
+   * Clone this object, an instance of a subclass of Replaceable.
+   * Clones can be used concurrently in multiple threads.
+   * If a subclass does not implement clone(), or if an error occurs,
+   * then NULL is returned.
+   * The clone functions in all subclasses return a pointer to a Replaceable
+   * because some compilers do not support covariant (same-as-this)
+   * return types; cast to the appropriate subclass if necessary.
+   * The caller must delete the clone.
+   *
+   * @return a clone of this object
+   *
+   * @see Replaceable::clone
+   * @see getDynamicClassID
+   * @stable ICU 2.6
+   */
+  virtual Replaceable *clone() const;
+
+  /** Destructor.
+   * @stable ICU 2.0
+   */
+  virtual ~UnicodeString();
+
+  /**
+   * Create a UnicodeString from a UTF-8 string.
+   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
+   * Calls u_strFromUTF8WithSub().
+   *
+   * @param utf8 UTF-8 input string.
+   *             Note that a StringPiece can be implicitly constructed
+   *             from a std::string or a NUL-terminated const char * string.
+   * @return A UnicodeString with equivalent UTF-16 contents.
+   * @see toUTF8
+   * @see toUTF8String
+   * @stable ICU 4.2
+   */
+  static UnicodeString fromUTF8(const StringPiece &utf8);
+
+  /**
+   * Create a UnicodeString from a UTF-32 string.
+   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
+   * Calls u_strFromUTF32WithSub().
+   *
+   * @param utf32 UTF-32 input string. Must not be NULL.
+   * @param length Length of the input string, or -1 if NUL-terminated.
+   * @return A UnicodeString with equivalent UTF-16 contents.
+   * @see toUTF32
+   * @stable ICU 4.2
+   */
+  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
+
+  /* Miscellaneous operations */
+
+  /**
+   * Unescape a string of characters and return a string containing
+   * the result.  The following escape sequences are recognized:
+   *
+   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
+   * \\Uhhhhhhhh   8 hex digits
+   * \\xhh         1-2 hex digits
+   * \\ooo         1-3 octal digits; o in [0-7]
+   * \\cX          control-X; X is masked with 0x1F
+   *
+   * as well as the standard ANSI C escapes:
+   *
+   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+   * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+   *
+   * Anything else following a backslash is generically escaped.  For
+   * example, "[a\\-z]" returns "[a-z]".
+   *
+   * If an escape sequence is ill-formed, this method returns an empty
+   * string.  An example of an ill-formed sequence is "\\u" followed by
+   * fewer than 4 hex digits.
+   *
+   * This function is similar to u_unescape() but not identical to it.
+   * The latter takes a source char*, so it does escape recognition
+   * and also invariant conversion.
+   *
+   * @return a string with backslash escapes interpreted, or an
+   * empty string on error.
+   * @see UnicodeString#unescapeAt()
+   * @see u_unescape()
+   * @see u_unescapeAt()
+   * @stable ICU 2.0
+   */
+  UnicodeString unescape() const;
+
+  /**
+   * Unescape a single escape sequence and return the represented
+   * character.  See unescape() for a listing of the recognized escape
+   * sequences.  The character at offset-1 is assumed (without
+   * checking) to be a backslash.  If the escape sequence is
+   * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
+   * returned.
+   *
+   * @param offset an input output parameter.  On input, it is the
+   * offset into this string where the escape sequence is located,
+   * after the initial backslash.  On output, it is advanced after the
+   * last character parsed.  On error, it is not advanced at all.
+   * @return the character represented by the escape sequence at
+   * offset, or U_SENTINEL=-1 on error.
+   * @see UnicodeString#unescape()
+   * @see u_unescape()
+   * @see u_unescapeAt()
+   * @stable ICU 2.0
+   */
+  UChar32 unescapeAt(int32_t &offset) const;
+
+  /**
+   * ICU "poor man's RTTI", returns a UClassID for this class.
+   *
+   * @stable ICU 2.2
+   */
+  static UClassID U_EXPORT2 getStaticClassID();
+
+  /**
+   * ICU "poor man's RTTI", returns a UClassID for the actual class.
+   *
+   * @stable ICU 2.2
+   */
+  virtual UClassID getDynamicClassID() const;
+
+  //========================================
+  // Implementation methods
+  //========================================
+
+protected:
+  /**
+   * Implement Replaceable::getLength() (see jitterbug 1027).
+   * @stable ICU 2.4
+   */
+  virtual int32_t getLength() const;
+
+  /**
+   * The change in Replaceable to use virtual getCharAt() allows
+   * UnicodeString::charAt() to be inline again (see jitterbug 709).
+   * @stable ICU 2.4
+   */
+  virtual UChar getCharAt(int32_t offset) const;
+
+  /**
+   * The change in Replaceable to use virtual getChar32At() allows
+   * UnicodeString::char32At() to be inline again (see jitterbug 709).
+   * @stable ICU 2.4
+   */
+  virtual UChar32 getChar32At(int32_t offset) const;
+
+private:
+  // For char* constructors. Could be made public.
+  UnicodeString &setToUTF8(const StringPiece &utf8);
+  // For extract(char*).
+  // We could make a toUTF8(target, capacity, errorCode) public but not
+  // this version: New API will be cleaner if we make callers create substrings
+  // rather than having start+length on every method,
+  // and it should take a UErrorCode&.
+  int32_t
+  toUTF8(int32_t start, int32_t len,
+         char *target, int32_t capacity) const;
+
+  /**
+   * Internal string contents comparison, called by operator==.
+   * Requires: this & text not bogus and have same lengths.
+   */
+  UBool doEquals(const UnicodeString &text, int32_t len) const;
+
+  inline int8_t
+  doCompare(int32_t start,
+           int32_t length,
+           const UnicodeString& srcText,
+           int32_t srcStart,
+           int32_t srcLength) const;
+
+  int8_t doCompare(int32_t start,
+           int32_t length,
+           const UChar *srcChars,
+           int32_t srcStart,
+           int32_t srcLength) const;
+
+  inline int8_t
+  doCompareCodePointOrder(int32_t start,
+                          int32_t length,
+                          const UnicodeString& srcText,
+                          int32_t srcStart,
+                          int32_t srcLength) const;
+
+  int8_t doCompareCodePointOrder(int32_t start,
+                                 int32_t length,
+                                 const UChar *srcChars,
+                                 int32_t srcStart,
+                                 int32_t srcLength) const;
+
+  inline int8_t
+  doCaseCompare(int32_t start,
+                int32_t length,
+                const UnicodeString &srcText,
+                int32_t srcStart,
+                int32_t srcLength,
+                uint32_t options) const;
+
+  int8_t
+  doCaseCompare(int32_t start,
+                int32_t length,
+                const UChar *srcChars,
+                int32_t srcStart,
+                int32_t srcLength,
+                uint32_t options) const;
+
+  int32_t doIndexOf(UChar c,
+            int32_t start,
+            int32_t length) const;
+
+  int32_t doIndexOf(UChar32 c,
+                        int32_t start,
+                        int32_t length) const;
+
+  int32_t doLastIndexOf(UChar c,
+                int32_t start,
+                int32_t length) const;
+
+  int32_t doLastIndexOf(UChar32 c,
+                            int32_t start,
+                            int32_t length) const;
+
+  void doExtract(int32_t start,
+         int32_t length,
+         UChar *dst,
+         int32_t dstStart) const;
+
+  inline void doExtract(int32_t start,
+         int32_t length,
+         UnicodeString& target) const;
+
+  inline UChar doCharAt(int32_t offset)  const;
+
+  UnicodeString& doReplace(int32_t start,
+               int32_t length,
+               const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength);
+
+  UnicodeString& doReplace(int32_t start,
+               int32_t length,
+               const UChar *srcChars,
+               int32_t srcStart,
+               int32_t srcLength);
+
+  UnicodeString& doReverse(int32_t start,
+               int32_t length);
+
+  // calculate hash code
+  int32_t doHashCode(void) const;
+
+  // get pointer to start of array
+  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
+  inline UChar* getArrayStart(void);
+  inline const UChar* getArrayStart(void) const;
+
+  // A UnicodeString object (not necessarily its current buffer)
+  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
+  inline UBool isWritable() const;
+
+  // Is the current buffer writable?
+  inline UBool isBufferWritable() const;
+
+  // None of the following does releaseArray().
+  inline void setLength(int32_t len);        // sets only fShortLength and fLength
+  inline void setToEmpty();                  // sets fFlags=kShortString
+  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
+
+  // allocate the array; result may be fStackBuffer
+  // sets refCount to 1 if appropriate
+  // sets fArray, fCapacity, and fFlags
+  // returns boolean for success or failure
+  UBool allocate(int32_t capacity);
+
+  // release the array if owned
+  void releaseArray(void);
+
+  // turn a bogus string into an empty one
+  void unBogus();
+
+  // implements assigment operator, copy constructor, and fastCopyFrom()
+  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
+
+  // Pin start and limit to acceptable values.
+  inline void pinIndex(int32_t& start) const;
+  inline void pinIndices(int32_t& start,
+                         int32_t& length) const;
+
+#if !UCONFIG_NO_CONVERSION
+
+  /* Internal extract() using UConverter. */
+  int32_t doExtract(int32_t start, int32_t length,
+                    char *dest, int32_t destCapacity,
+                    UConverter *cnv,
+                    UErrorCode &errorCode) const;
+
+  /*
+   * Real constructor for converting from codepage data.
+   * It assumes that it is called with !fRefCounted.
+   *
+   * If <code>codepage==0</code>, then the default converter
+   * is used for the platform encoding.
+   * If <code>codepage</code> is an empty string (<code>""</code>),
+   * then a simple conversion is performed on the codepage-invariant
+   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   */
+  void doCodepageCreate(const char *codepageData,
+                        int32_t dataLength,
+                        const char *codepage);
+
+  /*
+   * Worker function for creating a UnicodeString from
+   * a codepage string using a UConverter.
+   */
+  void
+  doCodepageCreate(const char *codepageData,
+                   int32_t dataLength,
+                   UConverter *converter,
+                   UErrorCode &status);
+
+#endif
+
+  /*
+   * This function is called when write access to the array
+   * is necessary.
+   *
+   * We need to make a copy of the array if
+   * the buffer is read-only, or
+   * the buffer is refCounted (shared), and refCount>1, or
+   * the buffer is too small.
+   *
+   * Return FALSE if memory could not be allocated.
+   */
+  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
+                            int32_t growCapacity = -1,
+                            UBool doCopyArray = TRUE,
+                            int32_t **pBufferToDelete = 0,
+                            UBool forceClone = FALSE);
+
+  /**
+   * Common function for UnicodeString case mappings.
+   * The stringCaseMapper has the same type UStringCaseMapper
+   * as in ustr_imp.h for ustrcase_map().
+   */
+  UnicodeString &
+  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
+
+  // ref counting
+  void addRef(void);
+  int32_t removeRef(void);
+  int32_t refCount(void) const;
+
+  // constants
+  enum {
+    // Set the stack buffer size so that sizeof(UnicodeString) is,
+    // naturally (without padding), a multiple of sizeof(pointer).
+    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
+    kInvalidUChar=0xffff, // invalid UChar index
+    kGrowSize=128, // grow size for this buffer
+    kInvalidHashCode=0, // invalid hash code
+    kEmptyHashCode=1, // hash code for empty string
+
+    // bit flag values for fFlags
+    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
+    kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
+    kRefCounted=4,      // there is a refCount field before the characters in fArray
+    kBufferIsReadonly=8,// do not write to this buffer
+    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
+                        // and releaseBuffer(newLength) must be called
+
+    // combined values for convenience
+    kShortString=kUsingStackBuffer,
+    kLongString=kRefCounted,
+    kReadonlyAlias=kBufferIsReadonly,
+    kWritableAlias=0
+  };
+
+  friend class StringThreadTest;
+  friend class UnicodeStringAppendable;
+
+  union StackBufferOrFields;        // forward declaration necessary before friend declaration
+  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
+
+  /*
+   * The following are all the class fields that are stored
+   * in each UnicodeString object.
+   * Note that UnicodeString has virtual functions,
+   * therefore there is an implicit vtable pointer
+   * as the first real field.
+   * The fields should be aligned such that no padding is necessary.
+   * On 32-bit machines, the size should be 32 bytes,
+   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
+   *
+   * We use a hack to achieve this.
+   *
+   * With at least some compilers, each of the following is forced to
+   * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
+   * rounded up with additional padding if the fields do not already fit that requirement:
+   * - sizeof(class UnicodeString)
+   * - offsetof(UnicodeString, fUnion)
+   * - sizeof(fUnion)
+   * - sizeof(fFields)
+   *
+   * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
+   * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
+   * (Padding at the end of fFields is ok:
+   * As long as there is no padding after fStackBuffer, it is not wasted space.)
+   *
+   * We further assume that the compiler does not reorder the fields,
+   * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
+   * with at most some padding (but no other field) in between.
+   * (Padding there would be wasted space, but functionally harmless.)
+   *
+   * We use a few more sizeof(pointer)'s chunks of space with
+   * fRestOfStackBuffer, fShortLength and fFlags,
+   * to get up exactly to the intended sizeof(UnicodeString).
+   */
+  // (implicit) *vtable;
+  union StackBufferOrFields {
+    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
+    // else fFields is used
+    UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
+    struct {
+      UChar   *fArray;    // the Unicode data
+      int32_t fCapacity;  // capacity of fArray (in UChars)
+      int32_t fLength;    // number of characters in fArray if >127; else undefined
+    } fFields;
+  } fUnion;
+  UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
+  int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
+  uint8_t fFlags;       // bit flags: see constants above
+};
+
+/**
+ * Create a new UnicodeString with the concatenation of two others.
+ *
+ * @param s1 The first string to be copied to the new one.
+ * @param s2 The second string to be copied to the new one, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @stable ICU 2.8
+ */
+U_COMMON_API UnicodeString U_EXPORT2
+operator+ (const UnicodeString &s1, const UnicodeString &s2);
+
+//========================================
+// Inline members
+//========================================
+
+//========================================
+// Privates
+//========================================
+
+inline void
+UnicodeString::pinIndex(int32_t& start) const
+{
+  // pin index
+  if(start < 0) {
+    start = 0;
+  } else if(start > length()) {
+    start = length();
+  }
+}
+
+inline void
+UnicodeString::pinIndices(int32_t& start,
+                          int32_t& _length) const
+{
+  // pin indices
+  int32_t len = length();
+  if(start < 0) {
+    start = 0;
+  } else if(start > len) {
+    start = len;
+  }
+  if(_length < 0) {
+    _length = 0;
+  } else if(_length > (len - start)) {
+    _length = (len - start);
+  }
+}
+
+inline UChar*
+UnicodeString::getArrayStart()
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
+
+inline const UChar*
+UnicodeString::getArrayStart() const
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
+
+//========================================
+// Default constructor
+//========================================
+
+inline
+UnicodeString::UnicodeString()
+  : fShortLength(0),
+    fFlags(kShortString)
+{}
+
+//========================================
+// Read-only implementation methods
+//========================================
+inline int32_t
+UnicodeString::length() const
+{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
+
+inline int32_t
+UnicodeString::getCapacity() const
+{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
+
+inline int32_t
+UnicodeString::hashCode() const
+{ return doHashCode(); }
+
+inline UBool
+UnicodeString::isBogus() const
+{ return (UBool)(fFlags & kIsBogus); }
+
+inline UBool
+UnicodeString::isWritable() const
+{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
+
+inline UBool
+UnicodeString::isBufferWritable() const
+{
+  return (UBool)(
+      !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
+      (!(fFlags&kRefCounted) || refCount()==1));
+}
+
+inline const UChar *
+UnicodeString::getBuffer() const {
+  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
+    return 0;
+  } else if(fFlags&kUsingStackBuffer) {
+    return fUnion.fStackBuffer;
+  } else {
+    return fUnion.fFields.fArray;
+  }
+}
+
+//========================================
+// Read-only alias methods
+//========================================
+inline int8_t
+UnicodeString::doCompare(int32_t start,
+              int32_t thisLength,
+              const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength) const
+{
+  if(srcText.isBogus()) {
+    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+  } else {
+    srcText.pinIndices(srcStart, srcLength);
+    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+  }
+}
+
+inline UBool
+UnicodeString::operator== (const UnicodeString& text) const
+{
+  if(isBogus()) {
+    return text.isBogus();
+  } else {
+    int32_t len = length(), textLength = text.length();
+    return !text.isBogus() && len == textLength && doEquals(text, len);
+  }
+}
+
+inline UBool
+UnicodeString::operator!= (const UnicodeString& text) const
+{ return (! operator==(text)); }
+
+inline UBool
+UnicodeString::operator> (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) == 1; }
+
+inline UBool
+UnicodeString::operator< (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) == -1; }
+
+inline UBool
+UnicodeString::operator>= (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) != -1; }
+
+inline UBool
+UnicodeString::operator<= (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) != 1; }
+
+inline int8_t
+UnicodeString::compare(const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+               int32_t _length,
+               const UnicodeString& srcText) const
+{ return doCompare(start, _length, srcText, 0, srcText.length()); }
+
+inline int8_t
+UnicodeString::compare(const UChar *srcChars,
+               int32_t srcLength) const
+{ return doCompare(0, length(), srcChars, 0, srcLength); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+               int32_t _length,
+               const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength) const
+{ return doCompare(start, _length, srcText, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+               int32_t _length,
+               const UChar *srcChars) const
+{ return doCompare(start, _length, srcChars, 0, _length); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+               int32_t _length,
+               const UChar *srcChars,
+               int32_t srcStart,
+               int32_t srcLength) const
+{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareBetween(int32_t start,
+                  int32_t limit,
+                  const UnicodeString& srcText,
+                  int32_t srcStart,
+                  int32_t srcLimit) const
+{ return doCompare(start, limit - start,
+           srcText, srcStart, srcLimit - srcStart); }
+
+inline int8_t
+UnicodeString::doCompareCodePointOrder(int32_t start,
+                                       int32_t thisLength,
+                                       const UnicodeString& srcText,
+                                       int32_t srcStart,
+                                       int32_t srcLength) const
+{
+  if(srcText.isBogus()) {
+    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+  } else {
+    srcText.pinIndices(srcStart, srcLength);
+    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+  }
+}
+
+inline int8_t
+UnicodeString::compareCodePointOrder(const UnicodeString& text) const
+{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UnicodeString& srcText) const
+{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(const UChar *srcChars,
+                                     int32_t srcLength) const
+{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UnicodeString& srcText,
+                                     int32_t srcStart,
+                                     int32_t srcLength) const
+{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UChar *srcChars) const
+{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UChar *srcChars,
+                                     int32_t srcStart,
+                                     int32_t srcLength) const
+{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrderBetween(int32_t start,
+                                            int32_t limit,
+                                            const UnicodeString& srcText,
+                                            int32_t srcStart,
+                                            int32_t srcLimit) const
+{ return doCompareCodePointOrder(start, limit - start,
+           srcText, srcStart, srcLimit - srcStart); }
+
+inline int8_t
+UnicodeString::doCaseCompare(int32_t start,
+                             int32_t thisLength,
+                             const UnicodeString &srcText,
+                             int32_t srcStart,
+                             int32_t srcLength,
+                             uint32_t options) const
+{
+  if(srcText.isBogus()) {
+    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+  } else {
+    srcText.pinIndices(srcStart, srcLength);
+    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
+  }
+}
+
+inline int8_t
+UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
+  return doCaseCompare(0, length(), text, 0, text.length(), options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UnicodeString &srcText,
+                           uint32_t options) const {
+  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(const UChar *srcChars,
+                           int32_t srcLength,
+                           uint32_t options) const {
+  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UnicodeString &srcText,
+                           int32_t srcStart,
+                           int32_t srcLength,
+                           uint32_t options) const {
+  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UChar *srcChars,
+                           uint32_t options) const {
+  return doCaseCompare(start, _length, srcChars, 0, _length, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UChar *srcChars,
+                           int32_t srcStart,
+                           int32_t srcLength,
+                           uint32_t options) const {
+  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompareBetween(int32_t start,
+                                  int32_t limit,
+                                  const UnicodeString &srcText,
+                                  int32_t srcStart,
+                                  int32_t srcLimit,
+                                  uint32_t options) const {
+  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength,
+               int32_t start,
+               int32_t _length) const
+{
+  if(!srcText.isBogus()) {
+    srcText.pinIndices(srcStart, srcLength);
+    if(srcLength > 0) {
+      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+    }
+  }
+  return -1;
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text) const
+{ return indexOf(text, 0, text.length(), 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+               int32_t start) const {
+  pinIndex(start);
+  return indexOf(text, 0, text.length(), start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+               int32_t start,
+               int32_t _length) const
+{ return indexOf(text, 0, text.length(), start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(const UChar *srcChars,
+               int32_t srcLength,
+               int32_t start) const {
+  pinIndex(start);
+  return indexOf(srcChars, 0, srcLength, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UChar *srcChars,
+               int32_t srcLength,
+               int32_t start,
+               int32_t _length) const
+{ return indexOf(srcChars, 0, srcLength, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(UChar c,
+               int32_t start,
+               int32_t _length) const
+{ return doIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+               int32_t start,
+               int32_t _length) const
+{ return doIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(UChar c) const
+{ return doIndexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c) const
+{ return indexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(UChar c,
+               int32_t start) const {
+  pinIndex(start);
+  return doIndexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+               int32_t start) const {
+  pinIndex(start);
+  return indexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars,
+               int32_t srcLength,
+               int32_t start,
+               int32_t _length) const
+{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars,
+               int32_t srcLength,
+               int32_t start) const {
+  pinIndex(start);
+  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength,
+               int32_t start,
+               int32_t _length) const
+{
+  if(!srcText.isBogus()) {
+    srcText.pinIndices(srcStart, srcLength);
+    if(srcLength > 0) {
+      return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+    }
+  }
+  return -1;
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+               int32_t start,
+               int32_t _length) const
+{ return lastIndexOf(text, 0, text.length(), start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+               int32_t start) const {
+  pinIndex(start);
+  return lastIndexOf(text, 0, text.length(), start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text) const
+{ return lastIndexOf(text, 0, text.length(), 0, length()); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar c,
+               int32_t start,
+               int32_t _length) const
+{ return doLastIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+               int32_t start,
+               int32_t _length) const {
+  return doLastIndexOf(c, start, _length);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar c) const
+{ return doLastIndexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c) const {
+  return lastIndexOf(c, 0, length());
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar c,
+               int32_t start) const {
+  pinIndex(start);
+  return doLastIndexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+               int32_t start) const {
+  pinIndex(start);
+  return lastIndexOf(c, start, length() - start);
+}
+
+inline UBool
+UnicodeString::startsWith(const UnicodeString& text) const
+{ return compare(0, text.length(), text, 0, text.length()) == 0; }
+
+inline UBool
+UnicodeString::startsWith(const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength) const
+{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
+
+inline UBool
+UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
+  if(srcLength < 0) {
+    srcLength = u_strlen(srcChars);
+  }
+  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
+}
+
+inline UBool
+UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
+  if(srcLength < 0) {
+    srcLength = u_strlen(srcChars);
+  }
+  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
+}
+
+inline UBool
+UnicodeString::endsWith(const UnicodeString& text) const
+{ return doCompare(length() - text.length(), text.length(),
+           text, 0, text.length()) == 0; }
+
+inline UBool
+UnicodeString::endsWith(const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLength) const {
+  srcText.pinIndices(srcStart, srcLength);
+  return doCompare(length() - srcLength, srcLength,
+                   srcText, srcStart, srcLength) == 0;
+}
+
+inline UBool
+UnicodeString::endsWith(const UChar *srcChars,
+            int32_t srcLength) const {
+  if(srcLength < 0) {
+    srcLength = u_strlen(srcChars);
+  }
+  return doCompare(length() - srcLength, srcLength,
+                   srcChars, 0, srcLength) == 0;
+}
+
+inline UBool
+UnicodeString::endsWith(const UChar *srcChars,
+            int32_t srcStart,
+            int32_t srcLength) const {
+  if(srcLength < 0) {
+    srcLength = u_strlen(srcChars + srcStart);
+  }
+  return doCompare(length() - srcLength, srcLength,
+                   srcChars, srcStart, srcLength) == 0;
+}
+
+//========================================
+// replace
+//========================================
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UnicodeString& srcText)
+{ return doReplace(start, _length, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength)
+{ return doReplace(start, _length, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UChar *srcChars,
+               int32_t srcLength)
+{ return doReplace(start, _length, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UChar *srcChars,
+               int32_t srcStart,
+               int32_t srcLength)
+{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               UChar srcChar)
+{ return doReplace(start, _length, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+                  int32_t limit,
+                  const UnicodeString& srcText)
+{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+                  int32_t limit,
+                  const UnicodeString& srcText,
+                  int32_t srcStart,
+                  int32_t srcLimit)
+{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
+
+inline UnicodeString&
+UnicodeString::findAndReplace(const UnicodeString& oldText,
+                  const UnicodeString& newText)
+{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
+            newText, 0, newText.length()); }
+
+inline UnicodeString&
+UnicodeString::findAndReplace(int32_t start,
+                  int32_t _length,
+                  const UnicodeString& oldText,
+                  const UnicodeString& newText)
+{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
+            newText, 0, newText.length()); }
+
+// ============================
+// extract
+// ============================
+inline void
+UnicodeString::doExtract(int32_t start,
+             int32_t _length,
+             UnicodeString& target) const
+{ target.replace(0, target.length(), *this, start, _length); }
+
+inline void
+UnicodeString::extract(int32_t start,
+               int32_t _length,
+               UChar *target,
+               int32_t targetStart) const
+{ doExtract(start, _length, target, targetStart); }
+
+inline void
+UnicodeString::extract(int32_t start,
+               int32_t _length,
+               UnicodeString& target) const
+{ doExtract(start, _length, target); }
+
+#if !UCONFIG_NO_CONVERSION
+
+inline int32_t
+UnicodeString::extract(int32_t start,
+               int32_t _length,
+               char *dst,
+               const char *codepage) const
+
+{
+  // This dstSize value will be checked explicitly
+  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
+}
+
+#endif
+
+inline void
+UnicodeString::extractBetween(int32_t start,
+                  int32_t limit,
+                  UChar *dst,
+                  int32_t dstStart) const {
+  pinIndex(start);
+  pinIndex(limit);
+  doExtract(start, limit - start, dst, dstStart);
+}
+
+inline UnicodeString
+UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
+    return tempSubString(start, limit - start);
+}
+
+inline UChar
+UnicodeString::doCharAt(int32_t offset) const
+{
+  if((uint32_t)offset < (uint32_t)length()) {
+    return getArrayStart()[offset];
+  } else {
+    return kInvalidUChar;
+  }
+}
+
+inline UChar
+UnicodeString::charAt(int32_t offset) const
+{ return doCharAt(offset); }
+
+inline UChar
+UnicodeString::operator[] (int32_t offset) const
+{ return doCharAt(offset); }
+
+inline UBool
+UnicodeString::isEmpty() const {
+  return fShortLength == 0;
+}
+
+//========================================
+// Write implementation methods
+//========================================
+inline void
+UnicodeString::setLength(int32_t len) {
+  if(len <= 127) {
+    fShortLength = (int8_t)len;
+  } else {
+    fShortLength = (int8_t)-1;
+    fUnion.fFields.fLength = len;
+  }
+}
+
+inline void
+UnicodeString::setToEmpty() {
+  fShortLength = 0;
+  fFlags = kShortString;
+}
+
+inline void
+UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
+  setLength(len);
+  fUnion.fFields.fArray = array;
+  fUnion.fFields.fCapacity = capacity;
+}
+
+inline UnicodeString&
+UnicodeString::operator= (UChar ch)
+{ return doReplace(0, length(), &ch, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator= (UChar32 ch)
+{ return replace(0, length(), ch); }
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+             int32_t srcStart,
+             int32_t srcLength)
+{
+  unBogus();
+  return doReplace(0, length(), srcText, srcStart, srcLength);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+             int32_t srcStart)
+{
+  unBogus();
+  srcText.pinIndex(srcStart);
+  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText)
+{
+  return copyFrom(srcText);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UChar *srcChars,
+             int32_t srcLength)
+{
+  unBogus();
+  return doReplace(0, length(), srcChars, 0, srcLength);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(UChar srcChar)
+{
+  unBogus();
+  return doReplace(0, length(), &srcChar, 0, 1);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(UChar32 srcChar)
+{
+  unBogus();
+  return replace(0, length(), srcChar);
+}
+
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength)
+{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText)
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::append(const UChar *srcChars,
+              int32_t srcStart,
+              int32_t srcLength)
+{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(const UChar *srcChars,
+              int32_t srcLength)
+{ return doReplace(length(), 0, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(UChar srcChar)
+{ return doReplace(length(), 0, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar ch)
+{ return doReplace(length(), 0, &ch, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar32 ch) {
+  return append(ch);
+}
+
+inline UnicodeString&
+UnicodeString::operator+= (const UnicodeString& srcText)
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength)
+{ return doReplace(start, 0, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              const UnicodeString& srcText)
+{ return doReplace(start, 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              const UChar *srcChars,
+              int32_t srcStart,
+              int32_t srcLength)
+{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              const UChar *srcChars,
+              int32_t srcLength)
+{ return doReplace(start, 0, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              UChar srcChar)
+{ return doReplace(start, 0, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              UChar32 srcChar)
+{ return replace(start, 0, srcChar); }
+
+
+inline UnicodeString&
+UnicodeString::remove()
+{
+  // remove() of a bogus string makes the string empty and non-bogus
+  if(isBogus()) {
+    setToEmpty();
+  } else {
+    fShortLength = 0;
+  }
+  return *this;
+}
+
+inline UnicodeString&
+UnicodeString::remove(int32_t start,
+             int32_t _length)
+{
+    if(start <= 0 && _length == INT32_MAX) {
+        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
+        return remove();
+    }
+    return doReplace(start, _length, NULL, 0, 0);
+}
+
+inline UnicodeString&
+UnicodeString::removeBetween(int32_t start,
+                int32_t limit)
+{ return doReplace(start, limit - start, NULL, 0, 0); }
+
+inline UnicodeString &
+UnicodeString::retainBetween(int32_t start, int32_t limit) {
+  truncate(limit);
+  return doReplace(0, start, NULL, 0, 0);
+}
+
+inline UBool
+UnicodeString::truncate(int32_t targetLength)
+{
+  if(isBogus() && targetLength == 0) {
+    // truncate(0) of a bogus string makes the string empty and non-bogus
+    unBogus();
+    return FALSE;
+  } else if((uint32_t)targetLength < (uint32_t)length()) {
+    setLength(targetLength);
+    return TRUE;
+  } else {
+    return FALSE;
+  }
+}
+
+inline UnicodeString&
+UnicodeString::reverse()
+{ return doReverse(0, length()); }
+
+inline UnicodeString&
+UnicodeString::reverse(int32_t start,
+               int32_t _length)
+{ return doReverse(start, _length); }
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uobject.h b/src/core/basetypes/icu-ucsdet/include/unicode/uobject.h
new file mode 100644
index 00000000..54ceace6
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uobject.h
@@ -0,0 +1,320 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2002-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  uobject.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002jun26
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UOBJECT_H__
+#define __UOBJECT_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Common ICU base class UObject.
+ */
+
+/**
+ * @{
+ * \def U_NO_THROW
+ *         Define this to define the throw() specification so
+ *                 certain functions do not throw any exceptions
+ *
+ *         UMemory operator new methods should have the throw() specification 
+ *         appended to them, so that the compiler adds the additional NULL check 
+ *         before calling constructors. Without, if <code>operator new</code> returns NULL the 
+ *         constructor is still called, and if the constructor references member 
+ *         data, (which it typically does), the result is a segmentation violation.
+ *
+ * @stable ICU 4.2
+ */
+#ifndef U_NO_THROW
+#define U_NO_THROW throw()
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/* UClassID-based RTTI */
+/*===========================================================================*/
+
+/**
+ * UClassID is used to identify classes without using the compiler's RTTI.
+ * This was used before C++ compilers consistently supported RTTI.
+ * ICU 4.6 requires compiler RTTI to be turned on.
+ *
+ * Each class hierarchy which needs
+ * to implement polymorphic clone() or operator==() defines two methods,
+ * described in detail below.  UClassID values can be compared using
+ * operator==(). Nothing else should be done with them.
+ *
+ * \par
+ * In class hierarchies that implement "poor man's RTTI",
+ * each concrete subclass implements getDynamicClassID() in the same way:
+ *
+ * \code
+ *      class Derived {
+ *      public:
+ *          virtual UClassID getDynamicClassID() const
+ *            { return Derived::getStaticClassID(); }
+ *      }
+ * \endcode
+ *
+ * Each concrete class implements getStaticClassID() as well, which allows
+ * clients to test for a specific type.
+ *
+ * \code
+ *      class Derived {
+ *      public:
+ *          static UClassID U_EXPORT2 getStaticClassID();
+ *      private:
+ *          static char fgClassID;
+ *      }
+ *
+ *      // In Derived.cpp:
+ *      UClassID Derived::getStaticClassID()
+ *        { return (UClassID)&Derived::fgClassID; }
+ *      char Derived::fgClassID = 0; // Value is irrelevant
+ * \endcode
+ * @stable ICU 2.0
+ */
+typedef void* UClassID;
+
+U_NAMESPACE_BEGIN
+
+/**
+ * UMemory is the common ICU base class.
+ * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
+ *
+ * This is primarily to make it possible and simple to override the
+ * C++ memory management by adding new/delete operators to this base class.
+ *
+ * To override ALL ICU memory management, including that from plain C code,
+ * replace the allocation functions declared in cmemory.h
+ *
+ * UMemory does not contain any virtual functions.
+ * Common "boilerplate" functions are defined in UObject.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UMemory {
+public:
+
+/* test versions for debugging shaper heap memory problems */
+#ifdef SHAPER_MEMORY_DEBUG  
+    static void * NewArray(int size, int count);
+    static void * GrowArray(void * array, int newSize );
+    static void   FreeArray(void * array );
+#endif
+
+#if U_OVERRIDE_CXX_ALLOCATION
+    /**
+     * Override for ICU4C C++ memory management.
+     * simple, non-class types are allocated using the macros in common/cmemory.h
+     * (uprv_malloc(), uprv_free(), uprv_realloc());
+     * they or something else could be used here to implement C++ new/delete
+     * for ICU4C C++ classes
+     * @stable ICU 2.4
+     */
+    static void * U_EXPORT2 operator new(size_t size) U_NO_THROW;
+
+    /**
+     * Override for ICU4C C++ memory management.
+     * See new().
+     * @stable ICU 2.4
+     */
+    static void * U_EXPORT2 operator new[](size_t size) U_NO_THROW;
+
+    /**
+     * Override for ICU4C C++ memory management.
+     * simple, non-class types are allocated using the macros in common/cmemory.h
+     * (uprv_malloc(), uprv_free(), uprv_realloc());
+     * they or something else could be used here to implement C++ new/delete
+     * for ICU4C C++ classes
+     * @stable ICU 2.4
+     */
+    static void U_EXPORT2 operator delete(void *p) U_NO_THROW;
+
+    /**
+     * Override for ICU4C C++ memory management.
+     * See delete().
+     * @stable ICU 2.4
+     */
+    static void U_EXPORT2 operator delete[](void *p) U_NO_THROW;
+
+#if U_HAVE_PLACEMENT_NEW
+    /**
+     * Override for ICU4C C++ memory management for STL.
+     * See new().
+     * @stable ICU 2.6
+     */
+    static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NO_THROW { return ptr; }
+
+    /**
+     * Override for ICU4C C++ memory management for STL.
+     * See delete().
+     * @stable ICU 2.6
+     */
+    static inline void U_EXPORT2 operator delete(void *, void *) U_NO_THROW {}
+#endif /* U_HAVE_PLACEMENT_NEW */
+#if U_HAVE_DEBUG_LOCATION_NEW
+    /**
+      * This method overrides the MFC debug version of the operator new
+      * 
+      * @param size   The requested memory size
+      * @param file   The file where the allocation was requested
+      * @param line   The line where the allocation was requested 
+      */ 
+    static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NO_THROW;
+    /**
+      * This method provides a matching delete for the MFC debug new
+      * 
+      * @param p      The pointer to the allocated memory
+      * @param file   The file where the allocation was requested
+      * @param line   The line where the allocation was requested 
+      */ 
+    static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NO_THROW;
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+#endif /* U_OVERRIDE_CXX_ALLOCATION */
+
+    /*
+     * Assignment operator not declared. The compiler will provide one
+     * which does nothing since this class does not contain any data members.
+     * API/code coverage may show the assignment operator as present and
+     * untested - ignore.
+     * Subclasses need this assignment operator if they use compiler-provided
+     * assignment operators of their own. An alternative to not declaring one
+     * here would be to declare and empty-implement a protected or public one.
+    UMemory &UMemory::operator=(const UMemory &);
+     */
+};
+
+/**
+ * UObject is the common ICU "boilerplate" class.
+ * UObject inherits UMemory (starting with ICU 2.4),
+ * and all other public ICU C++ classes
+ * are derived from UObject (starting with ICU 2.2).
+ *
+ * UObject contains common virtual functions, in particular a virtual destructor.
+ *
+ * The clone() function is not available in UObject because it is not
+ * implemented by all ICU classes.
+ * Many ICU services provide a clone() function for their class trees,
+ * defined on the service's C++ base class, and all subclasses within that
+ * service class tree return a pointer to the service base class
+ * (which itself is a subclass of UObject).
+ * This is because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ *
+ * @stable ICU 2.2
+ */
+class U_COMMON_API UObject : public UMemory {
+public:
+    /**
+     * Destructor.
+     *
+     * @stable ICU 2.2
+     */
+    virtual ~UObject();
+
+    /**
+     * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
+     * The base class implementation returns a dummy value.
+     *
+     * Use compiler RTTI rather than ICU's "poor man's RTTI".
+     * Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI".
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+protected:
+    // the following functions are protected to prevent instantiation and
+    // direct use of UObject itself
+
+    // default constructor
+    // inline UObject() {}
+
+    // copy constructor
+    // inline UObject(const UObject &other) {}
+
+#if 0
+    // TODO Sometime in the future. Implement operator==().
+    // (This comment inserted in 2.2)
+    // some or all of the following "boilerplate" functions may be made public
+    // in a future ICU4C release when all subclasses implement them
+
+    // assignment operator
+    // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
+    // commented out because the implementation is the same as a compiler's default
+    // UObject &operator=(const UObject &other) { return *this; }
+
+    // comparison operators
+    virtual inline UBool operator==(const UObject &other) const { return this==&other; }
+    inline UBool operator!=(const UObject &other) const { return !operator==(other); }
+
+    // clone() commented out from the base class:
+    // some compilers do not support co-variant return types
+    // (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
+    // see also UObject class documentation.
+    // virtual UObject *clone() const;
+#endif
+
+    /*
+     * Assignment operator not declared. The compiler will provide one
+     * which does nothing since this class does not contain any data members.
+     * API/code coverage may show the assignment operator as present and
+     * untested - ignore.
+     * Subclasses need this assignment operator if they use compiler-provided
+     * assignment operators of their own. An alternative to not declaring one
+     * here would be to declare and empty-implement a protected or public one.
+    UObject &UObject::operator=(const UObject &);
+     */
+};
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * This is a simple macro to add ICU RTTI to an ICU object implementation.
+ * This does not go into the header. This should only be used in *.cpp files.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
+    UClassID U_EXPORT2 myClass::getStaticClassID() { \
+        static char classID = 0; \
+        return (UClassID)&classID; \
+    } \
+    UClassID myClass::getDynamicClassID() const \
+    { return myClass::getStaticClassID(); }
+
+
+/**
+ * This macro adds ICU RTTI to an ICU abstract class implementation.
+ * This macro should be invoked in *.cpp files.  The corresponding
+ * header should declare getStaticClassID.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
+    UClassID U_EXPORT2 myClass::getStaticClassID() { \
+        static char classID = 0; \
+        return (UClassID)&classID; \
+    }
+
+#endif  /* U_HIDE_INTERNAL_API */
+
+U_NAMESPACE_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/urename.h b/src/core/basetypes/icu-ucsdet/include/unicode/urename.h
new file mode 100644
index 00000000..a8172626
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/urename.h
@@ -0,0 +1,1784 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2002-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  urename.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Created by: Perl script tools/genren.pl written by Vladimir Weinstein
+*
+*  Contains data for renaming ICU exports.
+*  Gets included by umachine.h
+*
+*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef URENAME_H
+#define URENAME_H
+
+/* U_DISABLE_RENAMING can be defined in the following ways:
+ *   - when running configure, e.g.
+ *        runConfigureICU Linux --disable-renaming
+ *   - by changing the default setting of U_DISABLE_RENAMING in uconfig.h
+ */
+
+#include "unicode/uconfig.h"
+
+#if !U_DISABLE_RENAMING
+
+/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give
+   the platform a chance to define it first.
+   Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined.
+ */
+
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#include "unicode/umachine.h"
+#endif
+
+/* If we still don't have U_ICU_ENTRY_POINT_RENAME use the default. */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#include "unicode/uvernum.h"
+#endif
+
+/* Error out before the following defines cause very strange and unexpected code breakage */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#error U_ICU_ENTRY_POINT_RENAME is not defined - cannot continue. Consider defining U_DISABLE_RENAMING if renaming should not be used.
+#endif
+
+
+/* C exports renaming data */
+
+#define T_CString_int64ToString U_ICU_ENTRY_POINT_RENAME(T_CString_int64ToString)
+#define T_CString_integerToString U_ICU_ENTRY_POINT_RENAME(T_CString_integerToString)
+#define T_CString_stringToInteger U_ICU_ENTRY_POINT_RENAME(T_CString_stringToInteger)
+#define T_CString_toLowerCase U_ICU_ENTRY_POINT_RENAME(T_CString_toLowerCase)
+#define T_CString_toUpperCase U_ICU_ENTRY_POINT_RENAME(T_CString_toUpperCase)
+#define UCNV_FROM_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_ESCAPE)
+#define UCNV_FROM_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SKIP)
+#define UCNV_FROM_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_STOP)
+#define UCNV_FROM_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SUBSTITUTE)
+#define UCNV_TO_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_ESCAPE)
+#define UCNV_TO_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SKIP)
+#define UCNV_TO_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_STOP)
+#define UCNV_TO_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SUBSTITUTE)
+#define UDataMemory_createNewInstance U_ICU_ENTRY_POINT_RENAME(UDataMemory_createNewInstance)
+#define UDataMemory_init U_ICU_ENTRY_POINT_RENAME(UDataMemory_init)
+#define UDataMemory_isLoaded U_ICU_ENTRY_POINT_RENAME(UDataMemory_isLoaded)
+#define UDataMemory_normalizeDataPointer U_ICU_ENTRY_POINT_RENAME(UDataMemory_normalizeDataPointer)
+#define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)
+#define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)
+#define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)
+#define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)
+#define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)
+#define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)
+#define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)
+#define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)
+#define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData)
+#define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data)
+#define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1)
+#define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11)
+#define _LMBCSData16 U_ICU_ENTRY_POINT_RENAME(_LMBCSData16)
+#define _LMBCSData17 U_ICU_ENTRY_POINT_RENAME(_LMBCSData17)
+#define _LMBCSData18 U_ICU_ENTRY_POINT_RENAME(_LMBCSData18)
+#define _LMBCSData19 U_ICU_ENTRY_POINT_RENAME(_LMBCSData19)
+#define _LMBCSData2 U_ICU_ENTRY_POINT_RENAME(_LMBCSData2)
+#define _LMBCSData3 U_ICU_ENTRY_POINT_RENAME(_LMBCSData3)
+#define _LMBCSData4 U_ICU_ENTRY_POINT_RENAME(_LMBCSData4)
+#define _LMBCSData5 U_ICU_ENTRY_POINT_RENAME(_LMBCSData5)
+#define _LMBCSData6 U_ICU_ENTRY_POINT_RENAME(_LMBCSData6)
+#define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8)
+#define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data)
+#define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData)
+#define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData)
+#define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData)
+#define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data)
+#define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData)
+#define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData)
+#define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data)
+#define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)
+#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
+#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
+#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
+#define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)
+#define gTimeZoneFilesInitOnce U_ICU_ENTRY_POINT_RENAME(gTimeZoneFilesInitOnce)
+#define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone)
+#define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close)
+#define izrule_equals U_ICU_ENTRY_POINT_RENAME(izrule_equals)
+#define izrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(izrule_getDSTSavings)
+#define izrule_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(izrule_getDynamicClassID)
+#define izrule_getFinalStart U_ICU_ENTRY_POINT_RENAME(izrule_getFinalStart)
+#define izrule_getFirstStart U_ICU_ENTRY_POINT_RENAME(izrule_getFirstStart)
+#define izrule_getName U_ICU_ENTRY_POINT_RENAME(izrule_getName)
+#define izrule_getNextStart U_ICU_ENTRY_POINT_RENAME(izrule_getNextStart)
+#define izrule_getPreviousStart U_ICU_ENTRY_POINT_RENAME(izrule_getPreviousStart)
+#define izrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(izrule_getRawOffset)
+#define izrule_getStaticClassID U_ICU_ENTRY_POINT_RENAME(izrule_getStaticClassID)
+#define izrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(izrule_isEquivalentTo)
+#define izrule_open U_ICU_ENTRY_POINT_RENAME(izrule_open)
+#define le_close U_ICU_ENTRY_POINT_RENAME(le_close)
+#define le_create U_ICU_ENTRY_POINT_RENAME(le_create)
+#define le_getCharIndices U_ICU_ENTRY_POINT_RENAME(le_getCharIndices)
+#define le_getCharIndicesWithBase U_ICU_ENTRY_POINT_RENAME(le_getCharIndicesWithBase)
+#define le_getGlyphCount U_ICU_ENTRY_POINT_RENAME(le_getGlyphCount)
+#define le_getGlyphPosition U_ICU_ENTRY_POINT_RENAME(le_getGlyphPosition)
+#define le_getGlyphPositions U_ICU_ENTRY_POINT_RENAME(le_getGlyphPositions)
+#define le_getGlyphs U_ICU_ENTRY_POINT_RENAME(le_getGlyphs)
+#define le_layoutChars U_ICU_ENTRY_POINT_RENAME(le_layoutChars)
+#define le_reset U_ICU_ENTRY_POINT_RENAME(le_reset)
+#define locale_getKeywords U_ICU_ENTRY_POINT_RENAME(locale_getKeywords)
+#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart)
+#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default)
+#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default)
+#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun)
+#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun)
+#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun)
+#define pl_close U_ICU_ENTRY_POINT_RENAME(pl_close)
+#define pl_closeFontRuns U_ICU_ENTRY_POINT_RENAME(pl_closeFontRuns)
+#define pl_closeLine U_ICU_ENTRY_POINT_RENAME(pl_closeLine)
+#define pl_closeLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_closeLocaleRuns)
+#define pl_closeValueRuns U_ICU_ENTRY_POINT_RENAME(pl_closeValueRuns)
+#define pl_countLineRuns U_ICU_ENTRY_POINT_RENAME(pl_countLineRuns)
+#define pl_create U_ICU_ENTRY_POINT_RENAME(pl_create)
+#define pl_getAscent U_ICU_ENTRY_POINT_RENAME(pl_getAscent)
+#define pl_getDescent U_ICU_ENTRY_POINT_RENAME(pl_getDescent)
+#define pl_getFontRunCount U_ICU_ENTRY_POINT_RENAME(pl_getFontRunCount)
+#define pl_getFontRunFont U_ICU_ENTRY_POINT_RENAME(pl_getFontRunFont)
+#define pl_getFontRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLastLimit)
+#define pl_getFontRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLimit)
+#define pl_getLeading U_ICU_ENTRY_POINT_RENAME(pl_getLeading)
+#define pl_getLineAscent U_ICU_ENTRY_POINT_RENAME(pl_getLineAscent)
+#define pl_getLineDescent U_ICU_ENTRY_POINT_RENAME(pl_getLineDescent)
+#define pl_getLineLeading U_ICU_ENTRY_POINT_RENAME(pl_getLineLeading)
+#define pl_getLineVisualRun U_ICU_ENTRY_POINT_RENAME(pl_getLineVisualRun)
+#define pl_getLineWidth U_ICU_ENTRY_POINT_RENAME(pl_getLineWidth)
+#define pl_getLocaleRunCount U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunCount)
+#define pl_getLocaleRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLastLimit)
+#define pl_getLocaleRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLimit)
+#define pl_getLocaleRunLocale U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLocale)
+#define pl_getParagraphLevel U_ICU_ENTRY_POINT_RENAME(pl_getParagraphLevel)
+#define pl_getTextDirection U_ICU_ENTRY_POINT_RENAME(pl_getTextDirection)
+#define pl_getValueRunCount U_ICU_ENTRY_POINT_RENAME(pl_getValueRunCount)
+#define pl_getValueRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLastLimit)
+#define pl_getValueRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLimit)
+#define pl_getValueRunValue U_ICU_ENTRY_POINT_RENAME(pl_getValueRunValue)
+#define pl_getVisualRunAscent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunAscent)
+#define pl_getVisualRunDescent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDescent)
+#define pl_getVisualRunDirection U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDirection)
+#define pl_getVisualRunFont U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunFont)
+#define pl_getVisualRunGlyphCount U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphCount)
+#define pl_getVisualRunGlyphToCharMap U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphToCharMap)
+#define pl_getVisualRunGlyphs U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphs)
+#define pl_getVisualRunLeading U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunLeading)
+#define pl_getVisualRunPositions U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunPositions)
+#define pl_isComplex U_ICU_ENTRY_POINT_RENAME(pl_isComplex)
+#define pl_nextLine U_ICU_ENTRY_POINT_RENAME(pl_nextLine)
+#define pl_openEmptyFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyFontRuns)
+#define pl_openEmptyLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyLocaleRuns)
+#define pl_openEmptyValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyValueRuns)
+#define pl_openFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openFontRuns)
+#define pl_openLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openLocaleRuns)
+#define pl_openValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openValueRuns)
+#define pl_reflow U_ICU_ENTRY_POINT_RENAME(pl_reflow)
+#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns)
+#define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns)
+#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns)
+#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems)
+#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource)
+#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias)
+#define res_getArrayItem U_ICU_ENTRY_POINT_RENAME(res_getArrayItem)
+#define res_getBinary U_ICU_ENTRY_POINT_RENAME(res_getBinary)
+#define res_getIntVector U_ICU_ENTRY_POINT_RENAME(res_getIntVector)
+#define res_getPublicType U_ICU_ENTRY_POINT_RENAME(res_getPublicType)
+#define res_getResource U_ICU_ENTRY_POINT_RENAME(res_getResource)
+#define res_getString U_ICU_ENTRY_POINT_RENAME(res_getString)
+#define res_getTableItemByIndex U_ICU_ENTRY_POINT_RENAME(res_getTableItemByIndex)
+#define res_getTableItemByKey U_ICU_ENTRY_POINT_RENAME(res_getTableItemByKey)
+#define res_load U_ICU_ENTRY_POINT_RENAME(res_load)
+#define res_read U_ICU_ENTRY_POINT_RENAME(res_read)
+#define res_unload U_ICU_ENTRY_POINT_RENAME(res_unload)
+#define u_UCharsToChars U_ICU_ENTRY_POINT_RENAME(u_UCharsToChars)
+#define u_austrcpy U_ICU_ENTRY_POINT_RENAME(u_austrcpy)
+#define u_austrncpy U_ICU_ENTRY_POINT_RENAME(u_austrncpy)
+#define u_catclose U_ICU_ENTRY_POINT_RENAME(u_catclose)
+#define u_catgets U_ICU_ENTRY_POINT_RENAME(u_catgets)
+#define u_catopen U_ICU_ENTRY_POINT_RENAME(u_catopen)
+#define u_charAge U_ICU_ENTRY_POINT_RENAME(u_charAge)
+#define u_charDigitValue U_ICU_ENTRY_POINT_RENAME(u_charDigitValue)
+#define u_charDirection U_ICU_ENTRY_POINT_RENAME(u_charDirection)
+#define u_charFromName U_ICU_ENTRY_POINT_RENAME(u_charFromName)
+#define u_charMirror U_ICU_ENTRY_POINT_RENAME(u_charMirror)
+#define u_charName U_ICU_ENTRY_POINT_RENAME(u_charName)
+#define u_charType U_ICU_ENTRY_POINT_RENAME(u_charType)
+#define u_charsToUChars U_ICU_ENTRY_POINT_RENAME(u_charsToUChars)
+#define u_cleanup U_ICU_ENTRY_POINT_RENAME(u_cleanup)
+#define u_countChar32 U_ICU_ENTRY_POINT_RENAME(u_countChar32)
+#define u_digit U_ICU_ENTRY_POINT_RENAME(u_digit)
+#define u_enumCharNames U_ICU_ENTRY_POINT_RENAME(u_enumCharNames)
+#define u_enumCharTypes U_ICU_ENTRY_POINT_RENAME(u_enumCharTypes)
+#define u_errorName U_ICU_ENTRY_POINT_RENAME(u_errorName)
+#define u_fadopt U_ICU_ENTRY_POINT_RENAME(u_fadopt)
+#define u_fclose U_ICU_ENTRY_POINT_RENAME(u_fclose)
+#define u_feof U_ICU_ENTRY_POINT_RENAME(u_feof)
+#define u_fflush U_ICU_ENTRY_POINT_RENAME(u_fflush)
+#define u_fgetConverter U_ICU_ENTRY_POINT_RENAME(u_fgetConverter)
+#define u_fgetNumberFormat U_ICU_ENTRY_POINT_RENAME(u_fgetNumberFormat)
+#define u_fgetc U_ICU_ENTRY_POINT_RENAME(u_fgetc)
+#define u_fgetcodepage U_ICU_ENTRY_POINT_RENAME(u_fgetcodepage)
+#define u_fgetcx U_ICU_ENTRY_POINT_RENAME(u_fgetcx)
+#define u_fgetfile U_ICU_ENTRY_POINT_RENAME(u_fgetfile)
+#define u_fgetlocale U_ICU_ENTRY_POINT_RENAME(u_fgetlocale)
+#define u_fgets U_ICU_ENTRY_POINT_RENAME(u_fgets)
+#define u_file_read U_ICU_ENTRY_POINT_RENAME(u_file_read)
+#define u_file_write U_ICU_ENTRY_POINT_RENAME(u_file_write)
+#define u_file_write_flush U_ICU_ENTRY_POINT_RENAME(u_file_write_flush)
+#define u_finit U_ICU_ENTRY_POINT_RENAME(u_finit)
+#define u_flushDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_flushDefaultConverter)
+#define u_foldCase U_ICU_ENTRY_POINT_RENAME(u_foldCase)
+#define u_fopen U_ICU_ENTRY_POINT_RENAME(u_fopen)
+#define u_fopen_u U_ICU_ENTRY_POINT_RENAME(u_fopen_u)
+#define u_forDigit U_ICU_ENTRY_POINT_RENAME(u_forDigit)
+#define u_formatMessage U_ICU_ENTRY_POINT_RENAME(u_formatMessage)
+#define u_formatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_formatMessageWithError)
+#define u_fprintf U_ICU_ENTRY_POINT_RENAME(u_fprintf)
+#define u_fprintf_u U_ICU_ENTRY_POINT_RENAME(u_fprintf_u)
+#define u_fputc U_ICU_ENTRY_POINT_RENAME(u_fputc)
+#define u_fputs U_ICU_ENTRY_POINT_RENAME(u_fputs)
+#define u_frewind U_ICU_ENTRY_POINT_RENAME(u_frewind)
+#define u_fscanf U_ICU_ENTRY_POINT_RENAME(u_fscanf)
+#define u_fscanf_u U_ICU_ENTRY_POINT_RENAME(u_fscanf_u)
+#define u_fsetcodepage U_ICU_ENTRY_POINT_RENAME(u_fsetcodepage)
+#define u_fsetlocale U_ICU_ENTRY_POINT_RENAME(u_fsetlocale)
+#define u_fsettransliterator U_ICU_ENTRY_POINT_RENAME(u_fsettransliterator)
+#define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen)
+#define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc)
+#define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket)
+#define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass)
+#define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory)
+#define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion)
+#define u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter)
+#define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure)
+#define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment)
+#define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue)
+#define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue)
+#define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue)
+#define u_getMainProperties U_ICU_ENTRY_POINT_RENAME(u_getMainProperties)
+#define u_getNumericValue U_ICU_ENTRY_POINT_RENAME(u_getNumericValue)
+#define u_getPropertyEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyEnum)
+#define u_getPropertyName U_ICU_ENTRY_POINT_RENAME(u_getPropertyName)
+#define u_getPropertyValueEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueEnum)
+#define u_getPropertyValueName U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueName)
+#define u_getTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_getTimeZoneFilesDirectory)
+#define u_getUnicodeProperties U_ICU_ENTRY_POINT_RENAME(u_getUnicodeProperties)
+#define u_getUnicodeVersion U_ICU_ENTRY_POINT_RENAME(u_getUnicodeVersion)
+#define u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion)
+#define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout)
+#define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty)
+#define u_init U_ICU_ENTRY_POINT_RENAME(u_init)
+#define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable)
+#define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart)
+#define u_isIDStart U_ICU_ENTRY_POINT_RENAME(u_isIDStart)
+#define u_isISOControl U_ICU_ENTRY_POINT_RENAME(u_isISOControl)
+#define u_isJavaIDPart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDPart)
+#define u_isJavaIDStart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDStart)
+#define u_isJavaSpaceChar U_ICU_ENTRY_POINT_RENAME(u_isJavaSpaceChar)
+#define u_isMirrored U_ICU_ENTRY_POINT_RENAME(u_isMirrored)
+#define u_isUAlphabetic U_ICU_ENTRY_POINT_RENAME(u_isUAlphabetic)
+#define u_isULowercase U_ICU_ENTRY_POINT_RENAME(u_isULowercase)
+#define u_isUUppercase U_ICU_ENTRY_POINT_RENAME(u_isUUppercase)
+#define u_isUWhiteSpace U_ICU_ENTRY_POINT_RENAME(u_isUWhiteSpace)
+#define u_isWhitespace U_ICU_ENTRY_POINT_RENAME(u_isWhitespace)
+#define u_isalnum U_ICU_ENTRY_POINT_RENAME(u_isalnum)
+#define u_isalnumPOSIX U_ICU_ENTRY_POINT_RENAME(u_isalnumPOSIX)
+#define u_isalpha U_ICU_ENTRY_POINT_RENAME(u_isalpha)
+#define u_isbase U_ICU_ENTRY_POINT_RENAME(u_isbase)
+#define u_isblank U_ICU_ENTRY_POINT_RENAME(u_isblank)
+#define u_iscntrl U_ICU_ENTRY_POINT_RENAME(u_iscntrl)
+#define u_isdefined U_ICU_ENTRY_POINT_RENAME(u_isdefined)
+#define u_isdigit U_ICU_ENTRY_POINT_RENAME(u_isdigit)
+#define u_isgraph U_ICU_ENTRY_POINT_RENAME(u_isgraph)
+#define u_isgraphPOSIX U_ICU_ENTRY_POINT_RENAME(u_isgraphPOSIX)
+#define u_islower U_ICU_ENTRY_POINT_RENAME(u_islower)
+#define u_isprint U_ICU_ENTRY_POINT_RENAME(u_isprint)
+#define u_isprintPOSIX U_ICU_ENTRY_POINT_RENAME(u_isprintPOSIX)
+#define u_ispunct U_ICU_ENTRY_POINT_RENAME(u_ispunct)
+#define u_isspace U_ICU_ENTRY_POINT_RENAME(u_isspace)
+#define u_istitle U_ICU_ENTRY_POINT_RENAME(u_istitle)
+#define u_isupper U_ICU_ENTRY_POINT_RENAME(u_isupper)
+#define u_isxdigit U_ICU_ENTRY_POINT_RENAME(u_isxdigit)
+#define u_locbund_close U_ICU_ENTRY_POINT_RENAME(u_locbund_close)
+#define u_locbund_getNumberFormat U_ICU_ENTRY_POINT_RENAME(u_locbund_getNumberFormat)
+#define u_locbund_init U_ICU_ENTRY_POINT_RENAME(u_locbund_init)
+#define u_memcasecmp U_ICU_ENTRY_POINT_RENAME(u_memcasecmp)
+#define u_memchr U_ICU_ENTRY_POINT_RENAME(u_memchr)
+#define u_memchr32 U_ICU_ENTRY_POINT_RENAME(u_memchr32)
+#define u_memcmp U_ICU_ENTRY_POINT_RENAME(u_memcmp)
+#define u_memcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_memcmpCodePointOrder)
+#define u_memcpy U_ICU_ENTRY_POINT_RENAME(u_memcpy)
+#define u_memmove U_ICU_ENTRY_POINT_RENAME(u_memmove)
+#define u_memrchr U_ICU_ENTRY_POINT_RENAME(u_memrchr)
+#define u_memrchr32 U_ICU_ENTRY_POINT_RENAME(u_memrchr32)
+#define u_memset U_ICU_ENTRY_POINT_RENAME(u_memset)
+#define u_parseMessage U_ICU_ENTRY_POINT_RENAME(u_parseMessage)
+#define u_parseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_parseMessageWithError)
+#define u_printf U_ICU_ENTRY_POINT_RENAME(u_printf)
+#define u_printf_parse U_ICU_ENTRY_POINT_RENAME(u_printf_parse)
+#define u_printf_u U_ICU_ENTRY_POINT_RENAME(u_printf_u)
+#define u_releaseDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_releaseDefaultConverter)
+#define u_scanf_parse U_ICU_ENTRY_POINT_RENAME(u_scanf_parse)
+#define u_setAtomicIncDecFunctions U_ICU_ENTRY_POINT_RENAME(u_setAtomicIncDecFunctions)
+#define u_setDataDirectory U_ICU_ENTRY_POINT_RENAME(u_setDataDirectory)
+#define u_setMemoryFunctions U_ICU_ENTRY_POINT_RENAME(u_setMemoryFunctions)
+#define u_setMutexFunctions U_ICU_ENTRY_POINT_RENAME(u_setMutexFunctions)
+#define u_setTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_setTimeZoneFilesDirectory)
+#define u_shapeArabic U_ICU_ENTRY_POINT_RENAME(u_shapeArabic)
+#define u_snprintf U_ICU_ENTRY_POINT_RENAME(u_snprintf)
+#define u_snprintf_u U_ICU_ENTRY_POINT_RENAME(u_snprintf_u)
+#define u_sprintf U_ICU_ENTRY_POINT_RENAME(u_sprintf)
+#define u_sprintf_u U_ICU_ENTRY_POINT_RENAME(u_sprintf_u)
+#define u_sscanf U_ICU_ENTRY_POINT_RENAME(u_sscanf)
+#define u_sscanf_u U_ICU_ENTRY_POINT_RENAME(u_sscanf_u)
+#define u_strCaseCompare U_ICU_ENTRY_POINT_RENAME(u_strCaseCompare)
+#define u_strCompare U_ICU_ENTRY_POINT_RENAME(u_strCompare)
+#define u_strCompareIter U_ICU_ENTRY_POINT_RENAME(u_strCompareIter)
+#define u_strFindFirst U_ICU_ENTRY_POINT_RENAME(u_strFindFirst)
+#define u_strFindLast U_ICU_ENTRY_POINT_RENAME(u_strFindLast)
+#define u_strFoldCase U_ICU_ENTRY_POINT_RENAME(u_strFoldCase)
+#define u_strFromJavaModifiedUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromJavaModifiedUTF8WithSub)
+#define u_strFromPunycode U_ICU_ENTRY_POINT_RENAME(u_strFromPunycode)
+#define u_strFromUTF32 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32)
+#define u_strFromUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32WithSub)
+#define u_strFromUTF8 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8)
+#define u_strFromUTF8Lenient U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8Lenient)
+#define u_strFromUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8WithSub)
+#define u_strFromWCS U_ICU_ENTRY_POINT_RENAME(u_strFromWCS)
+#define u_strHasMoreChar32Than U_ICU_ENTRY_POINT_RENAME(u_strHasMoreChar32Than)
+#define u_strToJavaModifiedUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToJavaModifiedUTF8)
+#define u_strToLower U_ICU_ENTRY_POINT_RENAME(u_strToLower)
+#define u_strToPunycode U_ICU_ENTRY_POINT_RENAME(u_strToPunycode)
+#define u_strToTitle U_ICU_ENTRY_POINT_RENAME(u_strToTitle)
+#define u_strToUTF32 U_ICU_ENTRY_POINT_RENAME(u_strToUTF32)
+#define u_strToUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF32WithSub)
+#define u_strToUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToUTF8)
+#define u_strToUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF8WithSub)
+#define u_strToUpper U_ICU_ENTRY_POINT_RENAME(u_strToUpper)
+#define u_strToWCS U_ICU_ENTRY_POINT_RENAME(u_strToWCS)
+#define u_strcasecmp U_ICU_ENTRY_POINT_RENAME(u_strcasecmp)
+#define u_strcat U_ICU_ENTRY_POINT_RENAME(u_strcat)
+#define u_strchr U_ICU_ENTRY_POINT_RENAME(u_strchr)
+#define u_strchr32 U_ICU_ENTRY_POINT_RENAME(u_strchr32)
+#define u_strcmp U_ICU_ENTRY_POINT_RENAME(u_strcmp)
+#define u_strcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strcmpCodePointOrder)
+#define u_strcmpFold U_ICU_ENTRY_POINT_RENAME(u_strcmpFold)
+#define u_strcpy U_ICU_ENTRY_POINT_RENAME(u_strcpy)
+#define u_strcspn U_ICU_ENTRY_POINT_RENAME(u_strcspn)
+#define u_strlen U_ICU_ENTRY_POINT_RENAME(u_strlen)
+#define u_strncasecmp U_ICU_ENTRY_POINT_RENAME(u_strncasecmp)
+#define u_strncat U_ICU_ENTRY_POINT_RENAME(u_strncat)
+#define u_strncmp U_ICU_ENTRY_POINT_RENAME(u_strncmp)
+#define u_strncmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strncmpCodePointOrder)
+#define u_strncpy U_ICU_ENTRY_POINT_RENAME(u_strncpy)
+#define u_strpbrk U_ICU_ENTRY_POINT_RENAME(u_strpbrk)
+#define u_strrchr U_ICU_ENTRY_POINT_RENAME(u_strrchr)
+#define u_strrchr32 U_ICU_ENTRY_POINT_RENAME(u_strrchr32)
+#define u_strrstr U_ICU_ENTRY_POINT_RENAME(u_strrstr)
+#define u_strspn U_ICU_ENTRY_POINT_RENAME(u_strspn)
+#define u_strstr U_ICU_ENTRY_POINT_RENAME(u_strstr)
+#define u_strtok_r U_ICU_ENTRY_POINT_RENAME(u_strtok_r)
+#define u_terminateChars U_ICU_ENTRY_POINT_RENAME(u_terminateChars)
+#define u_terminateUChar32s U_ICU_ENTRY_POINT_RENAME(u_terminateUChar32s)
+#define u_terminateUChars U_ICU_ENTRY_POINT_RENAME(u_terminateUChars)
+#define u_terminateWChars U_ICU_ENTRY_POINT_RENAME(u_terminateWChars)
+#define u_tolower U_ICU_ENTRY_POINT_RENAME(u_tolower)
+#define u_totitle U_ICU_ENTRY_POINT_RENAME(u_totitle)
+#define u_toupper U_ICU_ENTRY_POINT_RENAME(u_toupper)
+#define u_uastrcpy U_ICU_ENTRY_POINT_RENAME(u_uastrcpy)
+#define u_uastrncpy U_ICU_ENTRY_POINT_RENAME(u_uastrncpy)
+#define u_unescape U_ICU_ENTRY_POINT_RENAME(u_unescape)
+#define u_unescapeAt U_ICU_ENTRY_POINT_RENAME(u_unescapeAt)
+#define u_versionFromString U_ICU_ENTRY_POINT_RENAME(u_versionFromString)
+#define u_versionFromUString U_ICU_ENTRY_POINT_RENAME(u_versionFromUString)
+#define u_versionToString U_ICU_ENTRY_POINT_RENAME(u_versionToString)
+#define u_vformatMessage U_ICU_ENTRY_POINT_RENAME(u_vformatMessage)
+#define u_vformatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vformatMessageWithError)
+#define u_vfprintf U_ICU_ENTRY_POINT_RENAME(u_vfprintf)
+#define u_vfprintf_u U_ICU_ENTRY_POINT_RENAME(u_vfprintf_u)
+#define u_vfscanf U_ICU_ENTRY_POINT_RENAME(u_vfscanf)
+#define u_vfscanf_u U_ICU_ENTRY_POINT_RENAME(u_vfscanf_u)
+#define u_vparseMessage U_ICU_ENTRY_POINT_RENAME(u_vparseMessage)
+#define u_vparseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vparseMessageWithError)
+#define u_vsnprintf U_ICU_ENTRY_POINT_RENAME(u_vsnprintf)
+#define u_vsnprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsnprintf_u)
+#define u_vsprintf U_ICU_ENTRY_POINT_RENAME(u_vsprintf)
+#define u_vsprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsprintf_u)
+#define u_vsscanf U_ICU_ENTRY_POINT_RENAME(u_vsscanf)
+#define u_vsscanf_u U_ICU_ENTRY_POINT_RENAME(u_vsscanf_u)
+#define u_writeIdenticalLevelRun U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRun)
+#define ubidi_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ubidi_addPropertyStarts)
+#define ubidi_close U_ICU_ENTRY_POINT_RENAME(ubidi_close)
+#define ubidi_countParagraphs U_ICU_ENTRY_POINT_RENAME(ubidi_countParagraphs)
+#define ubidi_countRuns U_ICU_ENTRY_POINT_RENAME(ubidi_countRuns)
+#define ubidi_getBaseDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getBaseDirection)
+#define ubidi_getClass U_ICU_ENTRY_POINT_RENAME(ubidi_getClass)
+#define ubidi_getClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_getClassCallback)
+#define ubidi_getCustomizedClass U_ICU_ENTRY_POINT_RENAME(ubidi_getCustomizedClass)
+#define ubidi_getDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getDirection)
+#define ubidi_getJoiningGroup U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningGroup)
+#define ubidi_getJoiningType U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningType)
+#define ubidi_getLength U_ICU_ENTRY_POINT_RENAME(ubidi_getLength)
+#define ubidi_getLevelAt U_ICU_ENTRY_POINT_RENAME(ubidi_getLevelAt)
+#define ubidi_getLevels U_ICU_ENTRY_POINT_RENAME(ubidi_getLevels)
+#define ubidi_getLogicalIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalIndex)
+#define ubidi_getLogicalMap U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalMap)
+#define ubidi_getLogicalRun U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalRun)
+#define ubidi_getMaxValue U_ICU_ENTRY_POINT_RENAME(ubidi_getMaxValue)
+#define ubidi_getMemory U_ICU_ENTRY_POINT_RENAME(ubidi_getMemory)
+#define ubidi_getMirror U_ICU_ENTRY_POINT_RENAME(ubidi_getMirror)
+#define ubidi_getPairedBracket U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracket)
+#define ubidi_getPairedBracketType U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracketType)
+#define ubidi_getParaLevel U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevel)
+#define ubidi_getParaLevelAtIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevelAtIndex)
+#define ubidi_getParagraph U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraph)
+#define ubidi_getParagraphByIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraphByIndex)
+#define ubidi_getProcessedLength U_ICU_ENTRY_POINT_RENAME(ubidi_getProcessedLength)
+#define ubidi_getReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingMode)
+#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions)
+#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength)
+#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns)
+#define ubidi_getSingleton U_ICU_ENTRY_POINT_RENAME(ubidi_getSingleton)
+#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText)
+#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex)
+#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap)
+#define ubidi_getVisualRun U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualRun)
+#define ubidi_invertMap U_ICU_ENTRY_POINT_RENAME(ubidi_invertMap)
+#define ubidi_isBidiControl U_ICU_ENTRY_POINT_RENAME(ubidi_isBidiControl)
+#define ubidi_isInverse U_ICU_ENTRY_POINT_RENAME(ubidi_isInverse)
+#define ubidi_isJoinControl U_ICU_ENTRY_POINT_RENAME(ubidi_isJoinControl)
+#define ubidi_isMirrored U_ICU_ENTRY_POINT_RENAME(ubidi_isMirrored)
+#define ubidi_isOrderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_isOrderParagraphsLTR)
+#define ubidi_open U_ICU_ENTRY_POINT_RENAME(ubidi_open)
+#define ubidi_openSized U_ICU_ENTRY_POINT_RENAME(ubidi_openSized)
+#define ubidi_orderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_orderParagraphsLTR)
+#define ubidi_reorderLogical U_ICU_ENTRY_POINT_RENAME(ubidi_reorderLogical)
+#define ubidi_reorderVisual U_ICU_ENTRY_POINT_RENAME(ubidi_reorderVisual)
+#define ubidi_setClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_setClassCallback)
+#define ubidi_setContext U_ICU_ENTRY_POINT_RENAME(ubidi_setContext)
+#define ubidi_setInverse U_ICU_ENTRY_POINT_RENAME(ubidi_setInverse)
+#define ubidi_setLine U_ICU_ENTRY_POINT_RENAME(ubidi_setLine)
+#define ubidi_setPara U_ICU_ENTRY_POINT_RENAME(ubidi_setPara)
+#define ubidi_setReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingMode)
+#define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions)
+#define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered)
+#define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse)
+#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
+#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
+#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
+#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current)
+#define ubrk_first U_ICU_ENTRY_POINT_RENAME(ubrk_first)
+#define ubrk_following U_ICU_ENTRY_POINT_RENAME(ubrk_following)
+#define ubrk_getAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_getAvailable)
+#define ubrk_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ubrk_getLocaleByType)
+#define ubrk_getRuleStatus U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatus)
+#define ubrk_getRuleStatusVec U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatusVec)
+#define ubrk_isBoundary U_ICU_ENTRY_POINT_RENAME(ubrk_isBoundary)
+#define ubrk_last U_ICU_ENTRY_POINT_RENAME(ubrk_last)
+#define ubrk_next U_ICU_ENTRY_POINT_RENAME(ubrk_next)
+#define ubrk_open U_ICU_ENTRY_POINT_RENAME(ubrk_open)
+#define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules)
+#define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding)
+#define ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous)
+#define ubrk_refreshUText U_ICU_ENTRY_POINT_RENAME(ubrk_refreshUText)
+#define ubrk_safeClone U_ICU_ENTRY_POINT_RENAME(ubrk_safeClone)
+#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText)
+#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText)
+#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap)
+#define ucache_compareKeys U_ICU_ENTRY_POINT_RENAME(ucache_compareKeys)
+#define ucache_deleteKey U_ICU_ENTRY_POINT_RENAME(ucache_deleteKey)
+#define ucache_hashKeys U_ICU_ENTRY_POINT_RENAME(ucache_hashKeys)
+#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add)
+#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear)
+#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField)
+#define ucal_clone U_ICU_ENTRY_POINT_RENAME(ucal_clone)
+#define ucal_close U_ICU_ENTRY_POINT_RENAME(ucal_close)
+#define ucal_countAvailable U_ICU_ENTRY_POINT_RENAME(ucal_countAvailable)
+#define ucal_equivalentTo U_ICU_ENTRY_POINT_RENAME(ucal_equivalentTo)
+#define ucal_get U_ICU_ENTRY_POINT_RENAME(ucal_get)
+#define ucal_getAttribute U_ICU_ENTRY_POINT_RENAME(ucal_getAttribute)
+#define ucal_getAvailable U_ICU_ENTRY_POINT_RENAME(ucal_getAvailable)
+#define ucal_getCanonicalTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getCanonicalTimeZoneID)
+#define ucal_getDSTSavings U_ICU_ENTRY_POINT_RENAME(ucal_getDSTSavings)
+#define ucal_getDayOfWeekType U_ICU_ENTRY_POINT_RENAME(ucal_getDayOfWeekType)
+#define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone)
+#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference)
+#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange)
+#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale)
+#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit)
+#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType)
+#define ucal_getMillis U_ICU_ENTRY_POINT_RENAME(ucal_getMillis)
+#define ucal_getNow U_ICU_ENTRY_POINT_RENAME(ucal_getNow)
+#define ucal_getTZDataVersion U_ICU_ENTRY_POINT_RENAME(ucal_getTZDataVersion)
+#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName)
+#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID)
+#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID)
+#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate)
+#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType)
+#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition)
+#define ucal_getWindowsTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getWindowsTimeZoneID)
+#define ucal_inDaylightTime U_ICU_ENTRY_POINT_RENAME(ucal_inDaylightTime)
+#define ucal_isSet U_ICU_ENTRY_POINT_RENAME(ucal_isSet)
+#define ucal_isWeekend U_ICU_ENTRY_POINT_RENAME(ucal_isWeekend)
+#define ucal_open U_ICU_ENTRY_POINT_RENAME(ucal_open)
+#define ucal_openCountryTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openCountryTimeZones)
+#define ucal_openTimeZoneIDEnumeration U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZoneIDEnumeration)
+#define ucal_openTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZones)
+#define ucal_roll U_ICU_ENTRY_POINT_RENAME(ucal_roll)
+#define ucal_set U_ICU_ENTRY_POINT_RENAME(ucal_set)
+#define ucal_setAttribute U_ICU_ENTRY_POINT_RENAME(ucal_setAttribute)
+#define ucal_setDate U_ICU_ENTRY_POINT_RENAME(ucal_setDate)
+#define ucal_setDateTime U_ICU_ENTRY_POINT_RENAME(ucal_setDateTime)
+#define ucal_setDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setDefaultTimeZone)
+#define ucal_setGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_setGregorianChange)
+#define ucal_setMillis U_ICU_ENTRY_POINT_RENAME(ucal_setMillis)
+#define ucal_setTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setTimeZone)
+#define ucase_addCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addCaseClosure)
+#define ucase_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ucase_addPropertyStarts)
+#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure)
+#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold)
+#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale)
+#define ucase_getSingleton U_ICU_ENTRY_POINT_RENAME(ucase_getSingleton)
+#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType)
+#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable)
+#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty)
+#define ucase_isCaseSensitive U_ICU_ENTRY_POINT_RENAME(ucase_isCaseSensitive)
+#define ucase_isSoftDotted U_ICU_ENTRY_POINT_RENAME(ucase_isSoftDotted)
+#define ucase_toFullFolding U_ICU_ENTRY_POINT_RENAME(ucase_toFullFolding)
+#define ucase_toFullLower U_ICU_ENTRY_POINT_RENAME(ucase_toFullLower)
+#define ucase_toFullTitle U_ICU_ENTRY_POINT_RENAME(ucase_toFullTitle)
+#define ucase_toFullUpper U_ICU_ENTRY_POINT_RENAME(ucase_toFullUpper)
+#define ucase_tolower U_ICU_ENTRY_POINT_RENAME(ucase_tolower)
+#define ucase_totitle U_ICU_ENTRY_POINT_RENAME(ucase_totitle)
+#define ucase_toupper U_ICU_ENTRY_POINT_RENAME(ucase_toupper)
+#define ucasemap_close U_ICU_ENTRY_POINT_RENAME(ucasemap_close)
+#define ucasemap_getBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_getBreakIterator)
+#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale)
+#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions)
+#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle)
+#define ucasemap_mapUTF8 U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8)
+#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open)
+#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator)
+#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale)
+#define ucasemap_setOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_setOptions)
+#define ucasemap_toTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_toTitle)
+#define ucasemap_utf8FoldCase U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8FoldCase)
+#define ucasemap_utf8ToLower U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToLower)
+#define ucasemap_utf8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToTitle)
+#define ucasemap_utf8ToUpper U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToUpper)
+#define uchar_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uchar_addPropertyStarts)
+#define uchar_swapNames U_ICU_ENTRY_POINT_RENAME(uchar_swapNames)
+#define ucln_cleanupOne U_ICU_ENTRY_POINT_RENAME(ucln_cleanupOne)
+#define ucln_common_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_common_registerCleanup)
+#define ucln_i18n_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_i18n_registerCleanup)
+#define ucln_io_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_io_registerCleanup)
+#define ucln_lib_cleanup U_ICU_ENTRY_POINT_RENAME(ucln_lib_cleanup)
+#define ucln_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_registerCleanup)
+#define ucnv_MBCSFromUChar32 U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUChar32)
+#define ucnv_MBCSFromUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUnicodeWithOffsets)
+#define ucnv_MBCSGetFilteredUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetFilteredUnicodeSetForUnicode)
+#define ucnv_MBCSGetType U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetType)
+#define ucnv_MBCSGetUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetUnicodeSetForUnicode)
+#define ucnv_MBCSIsLeadByte U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSIsLeadByte)
+#define ucnv_MBCSSimpleGetNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSSimpleGetNextUChar)
+#define ucnv_MBCSToUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSToUnicodeWithOffsets)
+#define ucnv_bld_countAvailableConverters U_ICU_ENTRY_POINT_RENAME(ucnv_bld_countAvailableConverters)
+#define ucnv_bld_getAvailableConverter U_ICU_ENTRY_POINT_RENAME(ucnv_bld_getAvailableConverter)
+#define ucnv_canCreateConverter U_ICU_ENTRY_POINT_RENAME(ucnv_canCreateConverter)
+#define ucnv_cbFromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteBytes)
+#define ucnv_cbFromUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteSub)
+#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars)
+#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub)
+#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars)
+#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close)
+#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames)
+#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert)
+#define ucnv_convertEx U_ICU_ENTRY_POINT_RENAME(ucnv_convertEx)
+#define ucnv_countAliases U_ICU_ENTRY_POINT_RENAME(ucnv_countAliases)
+#define ucnv_countAvailable U_ICU_ENTRY_POINT_RENAME(ucnv_countAvailable)
+#define ucnv_countStandards U_ICU_ENTRY_POINT_RENAME(ucnv_countStandards)
+#define ucnv_createAlgorithmicConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createAlgorithmicConverter)
+#define ucnv_createConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createConverter)
+#define ucnv_createConverterFromPackage U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromPackage)
+#define ucnv_createConverterFromSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromSharedData)
+#define ucnv_detectUnicodeSignature U_ICU_ENTRY_POINT_RENAME(ucnv_detectUnicodeSignature)
+#define ucnv_extContinueMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchFromU)
+#define ucnv_extContinueMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchToU)
+#define ucnv_extGetUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_extGetUnicodeSet)
+#define ucnv_extInitialMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchFromU)
+#define ucnv_extInitialMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchToU)
+#define ucnv_extSimpleMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchFromU)
+#define ucnv_extSimpleMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchToU)
+#define ucnv_fixFileSeparator U_ICU_ENTRY_POINT_RENAME(ucnv_fixFileSeparator)
+#define ucnv_flushCache U_ICU_ENTRY_POINT_RENAME(ucnv_flushCache)
+#define ucnv_fromAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_fromAlgorithmic)
+#define ucnv_fromUChars U_ICU_ENTRY_POINT_RENAME(ucnv_fromUChars)
+#define ucnv_fromUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_fromUCountPending)
+#define ucnv_fromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_fromUWriteBytes)
+#define ucnv_fromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode)
+#define ucnv_fromUnicode_UTF8 U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8)
+#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8_OFFSETS_LOGIC)
+#define ucnv_getAlias U_ICU_ENTRY_POINT_RENAME(ucnv_getAlias)
+#define ucnv_getAliases U_ICU_ENTRY_POINT_RENAME(ucnv_getAliases)
+#define ucnv_getAvailableName U_ICU_ENTRY_POINT_RENAME(ucnv_getAvailableName)
+#define ucnv_getCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_getCCSID)
+#define ucnv_getCanonicalName U_ICU_ENTRY_POINT_RENAME(ucnv_getCanonicalName)
+#define ucnv_getCompleteUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getCompleteUnicodeSet)
+#define ucnv_getDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_getDefaultName)
+#define ucnv_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucnv_getDisplayName)
+#define ucnv_getFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getFromUCallBack)
+#define ucnv_getInvalidChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidChars)
+#define ucnv_getInvalidUChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidUChars)
+#define ucnv_getMaxCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMaxCharSize)
+#define ucnv_getMinCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMinCharSize)
+#define ucnv_getName U_ICU_ENTRY_POINT_RENAME(ucnv_getName)
+#define ucnv_getNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_getNextUChar)
+#define ucnv_getNonSurrogateUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getNonSurrogateUnicodeSet)
+#define ucnv_getPlatform U_ICU_ENTRY_POINT_RENAME(ucnv_getPlatform)
+#define ucnv_getStandard U_ICU_ENTRY_POINT_RENAME(ucnv_getStandard)
+#define ucnv_getStandardName U_ICU_ENTRY_POINT_RENAME(ucnv_getStandardName)
+#define ucnv_getStarters U_ICU_ENTRY_POINT_RENAME(ucnv_getStarters)
+#define ucnv_getSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_getSubstChars)
+#define ucnv_getToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getToUCallBack)
+#define ucnv_getType U_ICU_ENTRY_POINT_RENAME(ucnv_getType)
+#define ucnv_getUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getUnicodeSet)
+#define ucnv_incrementRefCount U_ICU_ENTRY_POINT_RENAME(ucnv_incrementRefCount)
+#define ucnv_io_countKnownConverters U_ICU_ENTRY_POINT_RENAME(ucnv_io_countKnownConverters)
+#define ucnv_io_getConverterName U_ICU_ENTRY_POINT_RENAME(ucnv_io_getConverterName)
+#define ucnv_io_stripASCIIForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripASCIIForCompare)
+#define ucnv_io_stripEBCDICForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripEBCDICForCompare)
+#define ucnv_isAmbiguous U_ICU_ENTRY_POINT_RENAME(ucnv_isAmbiguous)
+#define ucnv_isFixedWidth U_ICU_ENTRY_POINT_RENAME(ucnv_isFixedWidth)
+#define ucnv_load U_ICU_ENTRY_POINT_RENAME(ucnv_load)
+#define ucnv_loadSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_loadSharedData)
+#define ucnv_open U_ICU_ENTRY_POINT_RENAME(ucnv_open)
+#define ucnv_openAllNames U_ICU_ENTRY_POINT_RENAME(ucnv_openAllNames)
+#define ucnv_openCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_openCCSID)
+#define ucnv_openPackage U_ICU_ENTRY_POINT_RENAME(ucnv_openPackage)
+#define ucnv_openStandardNames U_ICU_ENTRY_POINT_RENAME(ucnv_openStandardNames)
+#define ucnv_openU U_ICU_ENTRY_POINT_RENAME(ucnv_openU)
+#define ucnv_reset U_ICU_ENTRY_POINT_RENAME(ucnv_reset)
+#define ucnv_resetFromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetFromUnicode)
+#define ucnv_resetToUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetToUnicode)
+#define ucnv_safeClone U_ICU_ENTRY_POINT_RENAME(ucnv_safeClone)
+#define ucnv_setDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_setDefaultName)
+#define ucnv_setFallback U_ICU_ENTRY_POINT_RENAME(ucnv_setFallback)
+#define ucnv_setFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setFromUCallBack)
+#define ucnv_setSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstChars)
+#define ucnv_setSubstString U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstString)
+#define ucnv_setToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setToUCallBack)
+#define ucnv_swap U_ICU_ENTRY_POINT_RENAME(ucnv_swap)
+#define ucnv_swapAliases U_ICU_ENTRY_POINT_RENAME(ucnv_swapAliases)
+#define ucnv_toAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_toAlgorithmic)
+#define ucnv_toUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUChars)
+#define ucnv_toUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_toUCountPending)
+#define ucnv_toUWriteCodePoint U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteCodePoint)
+#define ucnv_toUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteUChars)
+#define ucnv_toUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_toUnicode)
+#define ucnv_unload U_ICU_ENTRY_POINT_RENAME(ucnv_unload)
+#define ucnv_unloadSharedDataIfReady U_ICU_ENTRY_POINT_RENAME(ucnv_unloadSharedDataIfReady)
+#define ucnv_usesFallback U_ICU_ENTRY_POINT_RENAME(ucnv_usesFallback)
+#define ucnvsel_close U_ICU_ENTRY_POINT_RENAME(ucnvsel_close)
+#define ucnvsel_open U_ICU_ENTRY_POINT_RENAME(ucnvsel_open)
+#define ucnvsel_openFromSerialized U_ICU_ENTRY_POINT_RENAME(ucnvsel_openFromSerialized)
+#define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString)
+#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8)
+#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize)
+#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary)
+#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close)
+#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements)
+#define ucol_countAvailable U_ICU_ENTRY_POINT_RENAME(ucol_countAvailable)
+#define ucol_equal U_ICU_ENTRY_POINT_RENAME(ucol_equal)
+#define ucol_equals U_ICU_ENTRY_POINT_RENAME(ucol_equals)
+#define ucol_getAttribute U_ICU_ENTRY_POINT_RENAME(ucol_getAttribute)
+#define ucol_getAvailable U_ICU_ENTRY_POINT_RENAME(ucol_getAvailable)
+#define ucol_getBound U_ICU_ENTRY_POINT_RENAME(ucol_getBound)
+#define ucol_getContractions U_ICU_ENTRY_POINT_RENAME(ucol_getContractions)
+#define ucol_getContractionsAndExpansions U_ICU_ENTRY_POINT_RENAME(ucol_getContractionsAndExpansions)
+#define ucol_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucol_getDisplayName)
+#define ucol_getEquivalentReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getEquivalentReorderCodes)
+#define ucol_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ucol_getFunctionalEquivalent)
+#define ucol_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValues)
+#define ucol_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValuesForLocale)
+#define ucol_getKeywords U_ICU_ENTRY_POINT_RENAME(ucol_getKeywords)
+#define ucol_getLocale U_ICU_ENTRY_POINT_RENAME(ucol_getLocale)
+#define ucol_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucol_getLocaleByType)
+#define ucol_getMaxExpansion U_ICU_ENTRY_POINT_RENAME(ucol_getMaxExpansion)
+#define ucol_getMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_getMaxVariable)
+#define ucol_getOffset U_ICU_ENTRY_POINT_RENAME(ucol_getOffset)
+#define ucol_getReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodes)
+#define ucol_getRules U_ICU_ENTRY_POINT_RENAME(ucol_getRules)
+#define ucol_getRulesEx U_ICU_ENTRY_POINT_RENAME(ucol_getRulesEx)
+#define ucol_getShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_getShortDefinitionString)
+#define ucol_getSortKey U_ICU_ENTRY_POINT_RENAME(ucol_getSortKey)
+#define ucol_getStrength U_ICU_ENTRY_POINT_RENAME(ucol_getStrength)
+#define ucol_getTailoredSet U_ICU_ENTRY_POINT_RENAME(ucol_getTailoredSet)
+#define ucol_getUCAVersion U_ICU_ENTRY_POINT_RENAME(ucol_getUCAVersion)
+#define ucol_getUnsafeSet U_ICU_ENTRY_POINT_RENAME(ucol_getUnsafeSet)
+#define ucol_getVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_getVariableTop)
+#define ucol_getVersion U_ICU_ENTRY_POINT_RENAME(ucol_getVersion)
+#define ucol_greater U_ICU_ENTRY_POINT_RENAME(ucol_greater)
+#define ucol_greaterOrEqual U_ICU_ENTRY_POINT_RENAME(ucol_greaterOrEqual)
+#define ucol_keyHashCode U_ICU_ENTRY_POINT_RENAME(ucol_keyHashCode)
+#define ucol_looksLikeCollationBinary U_ICU_ENTRY_POINT_RENAME(ucol_looksLikeCollationBinary)
+#define ucol_mergeSortkeys U_ICU_ENTRY_POINT_RENAME(ucol_mergeSortkeys)
+#define ucol_next U_ICU_ENTRY_POINT_RENAME(ucol_next)
+#define ucol_nextSortKeyPart U_ICU_ENTRY_POINT_RENAME(ucol_nextSortKeyPart)
+#define ucol_normalizeShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_normalizeShortDefinitionString)
+#define ucol_open U_ICU_ENTRY_POINT_RENAME(ucol_open)
+#define ucol_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ucol_openAvailableLocales)
+#define ucol_openBinary U_ICU_ENTRY_POINT_RENAME(ucol_openBinary)
+#define ucol_openElements U_ICU_ENTRY_POINT_RENAME(ucol_openElements)
+#define ucol_openFromShortString U_ICU_ENTRY_POINT_RENAME(ucol_openFromShortString)
+#define ucol_openRules U_ICU_ENTRY_POINT_RENAME(ucol_openRules)
+#define ucol_prepareShortStringOpen U_ICU_ENTRY_POINT_RENAME(ucol_prepareShortStringOpen)
+#define ucol_previous U_ICU_ENTRY_POINT_RENAME(ucol_previous)
+#define ucol_primaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_primaryOrder)
+#define ucol_reset U_ICU_ENTRY_POINT_RENAME(ucol_reset)
+#define ucol_restoreVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_restoreVariableTop)
+#define ucol_safeClone U_ICU_ENTRY_POINT_RENAME(ucol_safeClone)
+#define ucol_secondaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_secondaryOrder)
+#define ucol_setAttribute U_ICU_ENTRY_POINT_RENAME(ucol_setAttribute)
+#define ucol_setMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_setMaxVariable)
+#define ucol_setOffset U_ICU_ENTRY_POINT_RENAME(ucol_setOffset)
+#define ucol_setReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_setReorderCodes)
+#define ucol_setStrength U_ICU_ENTRY_POINT_RENAME(ucol_setStrength)
+#define ucol_setText U_ICU_ENTRY_POINT_RENAME(ucol_setText)
+#define ucol_setVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_setVariableTop)
+#define ucol_strcoll U_ICU_ENTRY_POINT_RENAME(ucol_strcoll)
+#define ucol_strcollIter U_ICU_ENTRY_POINT_RENAME(ucol_strcollIter)
+#define ucol_strcollUTF8 U_ICU_ENTRY_POINT_RENAME(ucol_strcollUTF8)
+#define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap)
+#define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA)
+#define ucol_tertiaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_tertiaryOrder)
+#define ucsdet_close U_ICU_ENTRY_POINT_RENAME(ucsdet_close)
+#define ucsdet_detect U_ICU_ENTRY_POINT_RENAME(ucsdet_detect)
+#define ucsdet_detectAll U_ICU_ENTRY_POINT_RENAME(ucsdet_detectAll)
+#define ucsdet_enableInputFilter U_ICU_ENTRY_POINT_RENAME(ucsdet_enableInputFilter)
+#define ucsdet_getAllDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getAllDetectableCharsets)
+#define ucsdet_getConfidence U_ICU_ENTRY_POINT_RENAME(ucsdet_getConfidence)
+#define ucsdet_getDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getDetectableCharsets)
+#define ucsdet_getLanguage U_ICU_ENTRY_POINT_RENAME(ucsdet_getLanguage)
+#define ucsdet_getName U_ICU_ENTRY_POINT_RENAME(ucsdet_getName)
+#define ucsdet_getUChars U_ICU_ENTRY_POINT_RENAME(ucsdet_getUChars)
+#define ucsdet_isInputFilterEnabled U_ICU_ENTRY_POINT_RENAME(ucsdet_isInputFilterEnabled)
+#define ucsdet_open U_ICU_ENTRY_POINT_RENAME(ucsdet_open)
+#define ucsdet_setDeclaredEncoding U_ICU_ENTRY_POINT_RENAME(ucsdet_setDeclaredEncoding)
+#define ucsdet_setDetectableCharset U_ICU_ENTRY_POINT_RENAME(ucsdet_setDetectableCharset)
+#define ucsdet_setText U_ICU_ENTRY_POINT_RENAME(ucsdet_setText)
+#define ucurr_countCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_countCurrencies)
+#define ucurr_forLocale U_ICU_ENTRY_POINT_RENAME(ucurr_forLocale)
+#define ucurr_forLocaleAndDate U_ICU_ENTRY_POINT_RENAME(ucurr_forLocaleAndDate)
+#define ucurr_getDefaultFractionDigits U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigits)
+#define ucurr_getDefaultFractionDigitsForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigitsForUsage)
+#define ucurr_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucurr_getKeywordValuesForLocale)
+#define ucurr_getName U_ICU_ENTRY_POINT_RENAME(ucurr_getName)
+#define ucurr_getNumericCode U_ICU_ENTRY_POINT_RENAME(ucurr_getNumericCode)
+#define ucurr_getPluralName U_ICU_ENTRY_POINT_RENAME(ucurr_getPluralName)
+#define ucurr_getRoundingIncrement U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrement)
+#define ucurr_getRoundingIncrementForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrementForUsage)
+#define ucurr_isAvailable U_ICU_ENTRY_POINT_RENAME(ucurr_isAvailable)
+#define ucurr_openISOCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_openISOCurrencies)
+#define ucurr_register U_ICU_ENTRY_POINT_RENAME(ucurr_register)
+#define ucurr_unregister U_ICU_ENTRY_POINT_RENAME(ucurr_unregister)
+#define udat_adoptNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormat)
+#define udat_adoptNumberFormatForFields U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormatForFields)
+#define udat_applyPattern U_ICU_ENTRY_POINT_RENAME(udat_applyPattern)
+#define udat_applyPatternRelative U_ICU_ENTRY_POINT_RENAME(udat_applyPatternRelative)
+#define udat_clone U_ICU_ENTRY_POINT_RENAME(udat_clone)
+#define udat_close U_ICU_ENTRY_POINT_RENAME(udat_close)
+#define udat_countAvailable U_ICU_ENTRY_POINT_RENAME(udat_countAvailable)
+#define udat_countSymbols U_ICU_ENTRY_POINT_RENAME(udat_countSymbols)
+#define udat_format U_ICU_ENTRY_POINT_RENAME(udat_format)
+#define udat_get2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_get2DigitYearStart)
+#define udat_getAvailable U_ICU_ENTRY_POINT_RENAME(udat_getAvailable)
+#define udat_getBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_getBooleanAttribute)
+#define udat_getCalendar U_ICU_ENTRY_POINT_RENAME(udat_getCalendar)
+#define udat_getContext U_ICU_ENTRY_POINT_RENAME(udat_getContext)
+#define udat_getLocaleByType U_ICU_ENTRY_POINT_RENAME(udat_getLocaleByType)
+#define udat_getNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormat)
+#define udat_getNumberFormatForField U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormatForField)
+#define udat_getSymbols U_ICU_ENTRY_POINT_RENAME(udat_getSymbols)
+#define udat_isLenient U_ICU_ENTRY_POINT_RENAME(udat_isLenient)
+#define udat_open U_ICU_ENTRY_POINT_RENAME(udat_open)
+#define udat_parse U_ICU_ENTRY_POINT_RENAME(udat_parse)
+#define udat_parseCalendar U_ICU_ENTRY_POINT_RENAME(udat_parseCalendar)
+#define udat_registerOpener U_ICU_ENTRY_POINT_RENAME(udat_registerOpener)
+#define udat_set2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_set2DigitYearStart)
+#define udat_setBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_setBooleanAttribute)
+#define udat_setCalendar U_ICU_ENTRY_POINT_RENAME(udat_setCalendar)
+#define udat_setContext U_ICU_ENTRY_POINT_RENAME(udat_setContext)
+#define udat_setLenient U_ICU_ENTRY_POINT_RENAME(udat_setLenient)
+#define udat_setNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_setNumberFormat)
+#define udat_setSymbols U_ICU_ENTRY_POINT_RENAME(udat_setSymbols)
+#define udat_toCalendarDateField U_ICU_ENTRY_POINT_RENAME(udat_toCalendarDateField)
+#define udat_toPattern U_ICU_ENTRY_POINT_RENAME(udat_toPattern)
+#define udat_toPatternRelativeDate U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeDate)
+#define udat_toPatternRelativeTime U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeTime)
+#define udat_unregisterOpener U_ICU_ENTRY_POINT_RENAME(udat_unregisterOpener)
+#define udata_checkCommonData U_ICU_ENTRY_POINT_RENAME(udata_checkCommonData)
+#define udata_close U_ICU_ENTRY_POINT_RENAME(udata_close)
+#define udata_closeSwapper U_ICU_ENTRY_POINT_RENAME(udata_closeSwapper)
+#define udata_getHeaderSize U_ICU_ENTRY_POINT_RENAME(udata_getHeaderSize)
+#define udata_getInfo U_ICU_ENTRY_POINT_RENAME(udata_getInfo)
+#define udata_getInfoSize U_ICU_ENTRY_POINT_RENAME(udata_getInfoSize)
+#define udata_getLength U_ICU_ENTRY_POINT_RENAME(udata_getLength)
+#define udata_getMemory U_ICU_ENTRY_POINT_RENAME(udata_getMemory)
+#define udata_getRawMemory U_ICU_ENTRY_POINT_RENAME(udata_getRawMemory)
+#define udata_open U_ICU_ENTRY_POINT_RENAME(udata_open)
+#define udata_openChoice U_ICU_ENTRY_POINT_RENAME(udata_openChoice)
+#define udata_openSwapper U_ICU_ENTRY_POINT_RENAME(udata_openSwapper)
+#define udata_openSwapperForInputData U_ICU_ENTRY_POINT_RENAME(udata_openSwapperForInputData)
+#define udata_printError U_ICU_ENTRY_POINT_RENAME(udata_printError)
+#define udata_readInt16 U_ICU_ENTRY_POINT_RENAME(udata_readInt16)
+#define udata_readInt32 U_ICU_ENTRY_POINT_RENAME(udata_readInt32)
+#define udata_setAppData U_ICU_ENTRY_POINT_RENAME(udata_setAppData)
+#define udata_setCommonData U_ICU_ENTRY_POINT_RENAME(udata_setCommonData)
+#define udata_setFileAccess U_ICU_ENTRY_POINT_RENAME(udata_setFileAccess)
+#define udata_swapDataHeader U_ICU_ENTRY_POINT_RENAME(udata_swapDataHeader)
+#define udata_swapInvStringBlock U_ICU_ENTRY_POINT_RENAME(udata_swapInvStringBlock)
+#define udatpg_addPattern U_ICU_ENTRY_POINT_RENAME(udatpg_addPattern)
+#define udatpg_clone U_ICU_ENTRY_POINT_RENAME(udatpg_clone)
+#define udatpg_close U_ICU_ENTRY_POINT_RENAME(udatpg_close)
+#define udatpg_getAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemFormat)
+#define udatpg_getAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemName)
+#define udatpg_getBaseSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getBaseSkeleton)
+#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern)
+#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions)
+#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat)
+#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal)
+#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton)
+#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton)
+#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open)
+#define udatpg_openBaseSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openBaseSkeletons)
+#define udatpg_openEmpty U_ICU_ENTRY_POINT_RENAME(udatpg_openEmpty)
+#define udatpg_openSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openSkeletons)
+#define udatpg_replaceFieldTypes U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypes)
+#define udatpg_replaceFieldTypesWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypesWithOptions)
+#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat)
+#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName)
+#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat)
+#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal)
+#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap)
+#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close)
+#define udtitvfmt_format U_ICU_ENTRY_POINT_RENAME(udtitvfmt_format)
+#define udtitvfmt_open U_ICU_ENTRY_POINT_RENAME(udtitvfmt_open)
+#define uenum_close U_ICU_ENTRY_POINT_RENAME(uenum_close)
+#define uenum_count U_ICU_ENTRY_POINT_RENAME(uenum_count)
+#define uenum_next U_ICU_ENTRY_POINT_RENAME(uenum_next)
+#define uenum_nextDefault U_ICU_ENTRY_POINT_RENAME(uenum_nextDefault)
+#define uenum_openCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openCharStringsEnumeration)
+#define uenum_openFromStringEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openFromStringEnumeration)
+#define uenum_openUCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openUCharStringsEnumeration)
+#define uenum_reset U_ICU_ENTRY_POINT_RENAME(uenum_reset)
+#define uenum_unext U_ICU_ENTRY_POINT_RENAME(uenum_unext)
+#define uenum_unextDefault U_ICU_ENTRY_POINT_RENAME(uenum_unextDefault)
+#define ufile_close_translit U_ICU_ENTRY_POINT_RENAME(ufile_close_translit)
+#define ufile_fill_uchar_buffer U_ICU_ENTRY_POINT_RENAME(ufile_fill_uchar_buffer)
+#define ufile_flush_io U_ICU_ENTRY_POINT_RENAME(ufile_flush_io)
+#define ufile_flush_translit U_ICU_ENTRY_POINT_RENAME(ufile_flush_translit)
+#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch)
+#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32)
+#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou)
+#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close)
+#define ufmt_defaultCPToUnicode U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode)
+#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue)
+#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex)
+#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength)
+#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate)
+#define ufmt_getDecNumChars U_ICU_ENTRY_POINT_RENAME(ufmt_getDecNumChars)
+#define ufmt_getDouble U_ICU_ENTRY_POINT_RENAME(ufmt_getDouble)
+#define ufmt_getInt64 U_ICU_ENTRY_POINT_RENAME(ufmt_getInt64)
+#define ufmt_getLong U_ICU_ENTRY_POINT_RENAME(ufmt_getLong)
+#define ufmt_getObject U_ICU_ENTRY_POINT_RENAME(ufmt_getObject)
+#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType)
+#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars)
+#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric)
+#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit)
+#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open)
+#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou)
+#define ufmt_uto64 U_ICU_ENTRY_POINT_RENAME(ufmt_uto64)
+#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop)
+#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance)
+#define ugender_getListGender U_ICU_ENTRY_POINT_RENAME(ugender_getListGender)
+#define uhash_close U_ICU_ENTRY_POINT_RENAME(uhash_close)
+#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString)
+#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars)
+#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars)
+#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong)
+#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet)
+#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars)
+#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString)
+#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count)
+#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable)
+#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet)
+#define uhash_equals U_ICU_ENTRY_POINT_RENAME(uhash_equals)
+#define uhash_equalsScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_equalsScriptSet)
+#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find)
+#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get)
+#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti)
+#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString)
+#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars)
+#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars)
+#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong)
+#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet)
+#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars)
+#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString)
+#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
+#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
+#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init)
+#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput)
+#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi)
+#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove)
+#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei)
+#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement)
+#define uhash_open U_ICU_ENTRY_POINT_RENAME(uhash_open)
+#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize)
+#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put)
+#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti)
+#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove)
+#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll)
+#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement)
+#define uhash_removei U_ICU_ENTRY_POINT_RENAME(uhash_removei)
+#define uhash_setKeyComparator U_ICU_ENTRY_POINT_RENAME(uhash_setKeyComparator)
+#define uhash_setKeyDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setKeyDeleter)
+#define uhash_setKeyHasher U_ICU_ENTRY_POINT_RENAME(uhash_setKeyHasher)
+#define uhash_setResizePolicy U_ICU_ENTRY_POINT_RENAME(uhash_setResizePolicy)
+#define uhash_setValueComparator U_ICU_ENTRY_POINT_RENAME(uhash_setValueComparator)
+#define uhash_setValueDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setValueDeleter)
+#define uidna_IDNToASCII U_ICU_ENTRY_POINT_RENAME(uidna_IDNToASCII)
+#define uidna_IDNToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_IDNToUnicode)
+#define uidna_close U_ICU_ENTRY_POINT_RENAME(uidna_close)
+#define uidna_compare U_ICU_ENTRY_POINT_RENAME(uidna_compare)
+#define uidna_labelToASCII U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII)
+#define uidna_labelToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII_UTF8)
+#define uidna_labelToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicode)
+#define uidna_labelToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicodeUTF8)
+#define uidna_nameToASCII U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII)
+#define uidna_nameToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII_UTF8)
+#define uidna_nameToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicode)
+#define uidna_nameToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicodeUTF8)
+#define uidna_openUTS46 U_ICU_ENTRY_POINT_RENAME(uidna_openUTS46)
+#define uidna_toASCII U_ICU_ENTRY_POINT_RENAME(uidna_toASCII)
+#define uidna_toUnicode U_ICU_ENTRY_POINT_RENAME(uidna_toUnicode)
+#define uiter_current32 U_ICU_ENTRY_POINT_RENAME(uiter_current32)
+#define uiter_getState U_ICU_ENTRY_POINT_RENAME(uiter_getState)
+#define uiter_next32 U_ICU_ENTRY_POINT_RENAME(uiter_next32)
+#define uiter_previous32 U_ICU_ENTRY_POINT_RENAME(uiter_previous32)
+#define uiter_setCharacterIterator U_ICU_ENTRY_POINT_RENAME(uiter_setCharacterIterator)
+#define uiter_setReplaceable U_ICU_ENTRY_POINT_RENAME(uiter_setReplaceable)
+#define uiter_setState U_ICU_ENTRY_POINT_RENAME(uiter_setState)
+#define uiter_setString U_ICU_ENTRY_POINT_RENAME(uiter_setString)
+#define uiter_setUTF16BE U_ICU_ENTRY_POINT_RENAME(uiter_setUTF16BE)
+#define uiter_setUTF8 U_ICU_ENTRY_POINT_RENAME(uiter_setUTF8)
+#define uldn_close U_ICU_ENTRY_POINT_RENAME(uldn_close)
+#define uldn_getContext U_ICU_ENTRY_POINT_RENAME(uldn_getContext)
+#define uldn_getDialectHandling U_ICU_ENTRY_POINT_RENAME(uldn_getDialectHandling)
+#define uldn_getLocale U_ICU_ENTRY_POINT_RENAME(uldn_getLocale)
+#define uldn_keyDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyDisplayName)
+#define uldn_keyValueDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyValueDisplayName)
+#define uldn_languageDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_languageDisplayName)
+#define uldn_localeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_localeDisplayName)
+#define uldn_open U_ICU_ENTRY_POINT_RENAME(uldn_open)
+#define uldn_openForContext U_ICU_ENTRY_POINT_RENAME(uldn_openForContext)
+#define uldn_regionDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_regionDisplayName)
+#define uldn_scriptCodeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptCodeDisplayName)
+#define uldn_scriptDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptDisplayName)
+#define uldn_variantDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_variantDisplayName)
+#define ulist_addItemBeginList U_ICU_ENTRY_POINT_RENAME(ulist_addItemBeginList)
+#define ulist_addItemEndList U_ICU_ENTRY_POINT_RENAME(ulist_addItemEndList)
+#define ulist_close_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_close_keyword_values_iterator)
+#define ulist_containsString U_ICU_ENTRY_POINT_RENAME(ulist_containsString)
+#define ulist_count_keyword_values U_ICU_ENTRY_POINT_RENAME(ulist_count_keyword_values)
+#define ulist_createEmptyList U_ICU_ENTRY_POINT_RENAME(ulist_createEmptyList)
+#define ulist_deleteList U_ICU_ENTRY_POINT_RENAME(ulist_deleteList)
+#define ulist_getListFromEnum U_ICU_ENTRY_POINT_RENAME(ulist_getListFromEnum)
+#define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize)
+#define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext)
+#define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value)
+#define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList)
+#define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator)
+#define uloc_acceptLanguage U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguage)
+#define uloc_acceptLanguageFromHTTP U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguageFromHTTP)
+#define uloc_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(uloc_addLikelySubtags)
+#define uloc_canonicalize U_ICU_ENTRY_POINT_RENAME(uloc_canonicalize)
+#define uloc_countAvailable U_ICU_ENTRY_POINT_RENAME(uloc_countAvailable)
+#define uloc_forLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_forLanguageTag)
+#define uloc_getAvailable U_ICU_ENTRY_POINT_RENAME(uloc_getAvailable)
+#define uloc_getBaseName U_ICU_ENTRY_POINT_RENAME(uloc_getBaseName)
+#define uloc_getCharacterOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getCharacterOrientation)
+#define uloc_getCountry U_ICU_ENTRY_POINT_RENAME(uloc_getCountry)
+#define uloc_getCurrentCountryID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentCountryID)
+#define uloc_getCurrentLanguageID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentLanguageID)
+#define uloc_getDefault U_ICU_ENTRY_POINT_RENAME(uloc_getDefault)
+#define uloc_getDisplayCountry U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayCountry)
+#define uloc_getDisplayKeyword U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeyword)
+#define uloc_getDisplayKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeywordValue)
+#define uloc_getDisplayLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayLanguage)
+#define uloc_getDisplayName U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayName)
+#define uloc_getDisplayScript U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScript)
+#define uloc_getDisplayScriptInContext U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScriptInContext)
+#define uloc_getDisplayVariant U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayVariant)
+#define uloc_getISO3Country U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Country)
+#define uloc_getISO3Language U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Language)
+#define uloc_getISOCountries U_ICU_ENTRY_POINT_RENAME(uloc_getISOCountries)
+#define uloc_getISOLanguages U_ICU_ENTRY_POINT_RENAME(uloc_getISOLanguages)
+#define uloc_getKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getKeywordValue)
+#define uloc_getLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLCID)
+#define uloc_getLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getLanguage)
+#define uloc_getLineOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getLineOrientation)
+#define uloc_getLocaleForLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLocaleForLCID)
+#define uloc_getName U_ICU_ENTRY_POINT_RENAME(uloc_getName)
+#define uloc_getParent U_ICU_ENTRY_POINT_RENAME(uloc_getParent)
+#define uloc_getScript U_ICU_ENTRY_POINT_RENAME(uloc_getScript)
+#define uloc_getTableStringWithFallback U_ICU_ENTRY_POINT_RENAME(uloc_getTableStringWithFallback)
+#define uloc_getVariant U_ICU_ENTRY_POINT_RENAME(uloc_getVariant)
+#define uloc_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uloc_isRightToLeft)
+#define uloc_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(uloc_minimizeSubtags)
+#define uloc_openKeywordList U_ICU_ENTRY_POINT_RENAME(uloc_openKeywordList)
+#define uloc_openKeywords U_ICU_ENTRY_POINT_RENAME(uloc_openKeywords)
+#define uloc_setDefault U_ICU_ENTRY_POINT_RENAME(uloc_setDefault)
+#define uloc_setKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_setKeywordValue)
+#define uloc_toLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_toLanguageTag)
+#define uloc_toLegacyKey U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyKey)
+#define uloc_toLegacyType U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyType)
+#define uloc_toUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleKey)
+#define uloc_toUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleType)
+#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close)
+#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion)
+#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter)
+#define ulocdata_getExemplarSet U_ICU_ENTRY_POINT_RENAME(ulocdata_getExemplarSet)
+#define ulocdata_getLocaleDisplayPattern U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleDisplayPattern)
+#define ulocdata_getLocaleSeparator U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleSeparator)
+#define ulocdata_getMeasurementSystem U_ICU_ENTRY_POINT_RENAME(ulocdata_getMeasurementSystem)
+#define ulocdata_getNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_getNoSubstitute)
+#define ulocdata_getPaperSize U_ICU_ENTRY_POINT_RENAME(ulocdata_getPaperSize)
+#define ulocdata_open U_ICU_ENTRY_POINT_RENAME(ulocdata_open)
+#define ulocdata_setNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_setNoSubstitute)
+#define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry)
+#define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage)
+#define ulocimp_getScript U_ICU_ENTRY_POINT_RENAME(ulocimp_getScript)
+#define ulocimp_toBcpKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKey)
+#define ulocimp_toBcpType U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpType)
+#define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey)
+#define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType)
+#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey)
+#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType)
+#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern)
+#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe)
+#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone)
+#define umsg_close U_ICU_ENTRY_POINT_RENAME(umsg_close)
+#define umsg_format U_ICU_ENTRY_POINT_RENAME(umsg_format)
+#define umsg_getLocale U_ICU_ENTRY_POINT_RENAME(umsg_getLocale)
+#define umsg_open U_ICU_ENTRY_POINT_RENAME(umsg_open)
+#define umsg_parse U_ICU_ENTRY_POINT_RENAME(umsg_parse)
+#define umsg_setLocale U_ICU_ENTRY_POINT_RENAME(umsg_setLocale)
+#define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern)
+#define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat)
+#define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse)
+#define umtx_condBroadcast U_ICU_ENTRY_POINT_RENAME(umtx_condBroadcast)
+#define umtx_condSignal U_ICU_ENTRY_POINT_RENAME(umtx_condSignal)
+#define umtx_condWait U_ICU_ENTRY_POINT_RENAME(umtx_condWait)
+#define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock)
+#define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock)
+#define uniset_getUnicode32Instance U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance)
+#define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append)
+#define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close)
+#define unorm2_composePair U_ICU_ENTRY_POINT_RENAME(unorm2_composePair)
+#define unorm2_getCombiningClass U_ICU_ENTRY_POINT_RENAME(unorm2_getCombiningClass)
+#define unorm2_getDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getDecomposition)
+#define unorm2_getInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getInstance)
+#define unorm2_getNFCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFCInstance)
+#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance)
+#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance)
+#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance)
+#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance)
+#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition)
+#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter)
+#define unorm2_hasBoundaryBefore U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryBefore)
+#define unorm2_isInert U_ICU_ENTRY_POINT_RENAME(unorm2_isInert)
+#define unorm2_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm2_isNormalized)
+#define unorm2_normalize U_ICU_ENTRY_POINT_RENAME(unorm2_normalize)
+#define unorm2_normalizeSecondAndAppend U_ICU_ENTRY_POINT_RENAME(unorm2_normalizeSecondAndAppend)
+#define unorm2_openFiltered U_ICU_ENTRY_POINT_RENAME(unorm2_openFiltered)
+#define unorm2_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm2_quickCheck)
+#define unorm2_spanQuickCheckYes U_ICU_ENTRY_POINT_RENAME(unorm2_spanQuickCheckYes)
+#define unorm2_swap U_ICU_ENTRY_POINT_RENAME(unorm2_swap)
+#define unorm_compare U_ICU_ENTRY_POINT_RENAME(unorm_compare)
+#define unorm_concatenate U_ICU_ENTRY_POINT_RENAME(unorm_concatenate)
+#define unorm_getFCD16 U_ICU_ENTRY_POINT_RENAME(unorm_getFCD16)
+#define unorm_getQuickCheck U_ICU_ENTRY_POINT_RENAME(unorm_getQuickCheck)
+#define unorm_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm_isNormalized)
+#define unorm_isNormalizedWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_isNormalizedWithOptions)
+#define unorm_next U_ICU_ENTRY_POINT_RENAME(unorm_next)
+#define unorm_normalize U_ICU_ENTRY_POINT_RENAME(unorm_normalize)
+#define unorm_previous U_ICU_ENTRY_POINT_RENAME(unorm_previous)
+#define unorm_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm_quickCheck)
+#define unorm_quickCheckWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_quickCheckWithOptions)
+#define unum_applyPattern U_ICU_ENTRY_POINT_RENAME(unum_applyPattern)
+#define unum_clone U_ICU_ENTRY_POINT_RENAME(unum_clone)
+#define unum_close U_ICU_ENTRY_POINT_RENAME(unum_close)
+#define unum_countAvailable U_ICU_ENTRY_POINT_RENAME(unum_countAvailable)
+#define unum_format U_ICU_ENTRY_POINT_RENAME(unum_format)
+#define unum_formatDecimal U_ICU_ENTRY_POINT_RENAME(unum_formatDecimal)
+#define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble)
+#define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency)
+#define unum_formatInt64 U_ICU_ENTRY_POINT_RENAME(unum_formatInt64)
+#define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable)
+#define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute)
+#define unum_getAvailable U_ICU_ENTRY_POINT_RENAME(unum_getAvailable)
+#define unum_getContext U_ICU_ENTRY_POINT_RENAME(unum_getContext)
+#define unum_getDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_getDoubleAttribute)
+#define unum_getLocaleByType U_ICU_ENTRY_POINT_RENAME(unum_getLocaleByType)
+#define unum_getSymbol U_ICU_ENTRY_POINT_RENAME(unum_getSymbol)
+#define unum_getTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_getTextAttribute)
+#define unum_open U_ICU_ENTRY_POINT_RENAME(unum_open)
+#define unum_parse U_ICU_ENTRY_POINT_RENAME(unum_parse)
+#define unum_parseDecimal U_ICU_ENTRY_POINT_RENAME(unum_parseDecimal)
+#define unum_parseDouble U_ICU_ENTRY_POINT_RENAME(unum_parseDouble)
+#define unum_parseDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_parseDoubleCurrency)
+#define unum_parseInt64 U_ICU_ENTRY_POINT_RENAME(unum_parseInt64)
+#define unum_parseToUFormattable U_ICU_ENTRY_POINT_RENAME(unum_parseToUFormattable)
+#define unum_setAttribute U_ICU_ENTRY_POINT_RENAME(unum_setAttribute)
+#define unum_setContext U_ICU_ENTRY_POINT_RENAME(unum_setContext)
+#define unum_setDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_setDoubleAttribute)
+#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol)
+#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute)
+#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern)
+#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close)
+#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription)
+#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName)
+#define unumsys_getRadix U_ICU_ENTRY_POINT_RENAME(unumsys_getRadix)
+#define unumsys_isAlgorithmic U_ICU_ENTRY_POINT_RENAME(unumsys_isAlgorithmic)
+#define unumsys_open U_ICU_ENTRY_POINT_RENAME(unumsys_open)
+#define unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames)
+#define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName)
+#define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close)
+#define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open)
+#define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType)
+#define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select)
+#define uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary)
+#define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary)
+#define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration)
+#define uplug_getContext U_ICU_ENTRY_POINT_RENAME(uplug_getContext)
+#define uplug_getCurrentLevel U_ICU_ENTRY_POINT_RENAME(uplug_getCurrentLevel)
+#define uplug_getLibrary U_ICU_ENTRY_POINT_RENAME(uplug_getLibrary)
+#define uplug_getLibraryName U_ICU_ENTRY_POINT_RENAME(uplug_getLibraryName)
+#define uplug_getPlugInternal U_ICU_ENTRY_POINT_RENAME(uplug_getPlugInternal)
+#define uplug_getPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLevel)
+#define uplug_getPlugLoadStatus U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLoadStatus)
+#define uplug_getPlugName U_ICU_ENTRY_POINT_RENAME(uplug_getPlugName)
+#define uplug_getPluginFile U_ICU_ENTRY_POINT_RENAME(uplug_getPluginFile)
+#define uplug_getSymbolName U_ICU_ENTRY_POINT_RENAME(uplug_getSymbolName)
+#define uplug_init U_ICU_ENTRY_POINT_RENAME(uplug_init)
+#define uplug_loadPlugFromEntrypoint U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromEntrypoint)
+#define uplug_loadPlugFromLibrary U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromLibrary)
+#define uplug_nextPlug U_ICU_ENTRY_POINT_RENAME(uplug_nextPlug)
+#define uplug_openLibrary U_ICU_ENTRY_POINT_RENAME(uplug_openLibrary)
+#define uplug_removePlug U_ICU_ENTRY_POINT_RENAME(uplug_removePlug)
+#define uplug_setContext U_ICU_ENTRY_POINT_RENAME(uplug_setContext)
+#define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel)
+#define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName)
+#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload)
+#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource)
+#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts)
+#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy)
+#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic)
+#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower)
+#define uprv_calloc U_ICU_ENTRY_POINT_RENAME(uprv_calloc)
+#define uprv_ceil U_ICU_ENTRY_POINT_RENAME(uprv_ceil)
+#define uprv_compareASCIIPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareASCIIPropertyNames)
+#define uprv_compareEBCDICPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareEBCDICPropertyNames)
+#define uprv_compareInvAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvAscii)
+#define uprv_compareInvEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdic)
+#define uprv_compareInvEbcdicAsAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdicAsAscii)
+#define uprv_convertToLCID U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCID)
+#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix)
+#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii)
+#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic)
+#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus)
+#define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault)
+#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding)
+#define uprv_decContextGetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetStatus)
+#define uprv_decContextRestoreStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextRestoreStatus)
+#define uprv_decContextSaveStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSaveStatus)
+#define uprv_decContextSetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetRounding)
+#define uprv_decContextSetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatus)
+#define uprv_decContextSetStatusFromString U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromString)
+#define uprv_decContextSetStatusFromStringQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromStringQuiet)
+#define uprv_decContextSetStatusQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusQuiet)
+#define uprv_decContextStatusToString U_ICU_ENTRY_POINT_RENAME(uprv_decContextStatusToString)
+#define uprv_decContextTestSavedStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestSavedStatus)
+#define uprv_decContextTestStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestStatus)
+#define uprv_decContextZeroStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextZeroStatus)
+#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs)
+#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd)
+#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd)
+#define uprv_decNumberClass U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClass)
+#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString)
+#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare)
+#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal)
+#define uprv_decNumberCompareTotal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotal)
+#define uprv_decNumberCompareTotalMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotalMag)
+#define uprv_decNumberCopy U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopy)
+#define uprv_decNumberCopyAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyAbs)
+#define uprv_decNumberCopyNegate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyNegate)
+#define uprv_decNumberCopySign U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopySign)
+#define uprv_decNumberDivide U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivide)
+#define uprv_decNumberDivideInteger U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivideInteger)
+#define uprv_decNumberExp U_ICU_ENTRY_POINT_RENAME(uprv_decNumberExp)
+#define uprv_decNumberFMA U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFMA)
+#define uprv_decNumberFromInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromInt32)
+#define uprv_decNumberFromString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromString)
+#define uprv_decNumberFromUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromUInt32)
+#define uprv_decNumberGetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberGetBCD)
+#define uprv_decNumberInvert U_ICU_ENTRY_POINT_RENAME(uprv_decNumberInvert)
+#define uprv_decNumberIsNormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsNormal)
+#define uprv_decNumberIsSubnormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsSubnormal)
+#define uprv_decNumberLn U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLn)
+#define uprv_decNumberLog10 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLog10)
+#define uprv_decNumberLogB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLogB)
+#define uprv_decNumberMax U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMax)
+#define uprv_decNumberMaxMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMaxMag)
+#define uprv_decNumberMin U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMin)
+#define uprv_decNumberMinMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinMag)
+#define uprv_decNumberMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinus)
+#define uprv_decNumberMultiply U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMultiply)
+#define uprv_decNumberNextMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextMinus)
+#define uprv_decNumberNextPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextPlus)
+#define uprv_decNumberNextToward U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextToward)
+#define uprv_decNumberNormalize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNormalize)
+#define uprv_decNumberOr U_ICU_ENTRY_POINT_RENAME(uprv_decNumberOr)
+#define uprv_decNumberPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPlus)
+#define uprv_decNumberPower U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPower)
+#define uprv_decNumberQuantize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberQuantize)
+#define uprv_decNumberReduce U_ICU_ENTRY_POINT_RENAME(uprv_decNumberReduce)
+#define uprv_decNumberRemainder U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainder)
+#define uprv_decNumberRemainderNear U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainderNear)
+#define uprv_decNumberRescale U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRescale)
+#define uprv_decNumberRotate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRotate)
+#define uprv_decNumberSameQuantum U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSameQuantum)
+#define uprv_decNumberScaleB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberScaleB)
+#define uprv_decNumberSetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSetBCD)
+#define uprv_decNumberShift U_ICU_ENTRY_POINT_RENAME(uprv_decNumberShift)
+#define uprv_decNumberSquareRoot U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSquareRoot)
+#define uprv_decNumberSubtract U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSubtract)
+#define uprv_decNumberToEngString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToEngString)
+#define uprv_decNumberToInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToInt32)
+#define uprv_decNumberToIntegralExact U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralExact)
+#define uprv_decNumberToIntegralValue U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralValue)
+#define uprv_decNumberToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToString)
+#define uprv_decNumberToUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToUInt32)
+#define uprv_decNumberTrim U_ICU_ENTRY_POINT_RENAME(uprv_decNumberTrim)
+#define uprv_decNumberVersion U_ICU_ENTRY_POINT_RENAME(uprv_decNumberVersion)
+#define uprv_decNumberXor U_ICU_ENTRY_POINT_RENAME(uprv_decNumberXor)
+#define uprv_decNumberZero U_ICU_ENTRY_POINT_RENAME(uprv_decNumberZero)
+#define uprv_deleteConditionalCE32 U_ICU_ENTRY_POINT_RENAME(uprv_deleteConditionalCE32)
+#define uprv_deleteUObject U_ICU_ENTRY_POINT_RENAME(uprv_deleteUObject)
+#define uprv_dl_close U_ICU_ENTRY_POINT_RENAME(uprv_dl_close)
+#define uprv_dl_open U_ICU_ENTRY_POINT_RENAME(uprv_dl_open)
+#define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func)
+#define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy)
+#define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii)
+#define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii)
+#define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower)
+#define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs)
+#define uprv_floor U_ICU_ENTRY_POINT_RENAME(uprv_floor)
+#define uprv_fmax U_ICU_ENTRY_POINT_RENAME(uprv_fmax)
+#define uprv_fmin U_ICU_ENTRY_POINT_RENAME(uprv_fmin)
+#define uprv_fmod U_ICU_ENTRY_POINT_RENAME(uprv_fmod)
+#define uprv_free U_ICU_ENTRY_POINT_RENAME(uprv_free)
+#define uprv_getCharNameCharacters U_ICU_ENTRY_POINT_RENAME(uprv_getCharNameCharacters)
+#define uprv_getDefaultCodepage U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultCodepage)
+#define uprv_getDefaultLocaleID U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultLocaleID)
+#define uprv_getInfinity U_ICU_ENTRY_POINT_RENAME(uprv_getInfinity)
+#define uprv_getMaxCharNameLength U_ICU_ENTRY_POINT_RENAME(uprv_getMaxCharNameLength)
+#define uprv_getMaxValues U_ICU_ENTRY_POINT_RENAME(uprv_getMaxValues)
+#define uprv_getNaN U_ICU_ENTRY_POINT_RENAME(uprv_getNaN)
+#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime)
+#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName)
+#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime)
+#define uprv_haveProperties U_ICU_ENTRY_POINT_RENAME(uprv_haveProperties)
+#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator)
+#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter)
+#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite)
+#define uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString)
+#define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString)
+#define uprv_isNaN U_ICU_ENTRY_POINT_RENAME(uprv_isNaN)
+#define uprv_isNegativeInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isNegativeInfinity)
+#define uprv_isPositiveInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isPositiveInfinity)
+#define uprv_itou U_ICU_ENTRY_POINT_RENAME(uprv_itou)
+#define uprv_log U_ICU_ENTRY_POINT_RENAME(uprv_log)
+#define uprv_malloc U_ICU_ENTRY_POINT_RENAME(uprv_malloc)
+#define uprv_mapFile U_ICU_ENTRY_POINT_RENAME(uprv_mapFile)
+#define uprv_max U_ICU_ENTRY_POINT_RENAME(uprv_max)
+#define uprv_maxMantissa U_ICU_ENTRY_POINT_RENAME(uprv_maxMantissa)
+#define uprv_maximumPtr U_ICU_ENTRY_POINT_RENAME(uprv_maximumPtr)
+#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min)
+#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf)
+#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency)
+#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute)
+#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow)
+#define uprv_pow10 U_ICU_ENTRY_POINT_RENAME(uprv_pow10)
+#define uprv_realloc U_ICU_ENTRY_POINT_RENAME(uprv_realloc)
+#define uprv_round U_ICU_ENTRY_POINT_RENAME(uprv_round)
+#define uprv_sortArray U_ICU_ENTRY_POINT_RENAME(uprv_sortArray)
+#define uprv_stableBinarySearch U_ICU_ENTRY_POINT_RENAME(uprv_stableBinarySearch)
+#define uprv_strCompare U_ICU_ENTRY_POINT_RENAME(uprv_strCompare)
+#define uprv_strdup U_ICU_ENTRY_POINT_RENAME(uprv_strdup)
+#define uprv_stricmp U_ICU_ENTRY_POINT_RENAME(uprv_stricmp)
+#define uprv_strndup U_ICU_ENTRY_POINT_RENAME(uprv_strndup)
+#define uprv_strnicmp U_ICU_ENTRY_POINT_RENAME(uprv_strnicmp)
+#define uprv_syntaxError U_ICU_ENTRY_POINT_RENAME(uprv_syntaxError)
+#define uprv_timezone U_ICU_ENTRY_POINT_RENAME(uprv_timezone)
+#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper)
+#define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc)
+#define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname)
+#define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset)
+#define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator)
+#define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator)
+#define uprv_unmapFile U_ICU_ENTRY_POINT_RENAME(uprv_unmapFile)
+#define upvec_cloneArray U_ICU_ENTRY_POINT_RENAME(upvec_cloneArray)
+#define upvec_close U_ICU_ENTRY_POINT_RENAME(upvec_close)
+#define upvec_compact U_ICU_ENTRY_POINT_RENAME(upvec_compact)
+#define upvec_compactToUTrie2Handler U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2Handler)
+#define upvec_compactToUTrie2WithRowIndexes U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2WithRowIndexes)
+#define upvec_getArray U_ICU_ENTRY_POINT_RENAME(upvec_getArray)
+#define upvec_getRow U_ICU_ENTRY_POINT_RENAME(upvec_getRow)
+#define upvec_getValue U_ICU_ENTRY_POINT_RENAME(upvec_getValue)
+#define upvec_open U_ICU_ENTRY_POINT_RENAME(upvec_open)
+#define upvec_setValue U_ICU_ENTRY_POINT_RENAME(upvec_setValue)
+#define uregex_appendReplacement U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacement)
+#define uregex_appendReplacementUText U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacementUText)
+#define uregex_appendTail U_ICU_ENTRY_POINT_RENAME(uregex_appendTail)
+#define uregex_appendTailUText U_ICU_ENTRY_POINT_RENAME(uregex_appendTailUText)
+#define uregex_clone U_ICU_ENTRY_POINT_RENAME(uregex_clone)
+#define uregex_close U_ICU_ENTRY_POINT_RENAME(uregex_close)
+#define uregex_end U_ICU_ENTRY_POINT_RENAME(uregex_end)
+#define uregex_end64 U_ICU_ENTRY_POINT_RENAME(uregex_end64)
+#define uregex_find U_ICU_ENTRY_POINT_RENAME(uregex_find)
+#define uregex_find64 U_ICU_ENTRY_POINT_RENAME(uregex_find64)
+#define uregex_findNext U_ICU_ENTRY_POINT_RENAME(uregex_findNext)
+#define uregex_flags U_ICU_ENTRY_POINT_RENAME(uregex_flags)
+#define uregex_getFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_getFindProgressCallback)
+#define uregex_getMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_getMatchCallback)
+#define uregex_getStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_getStackLimit)
+#define uregex_getText U_ICU_ENTRY_POINT_RENAME(uregex_getText)
+#define uregex_getTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_getTimeLimit)
+#define uregex_getUText U_ICU_ENTRY_POINT_RENAME(uregex_getUText)
+#define uregex_group U_ICU_ENTRY_POINT_RENAME(uregex_group)
+#define uregex_groupCount U_ICU_ENTRY_POINT_RENAME(uregex_groupCount)
+#define uregex_groupUText U_ICU_ENTRY_POINT_RENAME(uregex_groupUText)
+#define uregex_groupUTextDeep U_ICU_ENTRY_POINT_RENAME(uregex_groupUTextDeep)
+#define uregex_hasAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasAnchoringBounds)
+#define uregex_hasTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasTransparentBounds)
+#define uregex_hitEnd U_ICU_ENTRY_POINT_RENAME(uregex_hitEnd)
+#define uregex_lookingAt U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt)
+#define uregex_lookingAt64 U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt64)
+#define uregex_matches U_ICU_ENTRY_POINT_RENAME(uregex_matches)
+#define uregex_matches64 U_ICU_ENTRY_POINT_RENAME(uregex_matches64)
+#define uregex_open U_ICU_ENTRY_POINT_RENAME(uregex_open)
+#define uregex_openC U_ICU_ENTRY_POINT_RENAME(uregex_openC)
+#define uregex_openUText U_ICU_ENTRY_POINT_RENAME(uregex_openUText)
+#define uregex_pattern U_ICU_ENTRY_POINT_RENAME(uregex_pattern)
+#define uregex_patternUText U_ICU_ENTRY_POINT_RENAME(uregex_patternUText)
+#define uregex_refreshUText U_ICU_ENTRY_POINT_RENAME(uregex_refreshUText)
+#define uregex_regionEnd U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd)
+#define uregex_regionEnd64 U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd64)
+#define uregex_regionStart U_ICU_ENTRY_POINT_RENAME(uregex_regionStart)
+#define uregex_regionStart64 U_ICU_ENTRY_POINT_RENAME(uregex_regionStart64)
+#define uregex_replaceAll U_ICU_ENTRY_POINT_RENAME(uregex_replaceAll)
+#define uregex_replaceAllUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceAllUText)
+#define uregex_replaceFirst U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirst)
+#define uregex_replaceFirstUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirstUText)
+#define uregex_requireEnd U_ICU_ENTRY_POINT_RENAME(uregex_requireEnd)
+#define uregex_reset U_ICU_ENTRY_POINT_RENAME(uregex_reset)
+#define uregex_reset64 U_ICU_ENTRY_POINT_RENAME(uregex_reset64)
+#define uregex_setFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_setFindProgressCallback)
+#define uregex_setMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_setMatchCallback)
+#define uregex_setRegion U_ICU_ENTRY_POINT_RENAME(uregex_setRegion)
+#define uregex_setRegion64 U_ICU_ENTRY_POINT_RENAME(uregex_setRegion64)
+#define uregex_setRegionAndStart U_ICU_ENTRY_POINT_RENAME(uregex_setRegionAndStart)
+#define uregex_setStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_setStackLimit)
+#define uregex_setText U_ICU_ENTRY_POINT_RENAME(uregex_setText)
+#define uregex_setTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_setTimeLimit)
+#define uregex_setUText U_ICU_ENTRY_POINT_RENAME(uregex_setUText)
+#define uregex_split U_ICU_ENTRY_POINT_RENAME(uregex_split)
+#define uregex_splitUText U_ICU_ENTRY_POINT_RENAME(uregex_splitUText)
+#define uregex_start U_ICU_ENTRY_POINT_RENAME(uregex_start)
+#define uregex_start64 U_ICU_ENTRY_POINT_RENAME(uregex_start64)
+#define uregex_ucstr_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_ucstr_unescape_charAt)
+#define uregex_useAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_useAnchoringBounds)
+#define uregex_useTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_useTransparentBounds)
+#define uregex_utext_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_utext_unescape_charAt)
+#define uregion_areEqual U_ICU_ENTRY_POINT_RENAME(uregion_areEqual)
+#define uregion_contains U_ICU_ENTRY_POINT_RENAME(uregion_contains)
+#define uregion_getAvailable U_ICU_ENTRY_POINT_RENAME(uregion_getAvailable)
+#define uregion_getContainedRegions U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegions)
+#define uregion_getContainedRegionsOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegionsOfType)
+#define uregion_getContainingRegion U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegion)
+#define uregion_getContainingRegionOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegionOfType)
+#define uregion_getNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getNumericCode)
+#define uregion_getPreferredValues U_ICU_ENTRY_POINT_RENAME(uregion_getPreferredValues)
+#define uregion_getRegionCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionCode)
+#define uregion_getRegionFromCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromCode)
+#define uregion_getRegionFromNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromNumericCode)
+#define uregion_getType U_ICU_ENTRY_POINT_RENAME(uregion_getType)
+#define ures_close U_ICU_ENTRY_POINT_RENAME(ures_close)
+#define ures_copyResb U_ICU_ENTRY_POINT_RENAME(ures_copyResb)
+#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems)
+#define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource)
+#define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource)
+#define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary)
+#define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex)
+#define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey)
+#define ures_getByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getByKeyWithFallback)
+#define ures_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ures_getFunctionalEquivalent)
+#define ures_getInt U_ICU_ENTRY_POINT_RENAME(ures_getInt)
+#define ures_getIntVector U_ICU_ENTRY_POINT_RENAME(ures_getIntVector)
+#define ures_getKey U_ICU_ENTRY_POINT_RENAME(ures_getKey)
+#define ures_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ures_getKeywordValues)
+#define ures_getLocale U_ICU_ENTRY_POINT_RENAME(ures_getLocale)
+#define ures_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ures_getLocaleByType)
+#define ures_getLocaleInternal U_ICU_ENTRY_POINT_RENAME(ures_getLocaleInternal)
+#define ures_getName U_ICU_ENTRY_POINT_RENAME(ures_getName)
+#define ures_getNextResource U_ICU_ENTRY_POINT_RENAME(ures_getNextResource)
+#define ures_getNextString U_ICU_ENTRY_POINT_RENAME(ures_getNextString)
+#define ures_getSize U_ICU_ENTRY_POINT_RENAME(ures_getSize)
+#define ures_getString U_ICU_ENTRY_POINT_RENAME(ures_getString)
+#define ures_getStringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getStringByIndex)
+#define ures_getStringByKey U_ICU_ENTRY_POINT_RENAME(ures_getStringByKey)
+#define ures_getStringByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getStringByKeyWithFallback)
+#define ures_getType U_ICU_ENTRY_POINT_RENAME(ures_getType)
+#define ures_getUInt U_ICU_ENTRY_POINT_RENAME(ures_getUInt)
+#define ures_getUTF8String U_ICU_ENTRY_POINT_RENAME(ures_getUTF8String)
+#define ures_getUTF8StringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByIndex)
+#define ures_getUTF8StringByKey U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByKey)
+#define ures_getVersion U_ICU_ENTRY_POINT_RENAME(ures_getVersion)
+#define ures_getVersionByKey U_ICU_ENTRY_POINT_RENAME(ures_getVersionByKey)
+#define ures_getVersionNumber U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumber)
+#define ures_getVersionNumberInternal U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumberInternal)
+#define ures_hasNext U_ICU_ENTRY_POINT_RENAME(ures_hasNext)
+#define ures_initStackObject U_ICU_ENTRY_POINT_RENAME(ures_initStackObject)
+#define ures_open U_ICU_ENTRY_POINT_RENAME(ures_open)
+#define ures_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ures_openAvailableLocales)
+#define ures_openDirect U_ICU_ENTRY_POINT_RENAME(ures_openDirect)
+#define ures_openFillIn U_ICU_ENTRY_POINT_RENAME(ures_openFillIn)
+#define ures_openU U_ICU_ENTRY_POINT_RENAME(ures_openU)
+#define ures_resetIterator U_ICU_ENTRY_POINT_RENAME(ures_resetIterator)
+#define ures_swap U_ICU_ENTRY_POINT_RENAME(ures_swap)
+#define uscript_breaksBetweenLetters U_ICU_ENTRY_POINT_RENAME(uscript_breaksBetweenLetters)
+#define uscript_closeRun U_ICU_ENTRY_POINT_RENAME(uscript_closeRun)
+#define uscript_getCode U_ICU_ENTRY_POINT_RENAME(uscript_getCode)
+#define uscript_getName U_ICU_ENTRY_POINT_RENAME(uscript_getName)
+#define uscript_getSampleString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleString)
+#define uscript_getSampleUnicodeString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleUnicodeString)
+#define uscript_getScript U_ICU_ENTRY_POINT_RENAME(uscript_getScript)
+#define uscript_getScriptExtensions U_ICU_ENTRY_POINT_RENAME(uscript_getScriptExtensions)
+#define uscript_getShortName U_ICU_ENTRY_POINT_RENAME(uscript_getShortName)
+#define uscript_getUsage U_ICU_ENTRY_POINT_RENAME(uscript_getUsage)
+#define uscript_hasScript U_ICU_ENTRY_POINT_RENAME(uscript_hasScript)
+#define uscript_isCased U_ICU_ENTRY_POINT_RENAME(uscript_isCased)
+#define uscript_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uscript_isRightToLeft)
+#define uscript_nextRun U_ICU_ENTRY_POINT_RENAME(uscript_nextRun)
+#define uscript_openRun U_ICU_ENTRY_POINT_RENAME(uscript_openRun)
+#define uscript_resetRun U_ICU_ENTRY_POINT_RENAME(uscript_resetRun)
+#define uscript_setRunText U_ICU_ENTRY_POINT_RENAME(uscript_setRunText)
+#define usearch_close U_ICU_ENTRY_POINT_RENAME(usearch_close)
+#define usearch_first U_ICU_ENTRY_POINT_RENAME(usearch_first)
+#define usearch_following U_ICU_ENTRY_POINT_RENAME(usearch_following)
+#define usearch_getAttribute U_ICU_ENTRY_POINT_RENAME(usearch_getAttribute)
+#define usearch_getBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_getBreakIterator)
+#define usearch_getCollator U_ICU_ENTRY_POINT_RENAME(usearch_getCollator)
+#define usearch_getMatchedLength U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedLength)
+#define usearch_getMatchedStart U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedStart)
+#define usearch_getMatchedText U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedText)
+#define usearch_getOffset U_ICU_ENTRY_POINT_RENAME(usearch_getOffset)
+#define usearch_getPattern U_ICU_ENTRY_POINT_RENAME(usearch_getPattern)
+#define usearch_getText U_ICU_ENTRY_POINT_RENAME(usearch_getText)
+#define usearch_handleNextCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handleNextCanonical)
+#define usearch_handleNextExact U_ICU_ENTRY_POINT_RENAME(usearch_handleNextExact)
+#define usearch_handlePreviousCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousCanonical)
+#define usearch_handlePreviousExact U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousExact)
+#define usearch_last U_ICU_ENTRY_POINT_RENAME(usearch_last)
+#define usearch_next U_ICU_ENTRY_POINT_RENAME(usearch_next)
+#define usearch_open U_ICU_ENTRY_POINT_RENAME(usearch_open)
+#define usearch_openFromCollator U_ICU_ENTRY_POINT_RENAME(usearch_openFromCollator)
+#define usearch_preceding U_ICU_ENTRY_POINT_RENAME(usearch_preceding)
+#define usearch_previous U_ICU_ENTRY_POINT_RENAME(usearch_previous)
+#define usearch_reset U_ICU_ENTRY_POINT_RENAME(usearch_reset)
+#define usearch_search U_ICU_ENTRY_POINT_RENAME(usearch_search)
+#define usearch_searchBackwards U_ICU_ENTRY_POINT_RENAME(usearch_searchBackwards)
+#define usearch_setAttribute U_ICU_ENTRY_POINT_RENAME(usearch_setAttribute)
+#define usearch_setBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_setBreakIterator)
+#define usearch_setCollator U_ICU_ENTRY_POINT_RENAME(usearch_setCollator)
+#define usearch_setOffset U_ICU_ENTRY_POINT_RENAME(usearch_setOffset)
+#define usearch_setPattern U_ICU_ENTRY_POINT_RENAME(usearch_setPattern)
+#define usearch_setText U_ICU_ENTRY_POINT_RENAME(usearch_setText)
+#define uset_add U_ICU_ENTRY_POINT_RENAME(uset_add)
+#define uset_addAll U_ICU_ENTRY_POINT_RENAME(uset_addAll)
+#define uset_addAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_addAllCodePoints)
+#define uset_addRange U_ICU_ENTRY_POINT_RENAME(uset_addRange)
+#define uset_addString U_ICU_ENTRY_POINT_RENAME(uset_addString)
+#define uset_applyIntPropertyValue U_ICU_ENTRY_POINT_RENAME(uset_applyIntPropertyValue)
+#define uset_applyPattern U_ICU_ENTRY_POINT_RENAME(uset_applyPattern)
+#define uset_applyPropertyAlias U_ICU_ENTRY_POINT_RENAME(uset_applyPropertyAlias)
+#define uset_charAt U_ICU_ENTRY_POINT_RENAME(uset_charAt)
+#define uset_clear U_ICU_ENTRY_POINT_RENAME(uset_clear)
+#define uset_clone U_ICU_ENTRY_POINT_RENAME(uset_clone)
+#define uset_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(uset_cloneAsThawed)
+#define uset_close U_ICU_ENTRY_POINT_RENAME(uset_close)
+#define uset_closeOver U_ICU_ENTRY_POINT_RENAME(uset_closeOver)
+#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact)
+#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement)
+#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll)
+#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains)
+#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll)
+#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints)
+#define uset_containsNone U_ICU_ENTRY_POINT_RENAME(uset_containsNone)
+#define uset_containsRange U_ICU_ENTRY_POINT_RENAME(uset_containsRange)
+#define uset_containsSome U_ICU_ENTRY_POINT_RENAME(uset_containsSome)
+#define uset_containsString U_ICU_ENTRY_POINT_RENAME(uset_containsString)
+#define uset_equals U_ICU_ENTRY_POINT_RENAME(uset_equals)
+#define uset_freeze U_ICU_ENTRY_POINT_RENAME(uset_freeze)
+#define uset_getItem U_ICU_ENTRY_POINT_RENAME(uset_getItem)
+#define uset_getItemCount U_ICU_ENTRY_POINT_RENAME(uset_getItemCount)
+#define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange)
+#define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount)
+#define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet)
+#define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf)
+#define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty)
+#define uset_isFrozen U_ICU_ENTRY_POINT_RENAME(uset_isFrozen)
+#define uset_open U_ICU_ENTRY_POINT_RENAME(uset_open)
+#define uset_openEmpty U_ICU_ENTRY_POINT_RENAME(uset_openEmpty)
+#define uset_openPattern U_ICU_ENTRY_POINT_RENAME(uset_openPattern)
+#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions)
+#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove)
+#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll)
+#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings)
+#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange)
+#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString)
+#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern)
+#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain)
+#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll)
+#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize)
+#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains)
+#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set)
+#define uset_setSerializedToOne U_ICU_ENTRY_POINT_RENAME(uset_setSerializedToOne)
+#define uset_size U_ICU_ENTRY_POINT_RENAME(uset_size)
+#define uset_span U_ICU_ENTRY_POINT_RENAME(uset_span)
+#define uset_spanBack U_ICU_ENTRY_POINT_RENAME(uset_spanBack)
+#define uset_spanBackUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanBackUTF8)
+#define uset_spanUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanUTF8)
+#define uset_toPattern U_ICU_ENTRY_POINT_RENAME(uset_toPattern)
+#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable)
+#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8)
+#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString)
+#define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check)
+#define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8)
+#define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString)
+#define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone)
+#define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close)
+#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars)
+#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales)
+#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet)
+#define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks)
+#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet)
+#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet)
+#define uspoof_getRecommendedSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedSet)
+#define uspoof_getRecommendedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedUnicodeSet)
+#define uspoof_getRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getRestrictionLevel)
+#define uspoof_getSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeleton)
+#define uspoof_getSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUTF8)
+#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString)
+#define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open)
+#define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized)
+#define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource)
+#define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize)
+#define uspoof_setAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedChars)
+#define uspoof_setAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedLocales)
+#define uspoof_setAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedUnicodeSet)
+#define uspoof_setChecks U_ICU_ENTRY_POINT_RENAME(uspoof_setChecks)
+#define uspoof_setRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_setRestrictionLevel)
+#define uspoof_swap U_ICU_ENTRY_POINT_RENAME(uspoof_swap)
+#define usprep_close U_ICU_ENTRY_POINT_RENAME(usprep_close)
+#define usprep_open U_ICU_ENTRY_POINT_RENAME(usprep_open)
+#define usprep_openByType U_ICU_ENTRY_POINT_RENAME(usprep_openByType)
+#define usprep_prepare U_ICU_ENTRY_POINT_RENAME(usprep_prepare)
+#define usprep_swap U_ICU_ENTRY_POINT_RENAME(usprep_swap)
+#define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN)
+#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN)
+#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN)
+#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold)
+#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower)
+#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle)
+#define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper)
+#define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map)
+#define ustrcase_setTempCaseMapLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_setTempCaseMapLocale)
+#define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At)
+#define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone)
+#define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close)
+#define utext_copy U_ICU_ENTRY_POINT_RENAME(utext_copy)
+#define utext_current32 U_ICU_ENTRY_POINT_RENAME(utext_current32)
+#define utext_equals U_ICU_ENTRY_POINT_RENAME(utext_equals)
+#define utext_extract U_ICU_ENTRY_POINT_RENAME(utext_extract)
+#define utext_freeze U_ICU_ENTRY_POINT_RENAME(utext_freeze)
+#define utext_getNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getNativeIndex)
+#define utext_getPreviousNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getPreviousNativeIndex)
+#define utext_hasMetaData U_ICU_ENTRY_POINT_RENAME(utext_hasMetaData)
+#define utext_isLengthExpensive U_ICU_ENTRY_POINT_RENAME(utext_isLengthExpensive)
+#define utext_isWritable U_ICU_ENTRY_POINT_RENAME(utext_isWritable)
+#define utext_moveIndex32 U_ICU_ENTRY_POINT_RENAME(utext_moveIndex32)
+#define utext_nativeLength U_ICU_ENTRY_POINT_RENAME(utext_nativeLength)
+#define utext_next32 U_ICU_ENTRY_POINT_RENAME(utext_next32)
+#define utext_next32From U_ICU_ENTRY_POINT_RENAME(utext_next32From)
+#define utext_openCharacterIterator U_ICU_ENTRY_POINT_RENAME(utext_openCharacterIterator)
+#define utext_openConstUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openConstUnicodeString)
+#define utext_openReplaceable U_ICU_ENTRY_POINT_RENAME(utext_openReplaceable)
+#define utext_openUChars U_ICU_ENTRY_POINT_RENAME(utext_openUChars)
+#define utext_openUTF8 U_ICU_ENTRY_POINT_RENAME(utext_openUTF8)
+#define utext_openUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openUnicodeString)
+#define utext_previous32 U_ICU_ENTRY_POINT_RENAME(utext_previous32)
+#define utext_previous32From U_ICU_ENTRY_POINT_RENAME(utext_previous32From)
+#define utext_replace U_ICU_ENTRY_POINT_RENAME(utext_replace)
+#define utext_setNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_setNativeIndex)
+#define utext_setup U_ICU_ENTRY_POINT_RENAME(utext_setup)
+#define utf8_appendCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_appendCharSafeBody)
+#define utf8_back1SafeBody U_ICU_ENTRY_POINT_RENAME(utf8_back1SafeBody)
+#define utf8_countTrailBytes U_ICU_ENTRY_POINT_RENAME(utf8_countTrailBytes)
+#define utf8_nextCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_nextCharSafeBody)
+#define utf8_prevCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_prevCharSafeBody)
+#define utmscale_fromInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_fromInt64)
+#define utmscale_getTimeScaleValue U_ICU_ENTRY_POINT_RENAME(utmscale_getTimeScaleValue)
+#define utmscale_toInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_toInt64)
+#define utrace_cleanup U_ICU_ENTRY_POINT_RENAME(utrace_cleanup)
+#define utrace_data U_ICU_ENTRY_POINT_RENAME(utrace_data)
+#define utrace_entry U_ICU_ENTRY_POINT_RENAME(utrace_entry)
+#define utrace_exit U_ICU_ENTRY_POINT_RENAME(utrace_exit)
+#define utrace_format U_ICU_ENTRY_POINT_RENAME(utrace_format)
+#define utrace_functionName U_ICU_ENTRY_POINT_RENAME(utrace_functionName)
+#define utrace_getFunctions U_ICU_ENTRY_POINT_RENAME(utrace_getFunctions)
+#define utrace_getLevel U_ICU_ENTRY_POINT_RENAME(utrace_getLevel)
+#define utrace_level U_ICU_ENTRY_POINT_RENAME(utrace_level)
+#define utrace_setFunctions U_ICU_ENTRY_POINT_RENAME(utrace_setFunctions)
+#define utrace_setLevel U_ICU_ENTRY_POINT_RENAME(utrace_setLevel)
+#define utrace_vformat U_ICU_ENTRY_POINT_RENAME(utrace_vformat)
+#define utrans_clone U_ICU_ENTRY_POINT_RENAME(utrans_clone)
+#define utrans_close U_ICU_ENTRY_POINT_RENAME(utrans_close)
+#define utrans_countAvailableIDs U_ICU_ENTRY_POINT_RENAME(utrans_countAvailableIDs)
+#define utrans_getAvailableID U_ICU_ENTRY_POINT_RENAME(utrans_getAvailableID)
+#define utrans_getID U_ICU_ENTRY_POINT_RENAME(utrans_getID)
+#define utrans_getSourceSet U_ICU_ENTRY_POINT_RENAME(utrans_getSourceSet)
+#define utrans_getUnicodeID U_ICU_ENTRY_POINT_RENAME(utrans_getUnicodeID)
+#define utrans_open U_ICU_ENTRY_POINT_RENAME(utrans_open)
+#define utrans_openIDs U_ICU_ENTRY_POINT_RENAME(utrans_openIDs)
+#define utrans_openInverse U_ICU_ENTRY_POINT_RENAME(utrans_openInverse)
+#define utrans_openU U_ICU_ENTRY_POINT_RENAME(utrans_openU)
+#define utrans_register U_ICU_ENTRY_POINT_RENAME(utrans_register)
+#define utrans_rep_caseContextIterator U_ICU_ENTRY_POINT_RENAME(utrans_rep_caseContextIterator)
+#define utrans_setFilter U_ICU_ENTRY_POINT_RENAME(utrans_setFilter)
+#define utrans_stripRules U_ICU_ENTRY_POINT_RENAME(utrans_stripRules)
+#define utrans_toRules U_ICU_ENTRY_POINT_RENAME(utrans_toRules)
+#define utrans_trans U_ICU_ENTRY_POINT_RENAME(utrans_trans)
+#define utrans_transIncremental U_ICU_ENTRY_POINT_RENAME(utrans_transIncremental)
+#define utrans_transIncrementalUChars U_ICU_ENTRY_POINT_RENAME(utrans_transIncrementalUChars)
+#define utrans_transUChars U_ICU_ENTRY_POINT_RENAME(utrans_transUChars)
+#define utrans_transliterator_cleanup U_ICU_ENTRY_POINT_RENAME(utrans_transliterator_cleanup)
+#define utrans_unregister U_ICU_ENTRY_POINT_RENAME(utrans_unregister)
+#define utrans_unregisterID U_ICU_ENTRY_POINT_RENAME(utrans_unregisterID)
+#define utrie2_clone U_ICU_ENTRY_POINT_RENAME(utrie2_clone)
+#define utrie2_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(utrie2_cloneAsThawed)
+#define utrie2_close U_ICU_ENTRY_POINT_RENAME(utrie2_close)
+#define utrie2_enum U_ICU_ENTRY_POINT_RENAME(utrie2_enum)
+#define utrie2_enumForLeadSurrogate U_ICU_ENTRY_POINT_RENAME(utrie2_enumForLeadSurrogate)
+#define utrie2_freeze U_ICU_ENTRY_POINT_RENAME(utrie2_freeze)
+#define utrie2_fromUTrie U_ICU_ENTRY_POINT_RENAME(utrie2_fromUTrie)
+#define utrie2_get32 U_ICU_ENTRY_POINT_RENAME(utrie2_get32)
+#define utrie2_get32FromLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_get32FromLeadSurrogateCodeUnit)
+#define utrie2_getVersion U_ICU_ENTRY_POINT_RENAME(utrie2_getVersion)
+#define utrie2_internalU8NextIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8NextIndex)
+#define utrie2_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8PrevIndex)
+#define utrie2_isFrozen U_ICU_ENTRY_POINT_RENAME(utrie2_isFrozen)
+#define utrie2_open U_ICU_ENTRY_POINT_RENAME(utrie2_open)
+#define utrie2_openDummy U_ICU_ENTRY_POINT_RENAME(utrie2_openDummy)
+#define utrie2_openFromSerialized U_ICU_ENTRY_POINT_RENAME(utrie2_openFromSerialized)
+#define utrie2_serialize U_ICU_ENTRY_POINT_RENAME(utrie2_serialize)
+#define utrie2_set32 U_ICU_ENTRY_POINT_RENAME(utrie2_set32)
+#define utrie2_set32ForLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_set32ForLeadSurrogateCodeUnit)
+#define utrie2_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie2_setRange32)
+#define utrie2_swap U_ICU_ENTRY_POINT_RENAME(utrie2_swap)
+#define utrie2_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie2_swapAnyVersion)
+#define utrie_clone U_ICU_ENTRY_POINT_RENAME(utrie_clone)
+#define utrie_close U_ICU_ENTRY_POINT_RENAME(utrie_close)
+#define utrie_defaultGetFoldingOffset U_ICU_ENTRY_POINT_RENAME(utrie_defaultGetFoldingOffset)
+#define utrie_enum U_ICU_ENTRY_POINT_RENAME(utrie_enum)
+#define utrie_get32 U_ICU_ENTRY_POINT_RENAME(utrie_get32)
+#define utrie_getData U_ICU_ENTRY_POINT_RENAME(utrie_getData)
+#define utrie_open U_ICU_ENTRY_POINT_RENAME(utrie_open)
+#define utrie_serialize U_ICU_ENTRY_POINT_RENAME(utrie_serialize)
+#define utrie_set32 U_ICU_ENTRY_POINT_RENAME(utrie_set32)
+#define utrie_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie_setRange32)
+#define utrie_swap U_ICU_ENTRY_POINT_RENAME(utrie_swap)
+#define utrie_unserialize U_ICU_ENTRY_POINT_RENAME(utrie_unserialize)
+#define utrie_unserializeDummy U_ICU_ENTRY_POINT_RENAME(utrie_unserializeDummy)
+#define vzone_clone U_ICU_ENTRY_POINT_RENAME(vzone_clone)
+#define vzone_close U_ICU_ENTRY_POINT_RENAME(vzone_close)
+#define vzone_countTransitionRules U_ICU_ENTRY_POINT_RENAME(vzone_countTransitionRules)
+#define vzone_equals U_ICU_ENTRY_POINT_RENAME(vzone_equals)
+#define vzone_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(vzone_getDynamicClassID)
+#define vzone_getLastModified U_ICU_ENTRY_POINT_RENAME(vzone_getLastModified)
+#define vzone_getNextTransition U_ICU_ENTRY_POINT_RENAME(vzone_getNextTransition)
+#define vzone_getOffset U_ICU_ENTRY_POINT_RENAME(vzone_getOffset)
+#define vzone_getOffset2 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset2)
+#define vzone_getOffset3 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset3)
+#define vzone_getPreviousTransition U_ICU_ENTRY_POINT_RENAME(vzone_getPreviousTransition)
+#define vzone_getRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_getRawOffset)
+#define vzone_getStaticClassID U_ICU_ENTRY_POINT_RENAME(vzone_getStaticClassID)
+#define vzone_getTZURL U_ICU_ENTRY_POINT_RENAME(vzone_getTZURL)
+#define vzone_hasSameRules U_ICU_ENTRY_POINT_RENAME(vzone_hasSameRules)
+#define vzone_inDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_inDaylightTime)
+#define vzone_openData U_ICU_ENTRY_POINT_RENAME(vzone_openData)
+#define vzone_openID U_ICU_ENTRY_POINT_RENAME(vzone_openID)
+#define vzone_setLastModified U_ICU_ENTRY_POINT_RENAME(vzone_setLastModified)
+#define vzone_setRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_setRawOffset)
+#define vzone_setTZURL U_ICU_ENTRY_POINT_RENAME(vzone_setTZURL)
+#define vzone_useDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_useDaylightTime)
+#define vzone_write U_ICU_ENTRY_POINT_RENAME(vzone_write)
+#define vzone_writeFromStart U_ICU_ENTRY_POINT_RENAME(vzone_writeFromStart)
+#define vzone_writeSimple U_ICU_ENTRY_POINT_RENAME(vzone_writeSimple)
+#define zrule_close U_ICU_ENTRY_POINT_RENAME(zrule_close)
+#define zrule_equals U_ICU_ENTRY_POINT_RENAME(zrule_equals)
+#define zrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(zrule_getDSTSavings)
+#define zrule_getName U_ICU_ENTRY_POINT_RENAME(zrule_getName)
+#define zrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(zrule_getRawOffset)
+#define zrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(zrule_isEquivalentTo)
+#define ztrans_adoptFrom U_ICU_ENTRY_POINT_RENAME(ztrans_adoptFrom)
+#define ztrans_adoptTo U_ICU_ENTRY_POINT_RENAME(ztrans_adoptTo)
+#define ztrans_clone U_ICU_ENTRY_POINT_RENAME(ztrans_clone)
+#define ztrans_close U_ICU_ENTRY_POINT_RENAME(ztrans_close)
+#define ztrans_equals U_ICU_ENTRY_POINT_RENAME(ztrans_equals)
+#define ztrans_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getDynamicClassID)
+#define ztrans_getFrom U_ICU_ENTRY_POINT_RENAME(ztrans_getFrom)
+#define ztrans_getStaticClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getStaticClassID)
+#define ztrans_getTime U_ICU_ENTRY_POINT_RENAME(ztrans_getTime)
+#define ztrans_getTo U_ICU_ENTRY_POINT_RENAME(ztrans_getTo)
+#define ztrans_open U_ICU_ENTRY_POINT_RENAME(ztrans_open)
+#define ztrans_openEmpty U_ICU_ENTRY_POINT_RENAME(ztrans_openEmpty)
+#define ztrans_setFrom U_ICU_ENTRY_POINT_RENAME(ztrans_setFrom)
+#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime)
+#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo)
+
+#endif
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uset.h b/src/core/basetypes/icu-ucsdet/include/unicode/uset.h
new file mode 100644
index 00000000..eb3c9e6a
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uset.h
@@ -0,0 +1,1126 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uset.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002mar07
+*   created by: Markus W. Scherer
+*
+*   C version of UnicodeSet.
+*/
+
+
+/**
+ * \file
+ * \brief C API: Unicode Set
+ *
+ * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
+ */
+
+#ifndef __USET_H__
+#define __USET_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/localpointer.h"
+
+#ifndef UCNV_H
+struct USet;
+/**
+ * A UnicodeSet.  Use the uset_* API to manipulate.  Create with
+ * uset_open*, and destroy with uset_close.
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+#endif
+
+/**
+ * Bitmask values to be passed to uset_openPatternOptions() or
+ * uset_applyPattern() taking an option parameter.
+ * @stable ICU 2.4
+ */
+enum {
+    /**
+     * Ignore white space within patterns unless quoted or escaped.
+     * @stable ICU 2.4
+     */
+    USET_IGNORE_SPACE = 1,  
+
+    /**
+     * Enable case insensitive matching.  E.g., "[ab]" with this flag
+     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
+     * match all except 'a', 'A', 'b', and 'B'. This performs a full
+     * closure over case mappings, e.g. U+017F for s.
+     *
+     * The resulting set is a superset of the input for the code points but
+     * not for the strings.
+     * It performs a case mapping closure of the code points and adds
+     * full case folding strings for the code points, and reduces strings of
+     * the original set to their full case folding equivalents.
+     *
+     * This is designed for case-insensitive matches, for example
+     * in regular expressions. The full code point case closure allows checking of
+     * an input character directly against the closure set.
+     * Strings are matched by comparing the case-folded form from the closure
+     * set with an incremental case folding of the string in question.
+     *
+     * The closure set will also contain single code points if the original
+     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
+     * This is not necessary (that is, redundant) for the above matching method
+     * but results in the same closure sets regardless of whether the original
+     * set contained the code point or a string.
+     *
+     * @stable ICU 2.4
+     */
+    USET_CASE_INSENSITIVE = 2,  
+
+    /**
+     * Enable case insensitive matching.  E.g., "[ab]" with this flag
+     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
+     * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
+     * title-, and uppercase mappings as well as the case folding
+     * of each existing element in the set.
+     * @stable ICU 3.2
+     */
+    USET_ADD_CASE_MAPPINGS = 4
+};
+
+/**
+ * Argument values for whether span() and similar functions continue while
+ * the current character is contained vs. not contained in the set.
+ *
+ * The functionality is straightforward for sets with only single code points,
+ * without strings (which is the common case):
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.
+ * - span() and spanBack() partition any string the same way when
+ *   alternating between span(USET_SPAN_NOT_CONTAINED) and
+ *   span(either "contained" condition).
+ * - Using a complemented (inverted) set and the opposite span conditions
+ *   yields the same results.
+ *
+ * When a set contains multi-code point strings, then these statements may not
+ * be true, depending on the strings in the set (for example, whether they
+ * overlap with each other) and the string that is processed.
+ * For a set with strings:
+ * - The complement of the set contains the opposite set of code points,
+ *   but the same set of strings.
+ *   Therefore, complementing both the set and the span conditions
+ *   may yield different results.
+ * - When starting spans at different positions in a string
+ *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
+ *   because a set string may start before the later position.
+ * - span(USET_SPAN_SIMPLE) may be shorter than
+ *   span(USET_SPAN_CONTAINED) because it will not recursively try
+ *   all possible paths.
+ *   For example, with a set which contains the three strings "xy", "xya" and "ax",
+ *   span("xyax", USET_SPAN_CONTAINED) will return 4 but
+ *   span("xyax", USET_SPAN_SIMPLE) will return 3.
+ *   span(USET_SPAN_SIMPLE) will never be longer than
+ *   span(USET_SPAN_CONTAINED).
+ * - With either "contained" condition, span() and spanBack() may partition
+ *   a string in different ways.
+ *   For example, with a set which contains the two strings "ab" and "ba",
+ *   and when processing the string "aba",
+ *   span() will yield contained/not-contained boundaries of { 0, 2, 3 }
+ *   while spanBack() will yield boundaries of { 0, 1, 3 }.
+ *
+ * Note: If it is important to get the same boundaries whether iterating forward
+ * or backward through a string, then either only span() should be used and
+ * the boundaries cached for backward operation, or an ICU BreakIterator
+ * could be used.
+ *
+ * Note: Unpaired surrogates are treated like surrogate code points.
+ * Similarly, set strings match only on code point boundaries,
+ * never in the middle of a surrogate pair.
+ * Illegal UTF-8 sequences are treated like U+FFFD.
+ * When processing UTF-8 strings, malformed set strings
+ * (strings with unpaired surrogates which cannot be converted to UTF-8)
+ * are ignored.
+ *
+ * @stable ICU 3.8
+ */
+typedef enum USetSpanCondition {
+    /**
+     * Continues a span() while there is no set element at the current position.
+     * Increments by one code point at a time.
+     * Stops before the first set element (character or string).
+     * (For code points only, this is like while contains(current)==FALSE).
+     *
+     * When span() returns, the substring between where it started and the position
+     * it returned consists only of characters that are not in the set,
+     * and none of its strings overlap with the span.
+     *
+     * @stable ICU 3.8
+     */
+    USET_SPAN_NOT_CONTAINED = 0,
+    /**
+     * Spans the longest substring that is a concatenation of set elements (characters or strings).
+     * (For characters only, this is like while contains(current)==TRUE).
+     *
+     * When span() returns, the substring between where it started and the position
+     * it returned consists only of set elements (characters or strings) that are in the set.
+     *
+     * If a set contains strings, then the span will be the longest substring for which there
+     * exists at least one non-overlapping concatenation of set elements (characters or strings).
+     * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
+     * (Java/ICU/Perl regex stops at the first match of an OR.)
+     *
+     * @stable ICU 3.8
+     */
+    USET_SPAN_CONTAINED = 1,
+    /**
+     * Continues a span() while there is a set element at the current position.
+     * Increments by the longest matching element at each position.
+     * (For characters only, this is like while contains(current)==TRUE).
+     *
+     * When span() returns, the substring between where it started and the position
+     * it returned consists only of set elements (characters or strings) that are in the set.
+     *
+     * If a set only contains single characters, then this is the same
+     * as USET_SPAN_CONTAINED.
+     *
+     * If a set contains strings, then the span will be the longest substring
+     * with a match at each position with the longest single set element (character or string).
+     *
+     * Use this span condition together with other longest-match algorithms,
+     * such as ICU converters (ucnv_getUnicodeSet()).
+     *
+     * @stable ICU 3.8
+     */
+    USET_SPAN_SIMPLE = 2,
+    /**
+     * One more than the last span condition.
+     * @stable ICU 3.8
+     */
+    USET_SPAN_CONDITION_COUNT
+} USetSpanCondition;
+
+enum {
+    /**
+     * Capacity of USerializedSet::staticArray.
+     * Enough for any single-code point set.
+     * Also provides padding for nice sizeof(USerializedSet).
+     * @stable ICU 2.4
+     */
+    USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
+};
+
+/**
+ * A serialized form of a Unicode set.  Limited manipulations are
+ * possible directly on a serialized set.  See below.
+ * @stable ICU 2.4
+ */
+typedef struct USerializedSet {
+    /**
+     * The serialized Unicode Set.
+     * @stable ICU 2.4
+     */
+    const uint16_t *array;
+    /**
+     * The length of the array that contains BMP characters.
+     * @stable ICU 2.4
+     */
+    int32_t bmpLength;
+    /**
+     * The total length of the array.
+     * @stable ICU 2.4
+     */
+    int32_t length;
+    /**
+     * A small buffer for the array to reduce memory allocations.
+     * @stable ICU 2.4
+     */
+    uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
+} USerializedSet;
+
+/*********************************************************************
+ * USet API
+ *********************************************************************/
+
+/**
+ * Create an empty USet object.
+ * Equivalent to uset_open(1, 0).
+ * @return a newly created USet.  The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 4.2
+ */
+U_STABLE USet* U_EXPORT2
+uset_openEmpty(void);
+
+/**
+ * Creates a USet object that contains the range of characters
+ * start..end, inclusive.  If <code>start > end</code> 
+ * then an empty set is created (same as using uset_openEmpty()).
+ * @param start first character of the range, inclusive
+ * @param end last character of the range, inclusive
+ * @return a newly created USet.  The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_open(UChar32 start, UChar32 end);
+
+/**
+ * Creates a set from the given pattern.  See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_openPattern(const UChar* pattern, int32_t patternLength,
+                 UErrorCode* ec);
+
+/**
+ * Creates a set from the given pattern.  See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
+                 uint32_t options,
+                 UErrorCode* ec);
+
+/**
+ * Disposes of the storage used by a USet object.  This function should
+ * be called exactly once for objects returned by uset_open().
+ * @param set the object to dispose of
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_close(USet* set);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUSetPointer
+ * "Smart pointer" class, closes a USet via uset_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns a copy of this object.
+ * If this set is frozen, then the clone will be frozen as well.
+ * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
+ * @param set the original set
+ * @return the newly allocated copy of the set
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_STABLE USet * U_EXPORT2
+uset_clone(const USet *set);
+
+/**
+ * Determines whether the set has been frozen (made immutable) or not.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return TRUE/FALSE for whether the set has been frozen
+ * @see uset_freeze
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_STABLE UBool U_EXPORT2
+uset_isFrozen(const USet *set);
+
+/**
+ * Freeze the set (make it immutable).
+ * Once frozen, it cannot be unfrozen and is therefore thread-safe
+ * until it is deleted.
+ * See the ICU4J Freezable interface for details.
+ * Freezing the set may also make some operations faster, for example
+ * uset_contains() and uset_span().
+ * A frozen set will not be modified. (It remains frozen.)
+ * @param set the set
+ * @return the same set, now frozen
+ * @see uset_isFrozen
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_STABLE void U_EXPORT2
+uset_freeze(USet *set);
+
+/**
+ * Clone the set and make the clone mutable.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return the mutable clone
+ * @see uset_freeze
+ * @see uset_isFrozen
+ * @see uset_clone
+ * @stable ICU 3.8
+ */
+U_STABLE USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set);
+
+/**
+ * Causes the USet object to represent the range <code>start - end</code>.
+ * If <code>start > end</code> then this USet is set to an empty range.
+ * A frozen set will not be modified.
+ * @param set the object to set to the given range
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_set(USet* set,
+         UChar32 start, UChar32 end);
+
+/**
+ * Modifies the set to represent the set specified by the given
+ * pattern. See the UnicodeSet class description for the syntax of 
+ * the pattern language. See also the User Guide chapter about UnicodeSet.
+ * <em>Empties the set passed before applying the pattern.</em>
+ * A frozen set will not be modified.
+ * @param set               The set to which the pattern is to be applied. 
+ * @param pattern           A pointer to UChar string specifying what characters are in the set.
+ *                          The character at pattern[0] must be a '['.
+ * @param patternLength     The length of the UChar string. -1 if NUL terminated.
+ * @param options           A bitmask for options to apply to the pattern.
+ *                          Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param status            Returns an error if the pattern cannot be parsed.
+ * @return                  Upon successful parse, the value is either
+ *                          the index of the character after the closing ']' 
+ *                          of the parsed pattern.
+ *                          If the status code indicates failure, then the return value 
+ *                          is the index of the error in the source.
+ *
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2 
+uset_applyPattern(USet *set,
+                  const UChar *pattern, int32_t patternLength,
+                  uint32_t options,
+                  UErrorCode *status);
+
+/**
+ * Modifies the set to contain those code points which have the given value
+ * for the given binary or enumerated property, as returned by
+ * u_getIntPropertyValue.  Prior contents of this set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the property
+ *
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+ *
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
+ * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+ * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_applyIntPropertyValue(USet* set,
+                           UProperty prop, int32_t value, UErrorCode* ec);
+
+/**
+ * Modifies the set to contain those code points which have the
+ * given value for the given property.  Prior contents of this
+ * set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the given
+ * property and value alias
+ *
+ * @param prop a string specifying a property alias, either short or long.
+ * The name is matched loosely.  See PropertyAliases.txt for names and a
+ * description of loose matching.  If the value string is empty, then this
+ * string is interpreted as either a General_Category value alias, a Script
+ * value alias, a binary property alias, or a special ID.  Special IDs are
+ * matched loosely and correspond to the following sets:
+ *
+ * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ASCII" = [\\u0000-\\u007F],
+ * "Assigned" = [:^Cn:].
+ *
+ * @param propLength the length of the prop, or -1 if NULL
+ *
+ * @param value a string specifying a value alias, either short or long.
+ * The name is matched loosely.  See PropertyValueAliases.txt for names
+ * and a description of loose matching.  In addition to aliases listed,
+ * numeric values and canonical combining classes may be expressed
+ * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string
+ * may also be empty.
+ *
+ * @param valueLength the length of the value, or -1 if NULL
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_applyPropertyAlias(USet* set,
+                        const UChar *prop, int32_t propLength,
+                        const UChar *value, int32_t valueLength,
+                        UErrorCode* ec);
+
+/**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern.
+ *
+ * @param pattern a string specifying the pattern
+ * @param patternLength the length of the pattern, or -1 if NULL
+ * @param pos the given position
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
+                      int32_t pos);
+
+/**
+ * Returns a string representation of this set.  If the result of
+ * calling this function is passed to a uset_openPattern(), it
+ * will produce another set that is equal to this one.
+ * @param set the set
+ * @param result the string to receive the rules, may be NULL
+ * @param resultCapacity the capacity of result, may be 0 if result is NULL
+ * @param escapeUnprintable if TRUE then convert unprintable
+ * character to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx.  Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @param ec error code.
+ * @return length of string, possibly larger than resultCapacity
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_toPattern(const USet* set,
+               UChar* result, int32_t resultCapacity,
+               UBool escapeUnprintable,
+               UErrorCode* ec);
+
+/**
+ * Adds the given character to the given USet.  After this call,
+ * uset_contains(set, c) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param c the character to add
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_add(USet* set, UChar32 c);
+
+/**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present.  This operation effectively
+ * modifies this set so that its value is the <i>union</i> of the two
+ * sets.  The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to which to add the set
+ * @param additionalSet the source set whose elements are to be added to this set.
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uset_addAll(USet* set, const USet *additionalSet);
+
+/**
+ * Adds the given range of characters to the given USet.  After this call,
+ * uset_contains(set, start, end) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param start the first character of the range to add, inclusive
+ * @param end the last character of the range to add, inclusive
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uset_addRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Adds the given string to the given USet.  After this call,
+ * uset_containsString(set, str, strLen) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the string to add
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_addString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
+ * If this set already any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the source string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Removes the given character from the given USet.  After this call,
+ * uset_contains(set, c) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the character
+ * @param c the character to remove
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_remove(USet* set, UChar32 c);
+
+/**
+ * Removes the given range of characters from the given USet.  After this call,
+ * uset_contains(set, start, end) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param start the first character of the range to remove, inclusive
+ * @param end the last character of the range to remove, inclusive
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Removes the given string to the given USet.  After this call,
+ * uset_containsString(set, str, strLen) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the string to remove
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_removeString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Removes from this set all of its elements that are contained in the
+ * specified set.  This operation effectively modifies this
+ * set so that its value is the <i>asymmetric set difference</i> of
+ * the two sets.
+ * A frozen set will not be modified.
+ * @param set the object from which the elements are to be removed
+ * @param removeSet the object that defines which elements will be
+ * removed from this set
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeAll(USet* set, const USet* removeSet);
+
+/**
+ * Retain only the elements in this set that are contained in the
+ * specified range.  If <code>start > end</code> then an empty range is
+ * retained, leaving the set empty.  This is equivalent to
+ * a boolean logic AND, or a set INTERSECTION.
+ * A frozen set will not be modified.
+ *
+ * @param set the object for which to retain only the specified range
+ * @param start first character, inclusive, of range to be retained
+ * to this set.
+ * @param end last character, inclusive, of range to be retained
+ * to this set.
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_retain(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Retains only the elements in this set that are contained in the
+ * specified set.  In other words, removes from this set all of
+ * its elements that are not contained in the specified set.  This
+ * operation effectively modifies this set so that its value is
+ * the <i>intersection</i> of the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perform the retain
+ * @param retain set that defines which elements this set will retain
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_retainAll(USet* set, const USet* retain);
+
+/**
+ * Reallocate this objects internal structures to take up the least
+ * possible space, without changing this object's value.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perfrom the compact
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_compact(USet* set);
+
+/**
+ * Inverts this set.  This operation modifies this set so that
+ * its value is its complement.  This operation does not affect
+ * the multicharacter strings, if any.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_complement(USet* set);
+
+/**
+ * Complements in this set all elements contained in the specified
+ * set.  Any character in the other set will be removed if it is
+ * in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param set the set with which to complement
+ * @param complement set that defines which elements will be xor'ed
+ * from this set.
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_complementAll(USet* set, const USet* complement);
+
+/**
+ * Removes all of the elements from this set.  This set will be
+ * empty after this call returns.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_clear(USet* set);
+
+/**
+ * Close this set over the given attribute.  For the attribute
+ * USET_CASE, the result is to modify this set so that:
+ *
+ * 1. For each character or string 'a' in this set, all strings or
+ * characters 'b' such that foldCase(a) == foldCase(b) are added
+ * to this set.
+ *
+ * 2. For each string 'e' in the resulting set, if e !=
+ * foldCase(e), 'e' will be removed.
+ *
+ * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
+ *
+ * (Here foldCase(x) refers to the operation u_strFoldCase, and a
+ * == b denotes that the contents are the same, not pointer
+ * comparison.)
+ *
+ * A frozen set will not be modified.
+ *
+ * @param set the set
+ *
+ * @param attributes bitmask for attributes to close over.
+ * Currently only the USET_CASE bit is supported.  Any undefined bits
+ * are ignored.
+ * @stable ICU 4.2
+ */
+U_STABLE void U_EXPORT2
+uset_closeOver(USet* set, int32_t attributes);
+
+/**
+ * Remove all strings from this set.
+ *
+ * @param set the set
+ * @stable ICU 4.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeAllStrings(USet* set);
+
+/**
+ * Returns TRUE if the given USet contains no characters and no
+ * strings.
+ * @param set the set
+ * @return true if set is empty
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_isEmpty(const USet* set);
+
+/**
+ * Returns TRUE if the given USet contains the given character.
+ * This function works faster with a frozen set.
+ * @param set the set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_contains(const USet* set, UChar32 c);
+
+/**
+ * Returns TRUE if the given USet contains all characters c
+ * where start <= c && c <= end.
+ * @param set the set
+ * @param start the first character of the range to test, inclusive
+ * @param end the last character of the range to test, inclusive
+ * @return TRUE if set contains the range
+ * @stable ICU 2.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsRange(const USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Returns TRUE if the given USet contains the given string.
+ * @param set the set
+ * @param str the string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if set contains str
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsString(const USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Returns the index of the given character within this set, where
+ * the set is ordered by ascending code point.  If the character
+ * is not in this set, return -1.  The inverse of this method is
+ * <code>charAt()</code>.
+ * @param set the set
+ * @param c the character to obtain the index for
+ * @return an index from 0..size()-1, or -1
+ * @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+uset_indexOf(const USet* set, UChar32 c);
+
+/**
+ * Returns the character at the given index within this set, where
+ * the set is ordered by ascending code point.  If the index is
+ * out of range, return (UChar32)-1.  The inverse of this method is
+ * <code>indexOf()</code>.
+ * @param set the set
+ * @param charIndex an index from 0..size()-1 to obtain the char for
+ * @return the character at the given index, or (UChar32)-1.
+ * @stable ICU 3.2
+ */
+U_STABLE UChar32 U_EXPORT2
+uset_charAt(const USet* set, int32_t charIndex);
+
+/**
+ * Returns the number of characters and strings contained in the given
+ * USet.
+ * @param set the set
+ * @return a non-negative integer counting the characters and strings
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_size(const USet* set);
+
+/**
+ * Returns the number of items in this set.  An item is either a range
+ * of characters or a single multicharacter string.
+ * @param set the set
+ * @return a non-negative integer counting the character ranges
+ * and/or strings contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getItemCount(const USet* set);
+
+/**
+ * Returns an item of this set.  An item is either a range of
+ * characters or a single multicharacter string.
+ * @param set the set
+ * @param itemIndex a non-negative integer in the range 0..
+ * uset_getItemCount(set)-1
+ * @param start pointer to variable to receive first character
+ * in range, inclusive
+ * @param end pointer to variable to receive last character in range,
+ * inclusive
+ * @param str buffer to receive the string, may be NULL
+ * @param strCapacity capacity of str, or 0 if str is NULL
+ * @param ec error code
+ * @return the length of the string (>= 2), or 0 if the item is a
+ * range, in which case it is the range *start..*end, or -1 if
+ * itemIndex is out of range
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getItem(const USet* set, int32_t itemIndex,
+             UChar32* start, UChar32* end,
+             UChar* str, int32_t strCapacity,
+             UErrorCode* ec);
+
+/**
+ * Returns true if set1 contains all the characters and strings
+ * of set2. It answers the question, 'Is set1 a superset of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsAll(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if this set contains all the characters
+ * of the given string. This is does not check containment of grapheme
+ * clusters, like uset_containsString.
+ * @param set set of characters to be checked for containment
+ * @param str string containing codepoints to be checked for containment
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if the test condition is met
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Returns true if set1 contains none of the characters and strings
+ * of set2. It answers the question, 'Is set1 a disjoint set of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsNone(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if set1 contains some of the characters and strings
+ * of set2. It answers the question, 'Does set1 and set2 have an intersection?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsSome(const USet* set1, const USet* set2);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ *         0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_STABLE int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ *         the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_STABLE int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ *         0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_STABLE int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ *         the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_STABLE int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns true if set1 contains all of the characters and strings
+ * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_equals(const USet* set1, const USet* set2);
+
+/*********************************************************************
+ * Serialized set API
+ *********************************************************************/
+
+/**
+ * Serializes this set into an array of 16-bit integers.  Serialization
+ * (currently) only records the characters in the set; multicharacter
+ * strings are ignored.
+ *
+ * The array
+ * has following format (each line is one 16-bit integer):
+ *
+ *  length     = (n+2*m) | (m!=0?0x8000:0)
+ *  bmpLength  = n; present if m!=0
+ *  bmp[0]
+ *  bmp[1]
+ *  ...
+ *  bmp[n-1]
+ *  supp-high[0]
+ *  supp-low[0]
+ *  supp-high[1]
+ *  supp-low[1]
+ *  ...
+ *  supp-high[m-1]
+ *  supp-low[m-1]
+ *
+ * The array starts with a header.  After the header are n bmp
+ * code points, then m supplementary code points.  Either n or m
+ * or both may be zero.  n+2*m is always <= 0x7FFF.
+ *
+ * If there are no supplementary characters (if m==0) then the
+ * header is one 16-bit integer, 'length', with value n.
+ *
+ * If there are supplementary characters (if m!=0) then the header
+ * is two 16-bit integers.  The first, 'length', has value
+ * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
+ *
+ * After the header the code points are stored in ascending order.
+ * Supplementary code points are stored as most significant 16
+ * bits followed by least significant 16 bits.
+ *
+ * @param set the set
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
+ * May be NULL only if destCapacity is zero.
+ * @param destCapacity size of dest, or zero.  Must not be negative.
+ * @param pErrorCode pointer to the error code.  Will be set to
+ * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to
+ * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
+ * @return the total length of the serialized format, including
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+ * than U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
+
+/**
+ * Given a serialized array, fill in the given serialized set object.
+ * @param fillSet pointer to result
+ * @param src pointer to start of array
+ * @param srcLength length of array
+ * @return true if the given array is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
+
+/**
+ * Set the USerializedSet to contain the given character (and nothing
+ * else).
+ * @param fillSet pointer to result
+ * @param c The codepoint to set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
+
+/**
+ * Returns TRUE if the given USerializedSet contains the given
+ * character.
+ * @param set the serialized set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_serializedContains(const USerializedSet* set, UChar32 c);
+
+/**
+ * Returns the number of disjoint ranges of characters contained in
+ * the given serialized set.  Ignores any strings contained in the
+ * set.
+ * @param set the serialized set
+ * @return a non-negative integer counting the character ranges
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getSerializedRangeCount(const USerializedSet* set);
+
+/**
+ * Returns a range of characters contained in the given serialized
+ * set.
+ * @param set the serialized set
+ * @param rangeIndex a non-negative integer in the range 0..
+ * uset_getSerializedRangeCount(set)-1
+ * @param pStart pointer to variable to receive first character
+ * in range, inclusive
+ * @param pEnd pointer to variable to receive last character in range,
+ * inclusive
+ * @return true if rangeIndex is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
+                        UChar32* pStart, UChar32* pEnd);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/ustring.h b/src/core/basetypes/icu-ucsdet/include/unicode/ustring.h
new file mode 100644
index 00000000..6d141e8d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/ustring.h
@@ -0,0 +1,1700 @@
+/*
+**********************************************************************
+*   Copyright (C) 1998-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File ustring.h
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   12/07/98    bertrand    Creation.
+******************************************************************************
+*/
+
+#ifndef USTRING_H
+#define USTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uiter.h"
+
+/**
+ * \def UBRK_TYPEDEF_UBREAK_ITERATOR
+ * @internal 
+ */
+
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+#   define UBRK_TYPEDEF_UBREAK_ITERATOR
+/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
+    typedef struct UBreakIterator UBreakIterator;
+#endif
+
+/**
+ * \file
+ * \brief C API: Unicode string handling functions
+ *
+ * These C API functions provide general Unicode string handling.
+ *
+ * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
+ * functions. (For example, they do not check for bad arguments like NULL string pointers.)
+ * In some cases, only the thread-safe variant of such a function is implemented here
+ * (see u_strtok_r()).
+ *
+ * Other functions provide more Unicode-specific functionality like locale-specific
+ * upper/lower-casing and string comparison in code point order.
+ *
+ * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
+ * UTF-16 encodes each Unicode code point with either one or two UChar code units.
+ * (This is the default form of Unicode, and a forward-compatible extension of the original,
+ * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
+ * in 1996.)
+ *
+ * Some APIs accept a 32-bit UChar32 value for a single code point.
+ *
+ * ICU also handles 16-bit Unicode text with unpaired surrogates.
+ * Such text is not well-formed UTF-16.
+ * Code-point-related functions treat unpaired surrogates as surrogate code points,
+ * i.e., as separate units.
+ *
+ * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
+ * it is much more efficient even for random access because the code unit values
+ * for single-unit characters vs. lead units vs. trail units are completely disjoint.
+ * This means that it is easy to determine character (code point) boundaries from
+ * random offsets in the string.
+ *
+ * Unicode (UTF-16) string processing is optimized for the single-unit case.
+ * Although it is important to support supplementary characters
+ * (which use pairs of lead/trail code units called "surrogates"),
+ * their occurrence is rare. Almost all characters in modern use require only
+ * a single UChar code unit (i.e., their code point values are <=0xffff).
+ *
+ * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html).
+ * For a discussion of the handling of unpaired surrogates see also
+ * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
+ */
+
+/**
+ * \defgroup ustring_ustrlen String Length
+ * \ingroup ustring_strlen
+ */
+/*@{*/
+/**
+ * Determine the length of an array of UChar.
+ *
+ * @param s The array of UChars, NULL (U+0000) terminated.
+ * @return The number of UChars in <code>chars</code>, minus the terminator.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strlen(const UChar *s);
+/*@}*/
+
+/**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h).
+ *
+ * @param s The input string.
+ * @param length The number of UChar code units to be checked, or -1 to count all
+ *               code points before the first NUL (U+0000).
+ * @return The number of code points in the specified code units.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_countChar32(const UChar *s, int32_t length);
+
+/**
+ * Check if the string contains more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in the entire string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length is known
+ * (not -1 for NUL-termination) and falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (u_countChar32(s, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param s The input string.
+ * @param length The length of the string, or -1 if it is NUL-terminated.
+ * @param number The number of code points in the string is compared against
+ *               the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ *         than 'number'. Same as (u_countChar32(s, length)>number).
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
+
+/**
+ * Concatenate two ustrings.  Appends a copy of <code>src</code>,
+ * including the null terminator, to <code>dst</code>. The initial copied
+ * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strcat(UChar     *dst, 
+    const UChar     *src);
+
+/**
+ * Concatenate two ustrings.  
+ * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
+ * Adds a terminating NUL.
+ * If src is too long, then only <code>n-1</code> characters will be copied
+ * before the terminating NUL.
+ * If <code>n&lt;=0</code> then dst is not modified.
+ *
+ * @param dst The destination string.
+ * @param src The source string (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to append; no-op if <=0.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strncat(UChar     *dst, 
+     const UChar     *src, 
+     int32_t     n);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strrstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar * U_EXPORT2
+u_strchr32(const UChar *s, UChar32 c);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memchr32
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c);
+
+/**
+ * Locates the first occurrence in the string <code>string</code> of any of the characters
+ * in the string <code>matchSet</code>.
+ * Works just like C's strpbrk but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ *                 for which to search in the text string.
+ * @return A pointer to the  character in <code>string</code> that matches one of the
+ *         characters in <code>matchSet</code>, or NULL if no such character is found.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar * U_EXPORT2
+u_strpbrk(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
+ * Works just like C's strcspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ *                 for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do not
+ *         occur in <code>matchSet</code>.
+ * @see u_strspn
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that occur somewhere in <code>matchSet</code>.
+ * Works just like C's strspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ *                 for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do
+ *         occur in <code>matchSet</code>.
+ * @see u_strcspn
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * The string tokenizer API allows an application to break a string into
+ * tokens. Unlike strtok(), the saveState (the current pointer within the
+ * original string) is maintained in saveState. In the first call, the
+ * argument src is a pointer to the string. In subsequent calls to
+ * return successive tokens of that string, src must be specified as
+ * NULL. The value saveState is set by this function to maintain the
+ * function's position within the string, and on each subsequent call
+ * you must give this argument the same variable. This function does
+ * handle surrogate pairs. This function is similar to the strtok_r()
+ * the POSIX Threads Extension (1003.1c-1995) version.
+ *
+ * @param src String containing token(s). This string will be modified.
+ *            After the first call to u_strtok_r(), this argument must
+ *            be NULL to get to the next token.
+ * @param delim Set of delimiter characters (Unicode code points).
+ * @param saveState The current pointer within the original string,
+ *              which is set by this function. The saveState
+ *              parameter should the address of a local variable of type
+ *              UChar *. (i.e. defined "Uhar *myLocalSaveState" and use
+ *              &myLocalSaveState for this parameter).
+ * @return A pointer to the next token found in src, or NULL
+ *         when there are no more tokens.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar * U_EXPORT2
+u_strtok_r(UChar    *src, 
+     const UChar    *delim,
+           UChar   **saveState);
+
+/**
+ * Compare two Unicode strings for bitwise equality (code unit order).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t  U_EXPORT2
+u_strcmp(const UChar     *s1, 
+         const UChar     *s2);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * See u_strCompare for details.
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
+
+/**
+ * Compare two Unicode strings (binary order).
+ *
+ * The comparison can be done in code unit order or in code point order.
+ * They differ only in UTF-16 when
+ * comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This functions works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param codePointOrder Choose between code unit order (FALSE)
+ *                       and code point order (TRUE).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCompare(const UChar *s1, int32_t length1,
+             const UChar *s2, int32_t length2,
+             UBool codePointOrder);
+
+/**
+ * Compare two Unicode strings (binary order)
+ * as presented by UCharIterator objects.
+ * Works otherwise just like u_strCompare().
+ *
+ * Both iterators are reset to their start positions.
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ *
+ * @param iter1 First source string iterator.
+ * @param iter2 Second source string iterator.
+ * @param codePointOrder Choose between code unit order (FALSE)
+ *                       and code point order (TRUE).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see u_strCompare
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also unistr.h and unorm.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER  0x8000
+#endif
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to
+ *   u_strCompare(u_strFoldCase(s1, options),
+ *                u_strFoldCase(s2, options),
+ *                (options&U_COMPARE_CODE_POINT_ORDER)!=0).
+ *
+ * The comparison can be done in UTF-16 code unit order or in code point order.
+ * They differ only when comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This functions works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ *     Comparison in code unit order with default case folding.
+ *
+ *   - U_COMPARE_CODE_POINT_ORDER
+ *     Set to choose code point order instead of code unit order
+ *     (see u_strCompare for details).
+ *
+ *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCaseCompare(const UChar *s1, int32_t length1,
+                 const UChar *s2, int32_t length2,
+                 uint32_t options,
+                 UErrorCode *pErrorCode);
+
+/**
+ * Compare two ustrings for bitwise equality. 
+ * Compares at most <code>n</code> characters.
+ *
+ * @param ucs1 A string to compare (can be NULL/invalid if n<=0).
+ * @param ucs2 A string to compare (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to compare; always returns 0 if n<=0.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncmp(const UChar     *ucs1, 
+     const UChar     *ucs2, 
+     int32_t     n);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param options A bit set of options:
+ *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ *     Comparison in code unit order with default case folding.
+ *
+ *   - U_COMPARE_CODE_POINT_ORDER
+ *     Set to choose code point order instead of code unit order
+ *     (see u_strCompare for details).
+ *
+ *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
+ * u_strFoldCase(s2, at most n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters each string to case-fold and then compare.
+ * @param options A bit set of options:
+ *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ *     Comparison in code unit order with default case folding.
+ *
+ *   - U_COMPARE_CODE_POINT_ORDER
+ *     Set to choose code point order instead of code unit order
+ *     (see u_strCompare for details).
+ *
+ *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
+ * u_strFoldCase(s2, n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param length The number of characters in each string to case-fold and then compare.
+ * @param options A bit set of options:
+ *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ *     Comparison in code unit order with default case folding.
+ *
+ *   - U_COMPARE_CODE_POINT_ORDER
+ *     Set to choose code point order instead of code unit order
+ *     (see u_strCompare for details).
+ *
+ *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
+
+/**
+ * Copy a ustring. Adds a null terminator.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strcpy(UChar     *dst, 
+    const UChar     *src);
+
+/**
+ * Copy a ustring.
+ * Copies at most <code>n</code> characters.  The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to copy; no-op if <=0.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strncpy(UChar     *dst, 
+     const UChar     *src, 
+     int32_t     n);
+
+#if !UCONFIG_NO_CONVERSION
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Adds a null terminator.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
+               const char *src );
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Copies at most <code>n</code> characters.  The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
+            const char *src,
+            int32_t n);
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Adds a null terminator.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
+            const UChar *src );
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Copies at most <code>n</code> characters.  The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
+            const UChar *src,
+            int32_t n );
+
+#endif
+
+/**
+ * Synonym for memcpy(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string (can be NULL/invalid if count<=0)
+ * @param count The number of characters to copy; no-op if <=0
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memcpy(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Synonym for memmove(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string (can be NULL/invalid if count<=0)
+ * @param count The number of characters to move; no-op if <=0
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memmove(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
+ *
+ * @param dest The destination string.
+ * @param c The character to initialize the string.
+ * @param count The maximum number of characters to set.
+ * @return A pointer to <code>dest</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memset(UChar *dest, UChar c, int32_t count);
+
+/**
+ * Compare the first <code>count</code> UChars of each buffer.
+ *
+ * @param buf1 The first string to compare.
+ * @param buf2 The second string to compare.
+ * @param count The maximum number of UChars to compare.
+ * @return When buf1 < buf2, a negative number is returned.
+ *      When buf1 == buf2, 0 is returned.
+ *      When buf1 > buf2, a positive number is returned.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param count The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strFindFirst
+ */
+U_STABLE UChar* U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar* U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strFindLast
+ */
+U_STABLE UChar* U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strFindLast
+ */
+U_STABLE UChar* U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Unicode String literals in C.
+ * We need one macro to declare a variable for the string
+ * and to statically preinitialize it if possible,
+ * and a second macro to dynamically intialize such a string variable if necessary.
+ *
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * A pair of macros for a single string must be used with the same
+ * parameters.
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * <code>NUL</code>, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ *
+ * Usage:
+ * <pre>
+ *    U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
+ *    U_STRING_DECL(ustringVar2, "jumps 5%", 8);
+ *    static UBool didInit=FALSE;
+ * 
+ *    int32_t function() {
+ *        if(!didInit) {
+ *            U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
+ *            U_STRING_INIT(ustringVar2, "jumps 5%", 8);
+ *            didInit=TRUE;
+ *        }
+ *        return u_strcmp(ustringVar1, ustringVar2);
+ *    }
+ * </pre>
+ * 
+ * Note that the macros will NOT consistently work if their argument is another <code>#define</code>. 
+ *  The following will not work on all platforms, don't use it.
+ * 
+ * <pre>
+ *     #define GLUCK "Mr. Gluck"
+ *     U_STRING_DECL(var, GLUCK, 9)
+ *     U_STRING_INIT(var, GLUCK, 9)
+ * </pre>
+ * 
+ * Instead, use the string literal "Mr. Gluck"  as the argument to both macro
+ * calls.
+ *
+ *
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16)
+#   define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
+    /**@stable ICU 2.0 */
+#   define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
+    /**@stable ICU 2.0 */
+#   define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
+    /**@stable ICU 2.0 */
+#   define U_STRING_INIT(var, cs, length)
+#else
+#   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
+    /**@stable ICU 2.0 */
+#   define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
+#endif
+
+/**
+ * Unescape a string of characters and write the resulting
+ * Unicode characters to the destination buffer.  The following escape
+ * sequences are recognized:
+ *
+ * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh   8 hex digits
+ * \\xhh         1-2 hex digits
+ * \\x{h...}     1-8 hex digits
+ * \\ooo         1-3 octal digits; o in [0-7]
+ * \\cX          control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped.  For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string.  An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * The above characters are recognized in the compiler's codepage,
+ * that is, they are coded as 'u', '\\', etc.  Characters that are
+ * not parts of escape sequences are converted using u_charsToUChars().
+ *
+ * This function is similar to UnicodeString::unescape() but not
+ * identical to it.  The latter takes a source UnicodeString, so it
+ * does escape recognition but no conversion.
+ *
+ * @param src a zero-terminated string of invariant characters
+ * @param dest pointer to buffer to receive converted and unescaped
+ * text and, if there is room, a zero terminator.  May be NULL for
+ * preflighting, in which case no UChars will be written, but the
+ * return value will still be valid.  On error, an empty string is
+ * stored here (if possible).
+ * @param destCapacity the number of UChars that may be written at
+ * dest.  Ignored if dest == NULL.
+ * @return the length of unescaped string.
+ * @see u_unescapeAt
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_unescape(const char *src,
+           UChar *dest, int32_t destCapacity);
+
+U_CDECL_BEGIN
+/**
+ * Callback function for u_unescapeAt() that returns a character of
+ * the source text given an offset and a context pointer.  The context
+ * pointer will be whatever is passed into u_unescapeAt().
+ *
+ * @param offset pointer to the offset that will be passed to u_unescapeAt().
+ * @param context an opaque pointer passed directly into u_unescapeAt()
+ * @return the character represented by the escape sequence at
+ * offset
+ * @see u_unescapeAt
+ * @stable ICU 2.0
+ */
+typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
+U_CDECL_END
+
+/**
+ * Unescape a single sequence. The character at offset-1 is assumed
+ * (without checking) to be a backslash.  This method takes a callback
+ * pointer to a function that returns the UChar at a given offset.  By
+ * varying this callback, ICU functions are able to unescape char*
+ * strings, UnicodeString objects, and UFILE pointers.
+ *
+ * If offset is out of range, or if the escape sequence is ill-formed,
+ * (UChar32)0xFFFFFFFF is returned.  See documentation of u_unescape()
+ * for a list of recognized sequences.
+ *
+ * @param charAt callback function that returns a UChar of the source
+ * text given an offset and a context pointer.
+ * @param offset pointer to the offset that will be passed to charAt.
+ * The offset value will be updated upon return to point after the
+ * last parsed character of the escape sequence.  On error the offset
+ * is unchanged.
+ * @param length the number of characters in the source text.  The
+ * last character of the source text is considered to be at offset
+ * length-1.
+ * @param context an opaque pointer passed directly into charAt.
+ * @return the character represented by the escape sequence at
+ * offset, or (UChar32)0xFFFFFFFF on error.
+ * @see u_unescape()
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_unescapeAt(UNESCAPE_CHAR_AT charAt,
+             int32_t *offset,
+             int32_t length,
+             void *context);
+
+/**
+ * Uppercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest      A buffer for the result string. The result will be zero-terminated if
+ *                  the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ *         only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToUpper(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             const char *locale,
+             UErrorCode *pErrorCode);
+
+/**
+ * Lowercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest      A buffer for the result string. The result will be zero-terminated if
+ *                  the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ *         only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToLower(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             const char *locale,
+             UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest      A buffer for the result string. The result will be zero-terminated if
+ *                  the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param titleIter A break iterator to find the first characters of words
+ *                  that are to be titlecased.
+ *                  If none is provided (NULL), then a standard titlecase
+ *                  break iterator is opened.
+ * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ *         only some of the result was written to the destination buffer.
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             UBreakIterator *titleIter,
+             const char *locale,
+             UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-folds the characters in a string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest      A buffer for the result string. The result will be zero-terminated if
+ *                  the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param options   Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ *         only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strFoldCase(UChar *dest, int32_t destCapacity,
+              const UChar *src, int32_t srcLength,
+              uint32_t options,
+              UErrorCode *pErrorCode);
+
+#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
+/**
+ * Convert a UTF-16 string to a wchar_t string.
+ * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
+ * this function simply calls the fast, dedicated function for that.
+ * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of wchar_t's). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE wchar_t* U_EXPORT2
+u_strToWCS(wchar_t *dest, 
+           int32_t destCapacity,
+           int32_t *pDestLength,
+           const UChar *src, 
+           int32_t srcLength,
+           UErrorCode *pErrorCode);
+/**
+ * Convert a wchar_t string to UTF-16.
+ * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
+ * this function simply calls the fast, dedicated function for that.
+ * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromWCS(UChar   *dest,
+             int32_t destCapacity, 
+             int32_t *pDestLength,
+             const wchar_t *src,
+             int32_t srcLength,
+             UErrorCode *pErrorCode);
+#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
+
+/**
+ * Convert a UTF-16 string to UTF-8.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strToUTF8WithSub
+ * @see u_strFromUTF8
+ */
+U_STABLE char* U_EXPORT2 
+u_strToUTF8(char *dest,           
+            int32_t destCapacity,
+            int32_t *pDestLength,
+            const UChar *src, 
+            int32_t srcLength,
+            UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strFromUTF8WithSub
+ * @see u_strFromUTF8Lenient
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF8(UChar *dest,             
+              int32_t destCapacity,
+              int32_t *pDestLength,
+              const char *src, 
+              int32_t srcLength,
+              UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-8.
+ *
+ * Same as u_strToUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar       The substitution character to use in place of an illegal input sequence,
+ *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ *                      except for surrogate code points (U+D800..U+DFFF).
+ *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ *                      Set to 0 if no substitutions occur or subchar<0.
+ *                      pNumSubstitutions can be NULL.
+ * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
+ *                      pass the U_SUCCESS() test, or else the function returns
+ *                      immediately. Check for U_FAILURE() on output or use with
+ *                      function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF8
+ * @see u_strFromUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE char* U_EXPORT2
+u_strToUTF8WithSub(char *dest,
+            int32_t destCapacity,
+            int32_t *pDestLength,
+            const UChar *src,
+            int32_t srcLength,
+            UChar32 subchar, int32_t *pNumSubstitutions,
+            UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ *
+ * Same as u_strFromUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar       The substitution character to use in place of an illegal input sequence,
+ *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ *                      except for surrogate code points (U+D800..U+DFFF).
+ *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ *                      Set to 0 if no substitutions occur or subchar<0.
+ *                      pNumSubstitutions can be NULL.
+ * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
+ *                      pass the U_SUCCESS() test, or else the function returns
+ *                      immediately. Check for U_FAILURE() on output or use with
+ *                      function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8Lenient
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF8WithSub(UChar *dest,
+              int32_t destCapacity,
+              int32_t *pDestLength,
+              const char *src,
+              int32_t srcLength,
+              UChar32 subchar, int32_t *pNumSubstitutions,
+              UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ *
+ * Same as u_strFromUTF8() except that this function is designed to be very fast,
+ * which it achieves by being lenient about malformed UTF-8 sequences.
+ * This function is intended for use in environments where UTF-8 text is
+ * expected to be well-formed.
+ *
+ * Its semantics are:
+ * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
+ * - The function will not read beyond the input string, nor write beyond
+ *   the destCapacity.
+ * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
+ *   be well-formed UTF-16.
+ *   The function will resynchronize to valid code point boundaries
+ *   within a small number of code points after an illegal sequence.
+ * - Non-shortest forms are not detected and will result in "spoofing" output.
+ *
+ * For further performance improvement, if srcLength is given (>=0),
+ * then it must be destCapacity>=srcLength.
+ *
+ * There is no inverse u_strToUTF8Lenient() function because there is practically
+ * no performance gain from not checking that a UTF-16 string is well-formed.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ *                      Unlike for other ICU functions, if srcLength>=0 then it
+ *                      must be destCapacity>=srcLength.
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ *                      Unlike for other ICU functions, if srcLength>=0 but
+ *                      destCapacity<srcLength, then *pDestLength will be set to srcLength
+ *                      (and U_BUFFER_OVERFLOW_ERROR will be set)
+ *                      regardless of the actual result length.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
+ *                      pass the U_SUCCESS() test, or else the function returns
+ *                      immediately. Check for U_FAILURE() on output or use with
+ *                      function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8WithSub
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFromUTF8Lenient(UChar *dest,
+                     int32_t destCapacity,
+                     int32_t *pDestLength,
+                     const char *src,
+                     int32_t srcLength,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-32.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF32WithSub
+ * @see u_strFromUTF32
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32* U_EXPORT2 
+u_strToUTF32(UChar32 *dest, 
+             int32_t  destCapacity,
+             int32_t  *pDestLength,
+             const UChar *src, 
+             int32_t  srcLength,
+             UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-32 string to UTF-16.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF32WithSub
+ * @see u_strToUTF32
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 
+u_strFromUTF32(UChar   *dest,
+               int32_t destCapacity, 
+               int32_t *pDestLength,
+               const UChar32 *src,
+               int32_t srcLength,
+               UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-32.
+ *
+ * Same as u_strToUTF32() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If
+ *                      pDestLength!=NULL then *pDestLength is always set to the
+ *                      number of output units corresponding to the transformation of
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar       The substitution character to use in place of an illegal input sequence,
+ *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ *                      except for surrogate code points (U+D800..U+DFFF).
+ *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ *                      Set to 0 if no substitutions occur or subchar<0.
+ *                      pNumSubstitutions can be NULL.
+ * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
+ *                      pass the U_SUCCESS() test, or else the function returns
+ *                      immediately. Check for U_FAILURE() on output or use with
+ *                      function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF32
+ * @see u_strFromUTF32WithSub
+ * @stable ICU 4.2
+ */
+U_STABLE UChar32* U_EXPORT2
+u_strToUTF32WithSub(UChar32 *dest,
+             int32_t destCapacity,
+             int32_t *pDestLength,
+             const UChar *src,
+             int32_t srcLength,
+             UChar32 subchar, int32_t *pNumSubstitutions,
+             UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-32 string to UTF-16.
+ *
+ * Same as u_strFromUTF32() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If
+ *                      pDestLength!=NULL then *pDestLength is always set to the
+ *                      number of output units corresponding to the transformation of
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar       The substitution character to use in place of an illegal input sequence,
+ *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ *                      except for surrogate code points (U+D800..U+DFFF).
+ *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ *                      Set to 0 if no substitutions occur or subchar<0.
+ *                      pNumSubstitutions can be NULL.
+ * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
+ *                      pass the U_SUCCESS() test, or else the function returns
+ *                      immediately. Check for U_FAILURE() on output or use with
+ *                      function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF32
+ * @see u_strToUTF32WithSub
+ * @stable ICU 4.2
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF32WithSub(UChar *dest,
+               int32_t destCapacity,
+               int32_t *pDestLength,
+               const UChar32 *src,
+               int32_t srcLength,
+               UChar32 subchar, int32_t *pNumSubstitutions,
+               UErrorCode *pErrorCode);
+
+/**
+ * Convert a 16-bit Unicode string to Java Modified UTF-8.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8
+ *
+ * This function behaves according to the documentation for Java DataOutput.writeUTF()
+ * except that it does not encode the output length in the destination buffer
+ * and does not have an output length restriction.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String)
+ *
+ * The input string need not be well-formed UTF-16.
+ * (Therefore there is no subchar parameter.)
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
+ *                      pass the U_SUCCESS() test, or else the function returns
+ *                      immediately. Check for U_FAILURE() on output or use with
+ *                      function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @stable ICU 4.4
+ * @see u_strToUTF8WithSub
+ * @see u_strFromJavaModifiedUTF8WithSub
+ */
+U_STABLE char* U_EXPORT2 
+u_strToJavaModifiedUTF8(
+        char *dest,
+        int32_t destCapacity,
+        int32_t *pDestLength,
+        const UChar *src, 
+        int32_t srcLength,
+        UErrorCode *pErrorCode);
+
+/**
+ * Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
+ * If the input string is not well-formed and no substitution char is specified, 
+ * then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * This function behaves according to the documentation for Java DataInput.readUTF()
+ * except that it takes a length parameter rather than
+ * interpreting the first two input bytes as the length.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF()
+ *
+ * The output string may not be well-formed UTF-16.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar       The substitution character to use in place of an illegal input sequence,
+ *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ *                      except for surrogate code points (U+D800..U+DFFF).
+ *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ *                      Set to 0 if no substitutions occur or subchar<0.
+ *                      pNumSubstitutions can be NULL.
+ * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
+ *                      pass the U_SUCCESS() test, or else the function returns
+ *                      immediately. Check for U_FAILURE() on output or use with
+ *                      function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8WithSub
+ * @see u_strFromUTF8Lenient
+ * @see u_strToJavaModifiedUTF8
+ * @stable ICU 4.4
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromJavaModifiedUTF8WithSub(
+        UChar *dest,
+        int32_t destCapacity,
+        int32_t *pDestLength,
+        const char *src,
+        int32_t srcLength,
+        UChar32 subchar, int32_t *pNumSubstitutions,
+        UErrorCode *pErrorCode);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utf.h b/src/core/basetypes/icu-ucsdet/include/unicode/utf.h
new file mode 100644
index 00000000..f5954fe9
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utf.h
@@ -0,0 +1,223 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep09
+*   created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Code point macros
+ *
+ * This file defines macros for checking whether a code point is
+ * a surrogate or a non-character etc.
+ *
+ * The UChar and UChar32 data types for Unicode code units and code points
+ * are defined in umachine.h because they can be machine-dependent.
+ *
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h
+ * and itself includes utf8.h and utf16.h after some
+ * common definitions.
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be
+ * included explicitly if their definitions are used.
+ *
+ * utf8.h and utf16.h define macros for efficiently getting code points
+ * in and out of UTF-8/16 strings.
+ * utf16.h macros have "U16_" prefixes.
+ * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
+ *
+ * ICU mostly processes 16-bit Unicode strings.
+ * Most of the time, such strings are well-formed UTF-16.
+ * Single, unpaired surrogates must be handled as well, and are treated in ICU
+ * like regular code points where possible.
+ * (Pairs of surrogate code points are indistinguishable from supplementary
+ * code points encoded as pairs of supplementary code units.)
+ *
+ * In fact, almost all Unicode code points in normal text (>99%)
+ * are on the BMP (<=U+ffff) and even <=U+d7ff.
+ * ICU functions handle supplementary code points (U+10000..U+10ffff)
+ * but are optimized for the much more frequently occurring BMP code points.
+ *
+ * umachine.h defines UChar to be an unsigned 16-bit integer.
+ * Where available, UChar is defined to be a char16_t
+ * or a wchar_t (if that is an unsigned 16-bit type), otherwise uint16_t.
+ *
+ * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.
+ *
+ * utf.h defines a small number of C macros for single Unicode code points.
+ * These are simple checks for surrogates and non-characters.
+ * For actual Unicode character properties see uchar.h.
+ *
+ * By default, string operations must be done with error checking in case
+ * a string is not well-formed UTF-16.
+ * The macros will detect if a surrogate code unit is unpaired
+ * (lead unit without trail unit or vice versa) and just return the unit itself
+ * as the code point.
+ *
+ * The regular "safe" macros require that the initial, passed-in string index
+ * is within bounds. They only check the index when they read more than one
+ * code unit. This is usually done with code similar to the following loop:
+ * <pre>while(i<length) {
+ *   U16_NEXT(s, i, length, c);
+ *   // use c
+ * }</pre>
+ *
+ * When it is safe to assume that text is well-formed UTF-16
+ * (does not contain single, unpaired surrogates), then one can use
+ * U16_..._UNSAFE macros.
+ * These do not check for proper code unit sequences or truncated text and may
+ * yield wrong results or even cause a crash if they are used with "malformed"
+ * text.
+ * In practice, U16_..._UNSAFE macros will produce slightly less code but
+ * should not be faster because the processing is only different when a
+ * surrogate code unit is detected, which will be rare.
+ *
+ * Similarly for UTF-8, there are "safe" macros without a suffix,
+ * and U8_..._UNSAFE versions.
+ * The performance differences are much larger here because UTF-8 provides so
+ * many opportunities for malformed sequences.
+ * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
+ * and are fast, while the safe UTF-8 macros call functions for all but the
+ * trivial (ASCII) cases.
+ * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common
+ * characters inline as well.)
+ *
+ * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
+ * code point values (0..U+10ffff). They are indicated with negative values instead.
+ *
+ * For more information see the ICU User Guide Strings chapter
+ * (http://userguide.icu-project.org/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used  for if-else-while... 
+ * bodies and all macro statements should be terminated with semicolon.
+ *
+ * @stable ICU 2.4
+ */
+
+#ifndef __UTF_H__
+#define __UTF_H__
+
+#include "unicode/umachine.h"
+/* include the utfXX.h after the following definitions */
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Is this code point a Unicode noncharacter?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_NONCHAR(c) \
+    ((c)>=0xfdd0 && \
+     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+     (uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is c a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_CHAR(c) \
+    ((uint32_t)(c)<0xd800 || \
+        ((uint32_t)(c)>0xdfff && \
+         (uint32_t)(c)<=0x10ffff && \
+         !U_IS_UNICODE_NONCHAR(c)))
+
+/**
+ * Is this code point a BMP code point (U+0000..U+ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.8
+ */
+#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
+
+/**
+ * Is this code point a supplementary code point (U+10000..U+10ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.8
+ */
+#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
+ 
+/**
+ * Is this code point a lead surrogate (U+d800..U+dbff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code point a trail surrogate (U+dc00..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code point a surrogate (U+d800..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a trail surrogate?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 4.2
+ */
+#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
+
+/* include the utfXX.h ------------------------------------------------------ */
+
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
+#include "unicode/utf_old.h"
+
+#endif  /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */
+
+#endif  /* __UTF_H__ */
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utf16.h b/src/core/basetypes/icu-ucsdet/include/unicode/utf16.h
new file mode 100644
index 00000000..bdd88a8b
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utf16.h
@@ -0,0 +1,623 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf16.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep09
+*   created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 16-bit Unicode handling macros
+ * 
+ * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://userguide.icu-project.org/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used  for if-else-while... 
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF16_H__
+#define __UTF16_H__
+
+#include "unicode/umachine.h"
+#ifndef __UTF_H__
+#   include "unicode/utf.h"
+#endif
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
+
+/**
+ * Is this code unit a lead surrogate (U+d800..U+dbff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code unit a trail surrogate (U+dc00..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code unit a surrogate (U+d800..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a trail surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 4.2
+ */
+#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
+
+/**
+ * Helper constant for U16_GET_SUPPLEMENTARY.
+ * @internal
+ */
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/**
+ * Get a supplementary code point value (U+10000..U+10ffff)
+ * from its lead and trail surrogates.
+ * The result is undefined if the input values are not
+ * lead and trail surrogates.
+ *
+ * @param lead lead surrogate (U+d800..U+dbff)
+ * @param trail trail surrogate (U+dc00..U+dfff)
+ * @return supplementary code point (U+10000..U+10ffff)
+ * @stable ICU 2.4
+ */
+#define U16_GET_SUPPLEMENTARY(lead, trail) \
+    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
+
+
+/**
+ * Get the lead surrogate (0xd800..0xdbff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return lead surrogate (U+d800..U+dbff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/**
+ * Get the trail surrogate (0xdc00..0xdfff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return trail surrogate (U+dc00..U+dfff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/**
+ * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
+ * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
+ * @param c 32-bit code point
+ * @return 1 or 2
+ * @stable ICU 2.4
+ */
+#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/**
+ * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+ * @return 2
+ * @stable ICU 2.4
+ */
+#define U16_MAX_LENGTH 2
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ * The result is undefined if the offset points to a single, unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_GET
+ * @stable ICU 2.4
+ */
+#define U16_GET_UNSAFE(s, i, c) { \
+    (c)=(s)[i]; \
+    if(U16_IS_SURROGATE(c)) { \
+        if(U16_IS_SURROGATE_LEAD(c)) { \
+            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
+        } else { \
+            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
+        } \
+    } \
+}
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to a single, unpaired surrogate, then that itself
+ * will be returned as the code point.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_GET(s, start, i, length, c) { \
+    (c)=(s)[i]; \
+    if(U16_IS_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if(U16_IS_SURROGATE_LEAD(c)) { \
+            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+            } \
+        } else { \
+            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+            } \
+        } \
+    } \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset points to a single, unpaired lead surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_NEXT
+ * @stable ICU 2.4
+ */
+#define U16_NEXT_UNSAFE(s, i, c) { \
+    (c)=(s)[(i)++]; \
+    if(U16_IS_LEAD(c)) { \
+        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
+    } \
+}
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_NEXT(s, i, length, c) { \
+    (c)=(s)[(i)++]; \
+    if(U16_IS_LEAD(c)) { \
+        uint16_t __c2; \
+        if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+            ++(i); \
+            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+        } \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U16_APPEND
+ * @stable ICU 2.4
+ */
+#define U16_APPEND_UNSAFE(s, i, c) { \
+    if((uint32_t)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else { \
+        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a surrogate pair is written, checks for sufficient space in the string.
+ * If the code point is not valid or a trail surrogate does not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset, must be i<capacity
+ * @param capacity size of the string buffer
+ * @param c code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U16_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_APPEND(s, i, capacity, c, isError) { \
+    if((uint32_t)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
+        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+    } else /* c>0x10ffff or not enough space */ { \
+        (isError)=TRUE; \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_FWD_1
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1_UNSAFE(s, i) { \
+    if(U16_IS_LEAD((s)[(i)++])) { \
+        ++(i); \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @see U16_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1(s, i, length) { \
+    if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
+        ++(i); \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_FWD_N
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        U16_FWD_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param n number of code points to skip
+ * @see U16_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N(s, i, length, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
+        U16_FWD_1(s, i, length); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START_UNSAFE(s, i) { \
+    if(U16_IS_TRAIL((s)[i])) { \
+        --(i); \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i
+ * @see U16_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START(s, start, i) { \
+    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+        --(i); \
+    } \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind a single, unpaired trail surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_PREV
+ * @stable ICU 2.4
+ */
+#define U16_PREV_UNSAFE(s, i, c) { \
+    (c)=(s)[--(i)]; \
+    if(U16_IS_TRAIL(c)) { \
+        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_PREV(s, start, i, c) { \
+    (c)=(s)[--(i)]; \
+    if(U16_IS_TRAIL(c)) { \
+        uint16_t __c2; \
+        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+            --(i); \
+            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+        } \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_BACK_1
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1_UNSAFE(s, i) { \
+    if(U16_IS_TRAIL((s)[--(i)])) { \
+        --(i); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @see U16_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1(s, start, i) { \
+    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+        --(i); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_BACK_N
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        U16_BACK_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start start of string
+ * @param i string offset, must be start<i
+ * @param n number of code points to skip
+ * @see U16_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N(s, start, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && (i)>(start)) { \
+        U16_BACK_1(s, start, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \
+    if(U16_IS_LEAD((s)[(i)-1])) { \
+        ++(i); \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, start<=i<=length
+ * @param length int32_t string length
+ * @see U16_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT(s, start, i, length) { \
+    if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
+        ++(i); \
+    } \
+}
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utf8.h b/src/core/basetypes/icu-ucsdet/include/unicode/utf8.h
new file mode 100644
index 00000000..7bd0b0e8
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utf8.h
@@ -0,0 +1,824 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf8.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep13
+*   created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 8-bit Unicode handling macros
+ * 
+ * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://userguide.icu-project.org/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used  for if-else-while... 
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF8_H__
+#define __UTF8_H__
+
+#include "unicode/umachine.h"
+#ifndef __UTF_H__
+#   include "unicode/utf.h"
+#endif
+
+/* internal definitions ----------------------------------------------------- */
+
+/**
+ * \var utf8_countTrailBytes
+ * Internal array with numbers of trail bytes for any given byte used in
+ * lead byte position.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable,
+ * and should not be hidden when other internal functions are hidden (otherwise
+ * public macros would fail to compile).
+ * @internal
+ */
+#ifdef U_UTF8_IMPL
+U_EXPORT const uint8_t 
+#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)
+U_CFUNC const uint8_t
+#else
+U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/ 
+#endif
+utf8_countTrailBytes[256];
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte.
+ * Returns 0 for 0..0xbf as well as for 0xfe and 0xff.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * Note: Beginning with ICU 50, the implementation uses a multi-condition expression
+ * which was shown in 2012 (on x86-64) to compile to fast, branch-free code.
+ * leadByte is evaluated multiple times.
+ *
+ * The pre-ICU 50 implementation used the exported array utf8_countTrailBytes:
+ * #define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[leadByte])
+ * leadByte was evaluated exactly once.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES(leadByte) \
+    ((uint8_t)(leadByte)<0xf0 ? \
+        ((uint8_t)(leadByte)>=0xc0)+((uint8_t)(leadByte)>=0xe0) : \
+        (uint8_t)(leadByte)<0xfe ? 3+((uint8_t)(leadByte)>=0xf8)+((uint8_t)(leadByte)>=0xfc) : 0)
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
+ * The maximum supported lead byte is 0xf4 corresponding to U+10FFFF.
+ * leadByte might be evaluated multiple times.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
+    (((leadByte)>=0xc0)+((leadByte)>=0xe0)+((leadByte)>=0xf0))
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ * @internal
+ */
+#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/**
+ * Function for handling "next code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_STABLE UChar32 U_EXPORT2
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "append code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_STABLE int32_t U_EXPORT2
+utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
+
+/**
+ * Function for handling "previous code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_STABLE UChar32 U_EXPORT2
+utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "skip backward one code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_STABLE int32_t U_EXPORT2
+utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_SINGLE(c) (((c)&0x80)==0)
+
+/**
+ * Is this code unit (byte) a UTF-8 lead byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
+
+/**
+ * Is this code unit (byte) a UTF-8 trail byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
+
+/**
+ * How many code units (bytes) are used for the UTF-8 encoding
+ * of this Unicode code point?
+ * @param c 32-bit code point
+ * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+ * @stable ICU 2.4
+ */
+#define U8_LENGTH(c) \
+    ((uint32_t)(c)<=0x7f ? 1 : \
+        ((uint32_t)(c)<=0x7ff ? 2 : \
+            ((uint32_t)(c)<=0xd7ff ? 3 : \
+                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
+                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
+                ) \
+            ) \
+        ) \
+    )
+
+/**
+ * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
+ * @return 4
+ * @stable ICU 2.4
+ */
+#define U8_MAX_LENGTH 4
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * The result is undefined if the offset points to an illegal UTF-8
+ * byte sequence.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_GET
+ * @stable ICU 2.4
+ */
+#define U8_GET_UNSAFE(s, i, c) { \
+    int32_t _u8_get_unsafe_index=(int32_t)(i); \
+    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
+    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
+}
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to a negative value.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_GET(s, start, i, length, c) { \
+    int32_t _u8_get_index=(i); \
+    U8_SET_CP_START(s, start, _u8_get_index); \
+    U8_NEXT(s, _u8_get_index, length, c); \
+}
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_GET() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_GET
+ * @stable ICU 51
+ */
+#define U8_GET_OR_FFFD(s, start, i, length, c) { \
+    int32_t _u8_get_index=(i); \
+    U8_SET_CP_START(s, start, _u8_get_index); \
+    U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * The result is undefined if the offset points to a trail byte
+ * or an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_NEXT
+ * @stable ICU 2.4
+ */
+#define U8_NEXT_UNSAFE(s, i, c) { \
+    (c)=(uint8_t)(s)[(i)++]; \
+    if((c)>=0x80) { \
+        if((c)<0xe0) { \
+            (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
+        } else if((c)<0xf0) { \
+            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+            (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
+            (i)+=2; \
+        } else { \
+            (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
+            (i)+=3; \
+        } \
+    } \
+}
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_NEXT(s, i, length, c) { \
+    (c)=(uint8_t)(s)[(i)++]; \
+    if((c)>=0x80) { \
+        uint8_t __t1, __t2; \
+        if( /* handle U+1000..U+CFFF inline */ \
+            (0xe0<(c) && (c)<=0xec) && \
+            (((i)+1)<(length) || (length)<0) && \
+            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
+            (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
+        ) { \
+            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+            (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
+            (i)+=2; \
+        } else if( /* handle U+0080..U+07FF inline */ \
+            ((c)<0xe0 && (c)>=0xc2) && \
+            ((i)!=(length)) && \
+            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
+        ) { \
+            (c)=(((c)&0x1f)<<6)|__t1; \
+            ++(i); \
+        } else { \
+            /* function call for "complicated" and error cases */ \
+            (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \
+        } \
+    } \
+}
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_NEXT() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_NEXT
+ * @stable ICU 51
+ */
+#define U8_NEXT_OR_FFFD(s, i, length, c) { \
+    (c)=(uint8_t)(s)[(i)++]; \
+    if((c)>=0x80) { \
+        uint8_t __t1, __t2; \
+        if( /* handle U+1000..U+CFFF inline */ \
+            (0xe0<(c) && (c)<=0xec) && \
+            (((i)+1)<(length) || (length)<0) && \
+            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
+            (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
+        ) { \
+            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+            (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
+            (i)+=2; \
+        } else if( /* handle U+0080..U+07FF inline */ \
+            ((c)<0xe0 && (c)>=0xc2) && \
+            ((i)!=(length)) && \
+            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
+        ) { \
+            (c)=(((c)&0x1f)<<6)|__t1; \
+            ++(i); \
+        } else { \
+            /* function call for "complicated" and error cases */ \
+            (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \
+        } \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const uint8_t * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U8_APPEND
+ * @stable ICU 2.4
+ */
+#define U8_APPEND_UNSAFE(s, i, c) { \
+    if((uint32_t)(c)<=0x7f) { \
+        (s)[(i)++]=(uint8_t)(c); \
+    } else { \
+        if((uint32_t)(c)<=0x7ff) { \
+            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+        } else { \
+            if((uint32_t)(c)<=0xffff) { \
+                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+            } else { \
+                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
+                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+            } \
+            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+        } \
+        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a non-ASCII code point is written, checks for sufficient space in the string.
+ * If the code point is not valid or trail bytes do not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s const uint8_t * string buffer
+ * @param i int32_t string offset, must be i<capacity
+ * @param capacity int32_t size of the string buffer
+ * @param c UChar32 code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U8_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_APPEND(s, i, capacity, c, isError) { \
+    if((uint32_t)(c)<=0x7f) { \
+        (s)[(i)++]=(uint8_t)(c); \
+    } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
+        (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+    } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
+        (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+    } else { \
+        (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_FWD_1
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1_UNSAFE(s, i) { \
+    (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((uint8_t)(s)[i]); \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @see U8_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1(s, i, length) { \
+    uint8_t __b=(uint8_t)(s)[(i)++]; \
+    if(U8_IS_LEAD(__b)) { \
+        uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
+        if((i)+__count>(length) && (length)>=0) { \
+            __count=(uint8_t)((length)-(i)); \
+        } \
+        while(__count>0 && U8_IS_TRAIL((s)[i])) { \
+            ++(i); \
+            --__count; \
+        } \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_FWD_N
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        U8_FWD_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param n number of code points to skip
+ * @see U8_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N(s, i, length, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
+        U8_FWD_1(s, i, length); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START_UNSAFE(s, i) { \
+    while(U8_IS_TRAIL((s)[i])) { --(i); } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i
+ * @see U8_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START(s, start, i) { \
+    if(U8_IS_TRAIL((s)[(i)])) { \
+        (i)=utf8_back1SafeBody(s, start, (i)); \
+    } \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_PREV
+ * @stable ICU 2.4
+ */
+#define U8_PREV_UNSAFE(s, i, c) { \
+    (c)=(uint8_t)(s)[--(i)]; \
+    if(U8_IS_TRAIL(c)) { \
+        uint8_t __b, __count=1, __shift=6; \
+\
+        /* c is a trail byte */ \
+        (c)&=0x3f; \
+        for(;;) { \
+            __b=(uint8_t)(s)[--(i)]; \
+            if(__b>=0xc0) { \
+                U8_MASK_LEAD_BYTE(__b, __count); \
+                (c)|=(UChar32)__b<<__shift; \
+                break; \
+            } else { \
+                (c)|=(UChar32)(__b&0x3f)<<__shift; \
+                ++__count; \
+                __shift+=6; \
+            } \
+        } \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_PREV(s, start, i, c) { \
+    (c)=(uint8_t)(s)[--(i)]; \
+    if((c)>=0x80) { \
+        (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_PREV() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_PREV
+ * @stable ICU 51
+ */
+#define U8_PREV_OR_FFFD(s, start, i, c) { \
+    (c)=(uint8_t)(s)[--(i)]; \
+    if((c)>=0x80) { \
+        (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_BACK_1
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1_UNSAFE(s, i) { \
+    while(U8_IS_TRAIL((s)[--(i)])) {} \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @see U8_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1(s, start, i) { \
+    if(U8_IS_TRAIL((s)[--(i)])) { \
+        (i)=utf8_back1SafeBody(s, start, (i)); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_BACK_N
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        U8_BACK_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t index of the start of the string
+ * @param i int32_t string offset, must be start<i
+ * @param n number of code points to skip
+ * @see U8_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N(s, start, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && (i)>(start)) { \
+        U8_BACK_1(s, start, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
+    U8_BACK_1_UNSAFE(s, i); \
+    U8_FWD_1_UNSAFE(s, i); \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i<=length
+ * @param length int32_t string length
+ * @see U8_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT(s, start, i, length) { \
+    if((start)<(i) && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
+        U8_BACK_1(s, start, i); \
+        U8_FWD_1(s, i, length); \
+    } \
+}
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utf_old.h b/src/core/basetypes/icu-ucsdet/include/unicode/utf_old.h
new file mode 100644
index 00000000..f9125b1d
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utf_old.h
@@ -0,0 +1,1169 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf_old.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002sep21
+*   created by: Markus W. Scherer
+*/
+
+/**
+ * \file 
+ * \brief C API: Deprecated macros for Unicode string handling
+ */
+
+/**
+ * 
+ * The macros in utf_old.h are all deprecated and their use discouraged.
+ * Some of the design principles behind the set of UTF macros
+ * have changed or proved impractical.
+ * Almost all of the old "UTF macros" are at least renamed.
+ * If you are looking for a new equivalent to an old macro, please see the
+ * comment at the old one.
+ *
+ * Brief summary of reasons for deprecation:
+ * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing)
+ *   was impractical.
+ * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing)
+ *   was of little use and impractical.
+ * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE
+ *   selection framework: UTF32_ macros (all trivial)
+ *   and UTF_ default and intermediate macros (all aliases).
+ * - The selection framework also caused many macro aliases.
+ * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2).
+ * - Change of language in Unicode standard:
+ *   Growing distinction between internal x-bit Unicode strings and external UTF-x
+ *   forms, with the former more lenient.
+ *   Suggests renaming of UTF16_ macros to U16_.
+ * - The prefix "UTF_" without a width number confused some users.
+ * - "Safe" append macros needed the addition of an error indicator output.
+ * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values
+ *   to indicate error conditions.
+ * - The use of the "_CHAR" infix for code point operations confused some users.
+ *
+ * More details:
+ *
+ * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32
+ * for string processing, and among unsafe/safe/strict default macros for that.
+ *
+ * It proved nearly impossible to write non-trivial, high-performance code
+ * that is UTF-generic.
+ * Unsafe default macros would be dangerous for default string processing,
+ * and the main reason for the "strict" versions disappeared:
+ * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal.
+ * The only other conditions that "strict" checked for were non-characters,
+ * which are valid during processing. Only during text input/output should they
+ * be checked, and at that time other well-formedness checks may be
+ * necessary or useful as well.
+ * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR
+ * or U_IS_UNICODE_CHAR.
+ *
+ * The old UTF8_..._SAFE macros also used some normal Unicode code points
+ * to indicate malformed sequences.
+ * The new UTF8_ macros without suffix use negative values instead.
+ *
+ * The entire contents of utf32.h was moved here without replacement
+ * because all those macros were trivial and
+ * were meaningful only in the framework of choosing the UTF size.
+ *
+ * See Jitterbug 2150 and its discussion on the ICU mailing list
+ * in September 2002.
+ *
+ * <hr>
+ *
+ * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation:
+ *
+ * <p>The original concept for these files was for ICU to allow
+ * in principle to set which UTF (UTF-8/16/32) is used internally
+ * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type
+ * accordingly. UTF-16 was the default.</p>
+ *
+ * <p>This concept has been abandoned.
+ * A lot of the ICU source code assumes UChar strings are in UTF-16.
+ * This is especially true for low-level code like
+ * conversion, normalization, and collation.
+ * The utf.h header enforces the default of UTF-16.
+ * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p>
+ *
+ * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p>
+ *
+ * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.</p>
+ *
+ * <p>utf.h also defines a number of C macros for handling single Unicode code points and
+ * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual
+ * implementations of those macros and then aliases one set of them (for UTF-16) for general use.
+ * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while
+ * the general alias macros always begin with UTF_...</p>
+ *
+ * <p>Many string operations can be done with or without error checking.
+ * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe"
+ * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause
+ * program failures if the strings are not well-formed. The safe macros have an additional, boolean
+ * parameter "strict". If strict is FALSE, then only illegal sequences are detected.
+ * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates).
+ * Safe macros return special error code points for illegal/irregular sequences:
+ * Typically, U+ffff, or values that would result in a code unit sequence of the same length
+ * as the erroneous input sequence.<br>
+ * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and
+ * they do not have start/length parameters for boundary checking.</p>
+ *
+ * <p>Here, the macros are aliased in two steps:
+ * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are
+ * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures.
+ * Then, in a second step, the default, general alias macros are set to use either the unsafe or
+ * the safe/not strict (default) or the safe/strict macro;
+ * these general macros do not have a strictness parameter.</p>
+ *
+ * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict.
+ * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for
+ * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p>
+ *
+ * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix.
+ * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias.
+ * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds,
+ * then the _UNSAFE version may be used.
+ * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p>
+ *
+ * <hr>
+ *
+ * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead.
+ */
+
+#ifndef __UTF_OLD_H__
+#define __UTF_OLD_H__
+
+#ifndef U_HIDE_DEPRECATED_API
+
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/* Formerly utf.h, part 1 --------------------------------------------------- */
+
+#ifdef U_USE_UTF_DEPRECATES
+/**
+ * Unicode string and array offset and index type.
+ * ICU always counts Unicode code units (UChars) for
+ * string offsets, indexes, and lengths, not Unicode code points.
+ *
+ * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release.
+ */
+typedef int32_t UTextOffset;
+#endif
+
+/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF_SIZE 16
+
+/**
+ * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations
+ * with strict=FALSE.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_SAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_UNSAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_STRICT
+
+/**
+ * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
+ * which need 1 or 2 bytes in UTF-8:
+ * \code
+ * U+0015 = NAK = Negative Acknowledge, C0 control character
+ * U+009f = highest C1 control character
+ * \endcode
+ *
+ * These are used by UTF8_..._SAFE macros so that they can return an error value
+ * that needs the same number of code units (bytes) as were seen by
+ * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_1 0x15
+
+/**
+ * See documentation on UTF8_ERROR_VALUE_1 for details.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_2 0x9f
+
+/**
+ * Error value for all UTFs. This code point value will be set by macros with error
+ * checking if an error is detected.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ERROR_VALUE 0xffff
+
+/**
+ * Is a given 32-bit code an error value
+ * as returned by one of the macros for any UTF?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_ERROR(c) \
+    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
+
+/**
+ * This is a combined macro: Is c a valid Unicode value _and_ not an error code?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_VALID(c) \
+    (UTF_IS_UNICODE_CHAR(c) && \
+     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
+
+/**
+ * Is this code unit or code point a surrogate (U+d800..U+dfff)?
+ * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h.
+ */
+#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
+
+/**
+ * Is a given 32-bit code point a Unicode noncharacter?
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_NONCHAR(c) \
+    ((c)>=0xfdd0 && \
+     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+     (uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is a given 32-bit value a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_CHAR(c) \
+    ((uint32_t)(c)<0xd800 || \
+        ((uint32_t)(c)>0xdfff && \
+         (uint32_t)(c)<=0x10ffff && \
+         !UTF_IS_UNICODE_NONCHAR(c)))
+
+/* Formerly utf8.h ---------------------------------------------------------- */
+
+/**
+ * Count the trail bytes for a UTF-8 lead byte.
+ * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h.
+ */
+#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h.
+ */
+#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/** Is this this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */
+#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
+/** Is this this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */
+#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
+/** Is this this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */
+#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
+
+/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */
+#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
+
+/**
+ * Given the lead character, how many bytes are taken by this code point.
+ * ICU does not deal with code points >0x10ffff
+ * unless necessary for advancing in the byte stream.
+ *
+ * These length macros take into account that for values >0x10ffff
+ * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff
+ * with 3 bytes.
+ * Code point comparisons need to be in uint32_t because UChar32
+ * may be a signed type, and negative values must be recognized.
+ *
+ * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf.h.
+ */
+#if 1
+#   define UTF8_CHAR_LENGTH(c) \
+        ((uint32_t)(c)<=0x7f ? 1 : \
+            ((uint32_t)(c)<=0x7ff ? 2 : \
+                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
+            ) \
+        )
+#else
+#   define UTF8_CHAR_LENGTH(c) \
+        ((uint32_t)(c)<=0x7f ? 1 : \
+            ((uint32_t)(c)<=0x7ff ? 2 : \
+                ((uint32_t)(c)<=0xffff ? 3 : \
+                    ((uint32_t)(c)<=0x10ffff ? 4 : \
+                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
+                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
+                        ) \
+                    ) \
+                ) \
+            ) \
+        )
+#endif
+
+/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */
+#define UTF8_MAX_CHAR_LENGTH 4
+
+/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
+
+/** @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */
+#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
+    int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
+    UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
+    UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
+}
+
+/** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */
+#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+    int32_t _utf8_get_char_safe_index=(int32_t)(i); \
+    UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
+    UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */
+#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[(i)++]; \
+    if((uint8_t)((c)-0xc0)<0x35) { \
+        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
+        UTF8_MASK_LEAD_BYTE(c, __count); \
+        switch(__count) { \
+        /* each following branch falls through to the next one */ \
+        case 3: \
+            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+        case 2: \
+            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+        case 1: \
+            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+        /* no other branches to optimize switch() */ \
+            break; \
+        } \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */
+#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
+    if((uint32_t)(c)<=0x7f) { \
+        (s)[(i)++]=(uint8_t)(c); \
+    } else { \
+        if((uint32_t)(c)<=0x7ff) { \
+            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+        } else { \
+            if((uint32_t)(c)<=0xffff) { \
+                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+            } else { \
+                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
+                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+            } \
+            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+        } \
+        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF8_FWD_1_UNSAFE(s, i) { \
+    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF8_FWD_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        UTF8_FWD_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
+    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
+}
+
+/** @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */
+#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+    (c)=(s)[(i)++]; \
+    if((c)>=0x80) { \
+        if(UTF8_IS_LEAD(c)) { \
+            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
+        } else { \
+            (c)=UTF8_ERROR_VALUE_1; \
+        } \
+    } \
+}
+
+/** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */
+#define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
+    if((uint32_t)(c)<=0x7f) { \
+        (s)[(i)++]=(uint8_t)(c); \
+    } else { \
+        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */
+#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */
+#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */
+#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */
+#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[--(i)]; \
+    if(UTF8_IS_TRAIL(c)) { \
+        uint8_t __b, __count=1, __shift=6; \
+\
+        /* c is a trail byte */ \
+        (c)&=0x3f; \
+        for(;;) { \
+            __b=(s)[--(i)]; \
+            if(__b>=0xc0) { \
+                UTF8_MASK_LEAD_BYTE(__b, __count); \
+                (c)|=(UChar32)__b<<__shift; \
+                break; \
+            } else { \
+                (c)|=(UChar32)(__b&0x3f)<<__shift; \
+                ++__count; \
+                __shift+=6; \
+            } \
+        } \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF8_BACK_1_UNSAFE(s, i) { \
+    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF8_BACK_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        UTF8_BACK_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+    UTF8_BACK_1_UNSAFE(s, i); \
+    UTF8_FWD_1_UNSAFE(s, i); \
+}
+
+/** @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */
+#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+    (c)=(s)[--(i)]; \
+    if((c)>=0x80) { \
+        if((c)<=0xbf) { \
+            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
+        } else { \
+            (c)=UTF8_ERROR_VALUE_1; \
+        } \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */
+#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */
+#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf16.h --------------------------------------------------------- */
+
+/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */
+#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
+
+/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */
+#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
+
+/** Assuming c is a surrogate, is it a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */
+#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
+
+/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */
+#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/** Get the UTF-32 value from the surrogate code units. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */
+#define UTF16_GET_PAIR_VALUE(first, second) \
+    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
+
+/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
+
+/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */
+#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */
+#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */
+#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
+
+/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */
+#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
+
+/** @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */
+#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/** @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */
+#define UTF16_MAX_CHAR_LENGTH 2
+
+/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF16_ARRAY_SIZE(size) (size)
+
+/**
+ * Get a single code point from an offset that points to any
+ * of the code units that belong to that code point.
+ * Assume 0<=i<length.
+ *
+ * This could be used for iteration together with
+ * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(),
+ * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and
+ * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that.
+ * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h.
+ */
+#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[i]; \
+    if(UTF_IS_SURROGATE(c)) { \
+        if(UTF_IS_SURROGATE_FIRST(c)) { \
+            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
+        } else { \
+            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
+        } \
+    } \
+}
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+    (c)=(s)[i]; \
+    if(UTF_IS_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if(UTF_IS_SURROGATE_FIRST(c)) { \
+            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
+                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+            } else if(strict) {\
+                /* unmatched first surrogate */ \
+                (c)=UTF_ERROR_VALUE; \
+            } \
+        } else { \
+            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+            } else if(strict) {\
+                /* unmatched second surrogate */ \
+                (c)=UTF_ERROR_VALUE; \
+            } \
+        } \
+    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[(i)++]; \
+    if(UTF_IS_FIRST_SURROGATE(c)) { \
+        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
+    if((uint32_t)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else { \
+        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF16_FWD_1_UNSAFE(s, i) { \
+    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
+        ++(i); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF16_FWD_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        UTF16_FWD_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
+    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
+        --(i); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+    (c)=(s)[(i)++]; \
+    if(UTF_IS_FIRST_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
+            ++(i); \
+            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+        } else if(strict) {\
+            /* unmatched first surrogate */ \
+            (c)=UTF_ERROR_VALUE; \
+        } \
+    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+        /* unmatched second surrogate or other non-character */ \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
+    if((uint32_t)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else if((uint32_t)(c)<=0x10ffff) { \
+        if((i)+1<(length)) { \
+            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+        } else /* not enough space */ { \
+            (s)[(i)++]=UTF_ERROR_VALUE; \
+        } \
+    } else /* c>0x10ffff, write error value */ { \
+        (s)[(i)++]=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[--(i)]; \
+    if(UTF_IS_SECOND_SURROGATE(c)) { \
+        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF16_BACK_1_UNSAFE(s, i) { \
+    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
+        --(i); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF16_BACK_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        UTF16_BACK_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
+        ++(i); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+    (c)=(s)[--(i)]; \
+    if(UTF_IS_SECOND_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+            --(i); \
+            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+        } else if(strict) {\
+            /* unmatched second surrogate */ \
+            (c)=UTF_ERROR_VALUE; \
+        } \
+    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+        /* unmatched first surrogate or other non-character */ \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf32.h --------------------------------------------------------- */
+
+/*
+* Old documentation:
+*
+*   This file defines macros to deal with UTF-32 code units and code points.
+*   Signatures and semantics are the same as for the similarly named macros
+*   in utf16.h.
+*   utf32.h is included by utf.h after unicode/umachine.h</p>
+*   and some common definitions.
+*   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
+*                  Compound statements (curly braces {}) must be used  for if-else-while...
+*                  bodies and all macro statements should be terminated with semicolon.</p>
+*/
+
+/* internal definitions ----------------------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SAFE(c, strict) \
+    (!(strict) ? \
+        (uint32_t)(c)<=0x10ffff : \
+        UTF_IS_UNICODE_CHAR(c))
+
+/*
+ * For the semantics of all of these macros, see utf16.h.
+ * The UTF-32 versions are trivial because any code point is
+ * encoded using exactly one code unit.
+ */
+
+/* single-code point definitions -------------------------------------------- */
+
+/* classes of code unit values */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SINGLE(uchar) 1
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_LEAD(uchar) 0
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_TRAIL(uchar) 0
+
+/* number of code units per code point */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_CHAR_LENGTH(c) 1
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_MAX_CHAR_LENGTH 1
+
+/* average number of code units compared to UTF-16 */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_ARRAY_SIZE(size) (size)
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[i]; \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+    (c)=(s)[i]; \
+    if(!UTF32_IS_SAFE(c, strict)) { \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[(i)++]; \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
+    (s)[(i)++]=(c); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_UNSAFE(s, i) { \
+    ++(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_UNSAFE(s, i, n) { \
+    (i)+=(n); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+    (c)=(s)[(i)++]; \
+    if(!UTF32_IS_SAFE(c, strict)) { \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
+    if((uint32_t)(c)<=0x10ffff) { \
+        (s)[(i)++]=(c); \
+    } else /* c>0x10ffff, write 0xfffd */ { \
+        (s)[(i)++]=0xfffd; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_SAFE(s, i, length) { \
+    ++(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_SAFE(s, i, length, n) { \
+    if(((i)+=(n))>(length)) { \
+        (i)=(length); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[--(i)]; \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_UNSAFE(s, i) { \
+    --(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_UNSAFE(s, i, n) { \
+    (i)-=(n); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+    (c)=(s)[--(i)]; \
+    if(!UTF32_IS_SAFE(c, strict)) { \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_SAFE(s, start, i) { \
+    --(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_SAFE(s, start, i, n) { \
+    (i)-=(n); \
+    if((i)<(start)) { \
+        (i)=(start); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
+}
+
+/* Formerly utf.h, part 2 --------------------------------------------------- */
+
+/**
+ * Estimate the number of code units for a string based on the number of UTF-16 code units.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
+
+/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */
+#define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
+
+/* Define default macros (UTF-16 "safe") ------------------------------------ */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * Same as UTF16_IS_SINGLE.
+ * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h.
+ */
+#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
+
+/**
+ * Is this code unit the first one of several (a lead surrogate)?
+ * Same as UTF16_IS_LEAD.
+ * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.
+ */
+#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
+
+/**
+ * Is this code unit one of several but not the first one (a trail surrogate)?
+ * Same as UTF16_IS_TRAIL.
+ * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.
+ */
+#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
+
+/**
+ * Does this code point require multiple code units (is it a supplementary code point)?
+ * Same as UTF16_NEED_MULTIPLE_UCHAR.
+ * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead.
+ */
+#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
+
+/**
+ * How many code units are used to encode this code point (1 or 2)?
+ * Same as UTF16_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.
+ */
+#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
+
+/**
+ * How many code units are used at most for any Unicode code point (2)?
+ * Same as UTF16_MAX_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h.
+ */
+#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
+
+/**
+ * Set c to the code point that contains the code unit i.
+ * i could point to the lead or the trail surrogate for the code point.
+ * i is not modified.
+ * Same as UTF16_GET_CHAR.
+ * \pre 0<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h.
+ */
+#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
+
+/**
+ * Set c to the code point that starts at code unit i
+ * and advance i to beyond the code units of this code point (post-increment).
+ * i must point to the first code unit of a code point.
+ * Otherwise c is set to the trail unit (surrogate) itself.
+ * Same as UTF16_NEXT_CHAR.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h.
+ */
+#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
+
+/**
+ * Append the code units of code point c to the string at index i
+ * and advance i to beyond the new code units (post-increment).
+ * The code units beginning at index i will be overwritten.
+ * Same as UTF16_APPEND_CHAR.
+ * \pre 0<=c<=0x10ffff
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h.
+ */
+#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+/**
+ * Advance i to beyond the code units of the code point that begins at i.
+ * I.e., advance i by one code point.
+ * Same as UTF16_FWD_1.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.
+ */
+#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
+
+/**
+ * Advance i to beyond the code units of the n code points where the first one begins at i.
+ * I.e., advance i by n code points.
+ * Same as UT16_FWD_N.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.
+ */
+#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points to the beginning
+ * of a code point.
+ * The input index points to any code unit of a code point and is moved to point to
+ * the first code unit of the same code point. i is never incremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is decremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_START.
+ * \pre start<=i<length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.
+ */
+#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
+
+/**
+ * Set c to the code point that has code units before i
+ * and move i backward (towards the beginning of the string)
+ * to the first code unit of this code point (pre-increment).
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_PREV_CHAR.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h.
+ */
+#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the code point that has code units before i.
+ * I.e., move i backward by one code point.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_1.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.
+ */
+#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the n code points that have code units before i.
+ * I.e., move i backward by n code points.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_N.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.
+ */
+#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points beyond
+ * a code point. The input index points beyond any code unit
+ * of a code point and is moved to point beyond the last code unit of the same
+ * code point. i is never decremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is incremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_PREV_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_LIMIT.
+ * \pre start<i<=length
+ * \post start<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.
+ */
+#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif
+
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utrace.h b/src/core/basetypes/icu-ucsdet/include/unicode/utrace.h
new file mode 100644
index 00000000..2621cf9c
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utrace.h
@@ -0,0 +1,359 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utrace.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003aug06
+*   created by: Markus W. Scherer
+*
+*   Definitions for ICU tracing/logging.
+*
+*/
+
+#ifndef __UTRACE_H__
+#define __UTRACE_H__
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API:  Definitions for ICU tracing/logging. 
+ *
+ * This provides API for debugging the internals of ICU without the use of
+ * a traditional debugger.
+ *
+ * By default, tracing is disabled in ICU. If you need to debug ICU with 
+ * tracing, please compile ICU with the --enable-tracing configure option.
+ */
+ 
+U_CDECL_BEGIN
+
+/**
+ * Trace severity levels.  Higher levels increase the verbosity of the trace output.
+ * @see utrace_setLevel
+ * @stable ICU 2.8
+ */
+typedef enum UTraceLevel {
+    /** Disable all tracing  @stable ICU 2.8*/
+    UTRACE_OFF=-1,
+    /** Trace error conditions only  @stable ICU 2.8*/
+    UTRACE_ERROR=0,
+    /** Trace errors and warnings  @stable ICU 2.8*/
+    UTRACE_WARNING=3,
+    /** Trace opens and closes of ICU services  @stable ICU 2.8*/
+    UTRACE_OPEN_CLOSE=5,
+    /** Trace an intermediate number of ICU operations  @stable ICU 2.8*/
+    UTRACE_INFO=7,
+    /** Trace the maximum number of ICU operations  @stable ICU 2.8*/
+    UTRACE_VERBOSE=9
+} UTraceLevel;
+
+/**
+ *  These are the ICU functions that will be traced when tracing is enabled.
+ *  @stable ICU 2.8
+ */
+typedef enum UTraceFunctionNumber {
+    UTRACE_FUNCTION_START=0,
+    UTRACE_U_INIT=UTRACE_FUNCTION_START,
+    UTRACE_U_CLEANUP,
+    UTRACE_FUNCTION_LIMIT,
+
+    UTRACE_CONVERSION_START=0x1000,
+    UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START,
+    UTRACE_UCNV_OPEN_PACKAGE,
+    UTRACE_UCNV_OPEN_ALGORITHMIC,
+    UTRACE_UCNV_CLONE,
+    UTRACE_UCNV_CLOSE,
+    UTRACE_UCNV_FLUSH_CACHE,
+    UTRACE_UCNV_LOAD,
+    UTRACE_UCNV_UNLOAD,
+    UTRACE_CONVERSION_LIMIT,
+
+    UTRACE_COLLATION_START=0x2000,
+    UTRACE_UCOL_OPEN=UTRACE_COLLATION_START,
+    UTRACE_UCOL_CLOSE,
+    UTRACE_UCOL_STRCOLL,
+    UTRACE_UCOL_GET_SORTKEY,
+    UTRACE_UCOL_GETLOCALE,
+    UTRACE_UCOL_NEXTSORTKEYPART,
+    UTRACE_UCOL_STRCOLLITER,
+    UTRACE_UCOL_OPEN_FROM_SHORT_STRING,
+    UTRACE_UCOL_STRCOLLUTF8, /**< @stable ICU 50 */
+    UTRACE_COLLATION_LIMIT
+} UTraceFunctionNumber;
+
+/**
+ * Setter for the trace level.
+ * @param traceLevel A UTraceLevel value.
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+utrace_setLevel(int32_t traceLevel);
+
+/**
+ * Getter for the trace level.
+ * @return The UTraceLevel value being used by ICU.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+utrace_getLevel(void);
+
+/* Trace function pointers types  ----------------------------- */
+
+/**
+  *  Type signature for the trace function to be called when entering a function.
+  *  @param context value supplied at the time the trace functions are set.
+  *  @param fnNumber Enum value indicating the ICU function being entered.
+  *  @stable ICU 2.8
+  */
+typedef void U_CALLCONV
+UTraceEntry(const void *context, int32_t fnNumber);
+
+/**
+  *  Type signature for the trace function to be called when exiting from a function.
+  *  @param context value supplied at the time the trace functions are set.
+  *  @param fnNumber Enum value indicating the ICU function being exited.
+  *  @param fmt     A formatting string that describes the number and types
+  *                 of arguments included with the variable args.  The fmt
+  *                 string has the same form as the utrace_vformat format
+  *                 string.
+  *  @param args    A variable arguments list.  Contents are described by
+  *                 the fmt parameter.
+  *  @see   utrace_vformat
+  *  @stable ICU 2.8
+  */
+typedef void U_CALLCONV
+UTraceExit(const void *context, int32_t fnNumber, 
+           const char *fmt, va_list args);
+
+/**
+  *  Type signature for the trace function to be called from within an ICU function
+  *  to display data or messages.
+  *  @param context  value supplied at the time the trace functions are set.
+  *  @param fnNumber Enum value indicating the ICU function being exited.
+  *  @param level    The current tracing level
+  *  @param fmt      A format string describing the tracing data that is supplied
+  *                  as variable args
+  *  @param args     The data being traced, passed as variable args.
+  *  @stable ICU 2.8
+  */
+typedef void U_CALLCONV
+UTraceData(const void *context, int32_t fnNumber, int32_t level,
+           const char *fmt, va_list args);
+
+/**
+  *  Set ICU Tracing functions.  Installs application-provided tracing
+  *  functions into ICU.  After doing this, subsequent ICU operations
+  *  will call back to the installed functions, providing a trace
+  *  of the use of ICU.  Passing a NULL pointer for a tracing function
+  *  is allowed, and inhibits tracing action at points where that function
+  *  would be called.
+  *  <p>
+  *  Tracing and Threads:  Tracing functions are global to a process, and
+  *  will be called in response to ICU operations performed by any
+  *  thread.  If tracing of an individual thread is desired, the
+  *  tracing functions must themselves filter by checking that the
+  *  current thread is the desired thread.
+  *
+  *  @param context an uninterpretted pointer.  Whatever is passed in
+  *                 here will in turn be passed to each of the tracing
+  *                 functions UTraceEntry, UTraceExit and UTraceData.
+  *                 ICU does not use or alter this pointer.
+  *  @param e       Callback function to be called on entry to a 
+  *                 a traced ICU function.
+  *  @param x       Callback function to be called on exit from a
+  *                 traced ICU function.
+  *  @param d       Callback function to be called from within a 
+  *                 traced ICU function, for the purpose of providing
+  *                 data to the trace.
+  *
+  *  @stable ICU 2.8
+  */
+U_STABLE void U_EXPORT2
+utrace_setFunctions(const void *context,
+                    UTraceEntry *e, UTraceExit *x, UTraceData *d);
+
+/**
+  * Get the currently installed ICU tracing functions.   Note that a null function
+  *   pointer will be returned if no trace function has been set.
+  *
+  * @param context  The currently installed tracing context.
+  * @param e        The currently installed UTraceEntry function.
+  * @param x        The currently installed UTraceExit function.
+  * @param d        The currently installed UTraceData function.
+  * @stable ICU 2.8
+  */
+U_STABLE void U_EXPORT2
+utrace_getFunctions(const void **context,
+                    UTraceEntry **e, UTraceExit **x, UTraceData **d);
+
+
+
+/*
+ *
+ * ICU trace format string syntax
+ *
+ * Format Strings are passed to UTraceData functions, and define the
+ * number and types of the trace data being passed on each call.
+ *
+ * The UTraceData function, which is supplied by the application,
+ * not by ICU, can either forward the trace data (passed via
+ * varargs) and the format string back to ICU for formatting into
+ * a displayable string, or it can interpret the format itself,
+ * and do as it wishes with the trace data.
+ *
+ *
+ * Goals for the format string
+ * - basic data output
+ * - easy to use for trace programmer
+ * - sufficient provision for data types for trace output readability
+ * - well-defined types and binary portable APIs
+ *
+ * Non-goals
+ * - printf compatibility
+ * - fancy formatting
+ * - argument reordering and other internationalization features
+ *
+ * ICU trace format strings contain plain text with argument inserts,
+ * much like standard printf format strings.
+ * Each insert begins with a '%', then optionally contains a 'v',
+ * then exactly one type character.
+ * Two '%' in a row represent a '%' instead of an insert.
+ * The trace format strings need not have \n at the end.
+ *
+ *
+ * Types
+ * -----
+ *
+ * Type characters:
+ * - c A char character in the default codepage.
+ * - s A NUL-terminated char * string in the default codepage.
+ * - S A UChar * string.  Requires two params, (ptr, length).  Length=-1 for nul term.
+ * - b A byte (8-bit integer).
+ * - h A 16-bit integer.  Also a 16 bit Unicode code unit.
+ * - d A 32-bit integer.  Also a 20 bit Unicode code point value. 
+ * - l A 64-bit integer.
+ * - p A data pointer.
+ *
+ * Vectors
+ * -------
+ *
+ * If the 'v' is not specified, then one item of the specified type
+ * is passed in.
+ * If the 'v' (for "vector") is specified, then a vector of items of the
+ * specified type is passed in, via a pointer to the first item
+ * and an int32_t value for the length of the vector.
+ * Length==-1 means zero or NUL termination.  Works for vectors of all types.
+ *
+ * Note:  %vS is a vector of (UChar *) strings.  The strings must
+ *        be nul terminated as there is no way to provide a
+ *        separate length parameter for each string.  The length
+ *        parameter (required for all vectors) is the number of
+ *        strings, not the length of the strings.
+ *
+ * Examples
+ * --------
+ *
+ * These examples show the parameters that will be passed to an application's
+ *   UTraceData() function for various formats.
+ *
+ * - the precise formatting is up to the application!
+ * - the examples use type casts for arguments only to _show_ the types of
+ *   arguments without needing variable declarations in the examples;
+ *   the type casts will not be necessary in actual code
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ *              "There is a character %c in the string %s.",   // Format String 
+ *              (char)c, (const char *)s);                     // varargs parameters
+ * ->   There is a character 0x42 'B' in the string "Bravo".
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ *              "Vector of bytes %vb vector of chars %vc",
+ *              (const uint8_t *)bytes, (int32_t)bytesLength,
+ *              (const char *)chars, (int32_t)charsLength);
+ * ->  Vector of bytes
+ *      42 63 64 3f [4]
+ *     vector of chars
+ *      "Bcd?"[4]
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ *              "An int32_t %d and a whole bunch of them %vd",
+ *              (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
+ * ->   An int32_t 0xfffffffb and a whole bunch of them
+ *      fffffffb 00000005 0000010a [3]
+ *
+ */
+
+
+
+/**
+  *  Trace output Formatter.  An application's UTraceData tracing functions may call
+  *                 back to this function to format the trace output in a
+  *                 human readable form.  Note that a UTraceData function may choose
+  *                 to not format the data;  it could, for example, save it in
+  *                 in the raw form it was received (more compact), leaving
+  *                 formatting for a later trace analyis tool.
+  *  @param outBuf  pointer to a buffer to receive the formatted output.  Output
+  *                 will be nul terminated if there is space in the buffer -
+  *                 if the length of the requested output < the output buffer size.
+  *  @param capacity  Length of the output buffer.
+  *  @param indent  Number of spaces to indent the output.  Intended to allow
+  *                 data displayed from nested functions to be indented for readability.
+  *  @param fmt     Format specification for the data to output
+  *  @param args    Data to be formatted.
+  *  @return        Length of formatted output, including the terminating NUL.
+  *                 If buffer capacity is insufficient, the required capacity is returned. 
+  *  @stable ICU 2.8
+  */
+U_STABLE int32_t U_EXPORT2
+utrace_vformat(char *outBuf, int32_t capacity,
+              int32_t indent, const char *fmt,  va_list args);
+
+/**
+  *  Trace output Formatter.  An application's UTraceData tracing functions may call
+  *                 this function to format any additional trace data, beyond that
+  *                 provided by default, in human readable form with the same
+  *                 formatting conventions used by utrace_vformat().
+  *  @param outBuf  pointer to a buffer to receive the formatted output.  Output
+  *                 will be nul terminated if there is space in the buffer -
+  *                 if the length of the requested output < the output buffer size.
+  *  @param capacity  Length of the output buffer.
+  *  @param indent  Number of spaces to indent the output.  Intended to allow
+  *                 data displayed from nested functions to be indented for readability.
+  *  @param fmt     Format specification for the data to output
+  *  @param ...     Data to be formatted.
+  *  @return        Length of formatted output, including the terminating NUL.
+  *                 If buffer capacity is insufficient, the required capacity is returned. 
+  *  @stable ICU 2.8
+  */
+U_STABLE int32_t U_EXPORT2
+utrace_format(char *outBuf, int32_t capacity,
+              int32_t indent, const char *fmt,  ...);
+
+
+
+/* Trace function numbers --------------------------------------------------- */
+
+/**
+ * Get the name of a function from its trace function number.
+ *
+ * @param fnNumber The trace number for an ICU function.
+ * @return The name string for the function.
+ *
+ * @see UTraceFunctionNumber
+ * @stable ICU 2.8
+ */
+U_STABLE const char * U_EXPORT2
+utrace_functionName(int32_t fnNumber);
+
+U_CDECL_END
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/utypes.h b/src/core/basetypes/icu-ucsdet/include/unicode/utypes.h
new file mode 100644
index 00000000..704089c7
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/utypes.h
@@ -0,0 +1,723 @@
+/*
+**********************************************************************
+*   Copyright (C) 1996-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+*  FILE NAME : UTYPES.H (formerly ptypes.h)
+*
+*   Date        Name        Description
+*   12/11/96    helena      Creation.
+*   02/27/97    aliu        Added typedefs for UClassID, int8, int16, int32,
+*                           uint8, uint16, and uint32.
+*   04/01/97    aliu        Added XP_CPLUSPLUS and modified to work under C as
+*                            well as C++.
+*                           Modified to use memcpy() for uprv_arrayCopy() fns.
+*   04/14/97    aliu        Added TPlatformUtilities.
+*   05/07/97    aliu        Added import/export specifiers (replacing the old
+*                           broken EXT_CLASS).  Added version number for our
+*                           code.  Cleaned up header.
+*    6/20/97    helena      Java class name change.
+*   08/11/98    stephen     UErrorCode changed from typedef to enum
+*   08/12/98    erm         Changed T_ANALYTIC_PACKAGE_VERSION to 3
+*   08/14/98    stephen     Added uprv_arrayCopy() for int8_t, int16_t, int32_t
+*   12/09/98    jfitz       Added BUFFER_OVERFLOW_ERROR (bug 1100066)
+*   04/20/99    stephen     Cleaned up & reworked for autoconf.
+*                           Renamed to utypes.h.
+*   05/05/99    stephen     Changed to use <inttypes.h>
+*   12/07/99    helena      Moved copyright notice string from ucnv_bld.h here.
+*******************************************************************************
+*/
+
+#ifndef UTYPES_H
+#define UTYPES_H
+
+
+#include "unicode/umachine.h"
+#include "unicode/uversion.h"
+#include "unicode/uconfig.h"
+#include <float.h>
+
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+#   include "unicode/utf.h"
+#endif
+
+/*!
+ * \file
+ * \brief Basic definitions for ICU, for both C and C++ APIs
+ *
+ * This file defines basic types, constants, and enumerations directly or
+ * indirectly by including other header files, especially utf.h for the
+ * basic character and string definitions and umachine.h for consistent
+ * integer and other types.
+ */
+
+
+/**
+ * \def U_SHOW_CPLUSPLUS_API
+ * @internal
+ */
+#ifdef __cplusplus
+#   ifndef U_SHOW_CPLUSPLUS_API
+#       define U_SHOW_CPLUSPLUS_API 1
+#   endif
+#else
+#   undef U_SHOW_CPLUSPLUS_API
+#   define U_SHOW_CPLUSPLUS_API 0
+#endif
+
+/** @{ API visibility control */
+
+/**
+ * \def U_HIDE_DRAFT_API
+ * Define this to 1 to request that draft API be "hidden"
+ * @internal
+ */
+/**
+ * \def U_HIDE_INTERNAL_API
+ * Define this to 1 to request that internal API be "hidden"
+ * @internal
+ */
+#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
+#define U_HIDE_DRAFT_API 1
+#endif
+#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API)
+#define U_HIDE_INTERNAL_API 1
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/* ICUDATA naming scheme                                                     */
+/*===========================================================================*/
+
+/**
+ * \def U_ICUDATA_TYPE_LETTER
+ *
+ * This is a platform-dependent string containing one letter:
+ * - b for big-endian, ASCII-family platforms
+ * - l for little-endian, ASCII-family platforms
+ * - e for big-endian, EBCDIC-family platforms
+ * This letter is part of the common data file name.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_ICUDATA_TYPE_LITLETTER
+ * The non-string form of U_ICUDATA_TYPE_LETTER
+ * @stable ICU 2.0
+ */
+#if U_CHARSET_FAMILY
+#   if U_IS_BIG_ENDIAN
+   /* EBCDIC - should always be BE */
+#     define U_ICUDATA_TYPE_LETTER "e"
+#     define U_ICUDATA_TYPE_LITLETTER e
+#   else
+#     error "Don't know what to do with little endian EBCDIC!"
+#     define U_ICUDATA_TYPE_LETTER "x"
+#     define U_ICUDATA_TYPE_LITLETTER x
+#   endif
+#else
+#   if U_IS_BIG_ENDIAN
+      /* Big-endian ASCII */
+#     define U_ICUDATA_TYPE_LETTER "b"
+#     define U_ICUDATA_TYPE_LITLETTER b
+#   else
+      /* Little-endian ASCII */
+#     define U_ICUDATA_TYPE_LETTER "l"
+#     define U_ICUDATA_TYPE_LITLETTER l
+#   endif
+#endif
+
+/**
+ * A single string literal containing the icudata stub name. i.e. 'icudt18e' for
+ * ICU 1.8.x on EBCDIC, etc..
+ * @stable ICU 2.0
+ */
+#define U_ICUDATA_NAME    "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
+#ifndef U_HIDE_INTERNAL_API
+#define U_USRDATA_NAME    "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER  /**< @internal */
+#define U_USE_USRDATA     1  /**< @internal */
+#endif  /* U_HIDE_INTERNAL_API */
+
+/**
+ *  U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
+ *    Defined as a literal, not a string.
+ *    Tricky Preprocessor use - ## operator replaces macro paramters with the literal string
+ *                              from the corresponding macro invocation, _before_ other macro substitutions.
+ *                              Need a nested \#defines to get the actual version numbers rather than
+ *                              the literal text U_ICU_VERSION_MAJOR_NUM into the name.
+ *                              The net result will be something of the form
+ *                                  \#define U_ICU_ENTRY_POINT icudt19_dat
+ * @stable ICU 2.4
+ */
+#define U_ICUDATA_ENTRY_POINT  U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
+ * @internal
+ */
+#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
+
+/**
+ * Do not use.
+ * @internal
+ */
+#ifndef U_DEF_ICUDATA_ENTRY_POINT
+/* affected by symbol renaming. See platform.h */
+#ifndef U_LIB_SUFFIX_C_NAME
+#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
+#else
+#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
+#endif
+#endif
+#endif  /* U_HIDE_INTERNAL_API */
+
+/**
+ * \def NULL
+ * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
+ * @stable ICU 2.0
+ */
+#ifndef NULL
+#ifdef __cplusplus
+#define NULL    0
+#else
+#define NULL    ((void *)0)
+#endif
+#endif
+
+/*===========================================================================*/
+/* Calendar/TimeZone data types                                              */
+/*===========================================================================*/
+
+/**
+ * Date and Time data type.
+ * This is a primitive data type that holds the date and time
+ * as the number of milliseconds since 1970-jan-01, 00:00 UTC.
+ * UTC leap seconds are ignored.
+ * @stable ICU 2.0
+ */
+typedef double UDate;
+
+/** The number of milliseconds per second @stable ICU 2.0 */
+#define U_MILLIS_PER_SECOND        (1000)
+/** The number of milliseconds per minute @stable ICU 2.0 */
+#define U_MILLIS_PER_MINUTE       (60000)
+/** The number of milliseconds per hour @stable ICU 2.0 */
+#define U_MILLIS_PER_HOUR       (3600000)
+/** The number of milliseconds per day @stable ICU 2.0 */
+#define U_MILLIS_PER_DAY       (86400000)
+
+/** 
+ * Maximum UDate value 
+ * @stable ICU 4.8 
+ */ 
+#define U_DATE_MAX DBL_MAX
+
+/**
+ * Minimum UDate value 
+ * @stable ICU 4.8 
+ */ 
+#define U_DATE_MIN -U_DATE_MAX
+
+/*===========================================================================*/
+/* Shared library/DLL import-export API control                              */
+/*===========================================================================*/
+
+/*
+ * Control of symbol import/export.
+ * ICU is separated into three libraries.
+ */
+
+/**
+ * \def U_COMBINED_IMPLEMENTATION
+ * Set to export library symbols from inside the ICU library
+ * when all of ICU is in a single library.
+ * This can be set as a compiler option while building ICU, and it
+ * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_DATA_API
+ * Set to export library symbols from inside the stubdata library,
+ * and to import them from outside.
+ * @stable ICU 3.0
+ */
+
+/**
+ * \def U_COMMON_API
+ * Set to export library symbols from inside the common library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_I18N_API
+ * Set to export library symbols from inside the i18n library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUT_API
+ * Set to export library symbols from inside the layout engine library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUTEX_API
+ * Set to export library symbols from inside the layout extensions library,
+ * and to import them from outside.
+ * @stable ICU 2.6
+ */
+
+/**
+ * \def U_IO_API
+ * Set to export library symbols from inside the ustdio library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_TOOLUTIL_API
+ * Set to export library symbols from inside the toolutil library,
+ * and to import them from outside.
+ * @stable ICU 3.4
+ */
+
+#if defined(U_COMBINED_IMPLEMENTATION)
+#define U_DATA_API     U_EXPORT
+#define U_COMMON_API   U_EXPORT
+#define U_I18N_API     U_EXPORT
+#define U_LAYOUT_API   U_EXPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API       U_EXPORT
+#define U_TOOLUTIL_API U_EXPORT
+#elif defined(U_STATIC_IMPLEMENTATION)
+#define U_DATA_API
+#define U_COMMON_API
+#define U_I18N_API
+#define U_LAYOUT_API
+#define U_LAYOUTEX_API
+#define U_IO_API
+#define U_TOOLUTIL_API
+#elif defined(U_COMMON_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_EXPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_I18N_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_EXPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_LAYOUT_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_EXPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_LAYOUTEX_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_IO_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_EXPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_TOOLUTIL_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_EXPORT
+#else
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#endif
+
+/**
+ * \def U_STANDARD_CPP_NAMESPACE
+ * Control of C++ Namespace
+ * @stable ICU 2.0
+ */
+#ifdef __cplusplus
+#define U_STANDARD_CPP_NAMESPACE        ::
+#else
+#define U_STANDARD_CPP_NAMESPACE
+#endif
+
+
+/*===========================================================================*/
+/* Global delete operator                                                    */
+/*===========================================================================*/
+
+/*
+ * The ICU4C library must not use the global new and delete operators.
+ * These operators here are defined to enable testing for this.
+ * See Jitterbug 2581 for details of why this is necessary.
+ *
+ * Verification that ICU4C's memory usage is correct, i.e.,
+ * that global new/delete are not used:
+ *
+ * a) Check for imports of global new/delete (see uobject.cpp for details)
+ * b) Verify that new is never imported.
+ * c) Verify that delete is only imported from object code for interface/mixin classes.
+ * d) Add global delete and delete[] only for the ICU4C library itself
+ *    and define them in a way that crashes or otherwise easily shows a problem.
+ *
+ * The following implements d).
+ * The operator implementations crash; this is intentional and used for library debugging.
+ *
+ * Note: This is currently only done on Windows because
+ * some Linux/Unix compilers have problems with defining global new/delete.
+ * On Windows, it is _MSC_VER>=1200 for MSVC 6.0 and higher.
+ */
+#if defined(__cplusplus) && U_DEBUG && U_OVERRIDE_CXX_ALLOCATION && (_MSC_VER>=1200) && !defined(U_STATIC_IMPLEMENTATION) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION))
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Global operator new, defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void *
+operator new(size_t /*size*/) {
+    char *q=NULL;
+    *q=5; /* break it */
+    return q;
+}
+
+#ifdef _Ret_bytecap_
+/* This is only needed to suppress a Visual C++ 2008 warning for operator new[]. */
+_Ret_bytecap_(_Size)
+#endif
+/**
+ * Global operator new[], defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void *
+operator new[](size_t /*size*/) {
+    char *q=NULL;
+    *q=5; /* break it */
+    return q;
+}
+
+/**
+ * Global operator delete, defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void
+operator delete(void * /*p*/) {
+    char *q=NULL;
+    *q=5; /* break it */
+}
+
+/**
+ * Global operator delete[], defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void
+operator delete[](void * /*p*/) {
+    char *q=NULL;
+    *q=5; /* break it */
+}
+
+#endif /* U_HIDE_INTERNAL_API */
+#endif
+
+/*===========================================================================*/
+/* UErrorCode */
+/*===========================================================================*/
+
+/**
+ * Error code to replace exception handling, so that the code is compatible with all C++ compilers,
+ * and to use the same mechanism for C and C++.
+ *
+ * \par
+ * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode
+ * first test if(U_FAILURE(errorCode)) { return immediately; }
+ * so that in a chain of such functions the first one that sets an error code
+ * causes the following ones to not perform any operations.
+ *
+ * \par
+ * Error codes should be tested using U_FAILURE() and U_SUCCESS().
+ * @stable ICU 2.0
+ */
+typedef enum UErrorCode {
+    /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
+     * and is that way because VC++ debugger displays first encountered constant,
+     * which is not the what the code is used for
+     */
+
+    U_USING_FALLBACK_WARNING  = -128,   /**< A resource bundle lookup returned a fallback result (not an error) */
+
+    U_ERROR_WARNING_START     = -128,   /**< Start of information results (semantically successful) */
+
+    U_USING_DEFAULT_WARNING   = -127,   /**< A resource bundle lookup returned a result from the root locale (not an error) */
+
+    U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
+
+    U_STATE_OLD_WARNING       = -125,   /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
+
+    U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
+
+    U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
+
+    U_AMBIGUOUS_ALIAS_WARNING = -122,   /**< This converter alias can go to different converter implementations */
+
+    U_DIFFERENT_UCA_VERSION = -121,     /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
+    
+    U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
+
+    U_ERROR_WARNING_LIMIT,              /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */
+
+
+    U_ZERO_ERROR              =  0,     /**< No error, no warning. */
+
+    U_ILLEGAL_ARGUMENT_ERROR  =  1,     /**< Start of codes indicating failure */
+    U_MISSING_RESOURCE_ERROR  =  2,     /**< The requested resource cannot be found */
+    U_INVALID_FORMAT_ERROR    =  3,     /**< Data format is not what is expected */
+    U_FILE_ACCESS_ERROR       =  4,     /**< The requested file cannot be found */
+    U_INTERNAL_PROGRAM_ERROR  =  5,     /**< Indicates a bug in the library code */
+    U_MESSAGE_PARSE_ERROR     =  6,     /**< Unable to parse a message (message format) */
+    U_MEMORY_ALLOCATION_ERROR =  7,     /**< Memory allocation error */
+    U_INDEX_OUTOFBOUNDS_ERROR =  8,     /**< Trying to access the index that is out of bounds */
+    U_PARSE_ERROR             =  9,     /**< Equivalent to Java ParseException */
+    U_INVALID_CHAR_FOUND      = 10,     /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
+    U_TRUNCATED_CHAR_FOUND    = 11,     /**< Character conversion: Incomplete input sequence. */
+    U_ILLEGAL_CHAR_FOUND      = 12,     /**< Character conversion: Illegal input sequence/combination of input units. */
+    U_INVALID_TABLE_FORMAT    = 13,     /**< Conversion table file found, but corrupted */
+    U_INVALID_TABLE_FILE      = 14,     /**< Conversion table file not found */
+    U_BUFFER_OVERFLOW_ERROR   = 15,     /**< A result would not fit in the supplied buffer */
+    U_UNSUPPORTED_ERROR       = 16,     /**< Requested operation not supported in current context */
+    U_RESOURCE_TYPE_MISMATCH  = 17,     /**< an operation is requested over a resource that does not support it */
+    U_ILLEGAL_ESCAPE_SEQUENCE = 18,     /**< ISO-2022 illlegal escape sequence */
+    U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
+    U_NO_SPACE_AVAILABLE      = 20,     /**< No space available for in-buffer expansion for Arabic shaping */
+    U_CE_NOT_FOUND_ERROR      = 21,     /**< Currently used only while setting variable top, but can be used generally */
+    U_PRIMARY_TOO_LONG_ERROR  = 22,     /**< User tried to set variable top to a primary that is longer than two bytes */
+    U_STATE_TOO_OLD_ERROR     = 23,     /**< ICU cannot construct a service from this state, as it is no longer supported */
+    U_TOO_MANY_ALIASES_ERROR  = 24,     /**< There are too many aliases in the path to the requested resource.
+                                             It is very possible that a circular alias definition has occured */
+    U_ENUM_OUT_OF_SYNC_ERROR  = 25,     /**< UEnumeration out of sync with underlying collection */
+    U_INVARIANT_CONVERSION_ERROR = 26,  /**< Unable to convert a UChar* string to char* with the invariant converter. */
+    U_INVALID_STATE_ERROR     = 27,     /**< Requested operation can not be completed with ICU in its current state */
+    U_COLLATOR_VERSION_MISMATCH = 28,   /**< Collator version is not compatible with the base version */
+    U_USELESS_COLLATOR_ERROR  = 29,     /**< Collator is options only and no base is specified */
+    U_NO_WRITE_PERMISSION     = 30,     /**< Attempt to modify read-only or constant data. */
+
+    U_STANDARD_ERROR_LIMIT,             /**< This must always be the last value to indicate the limit for standard errors */
+    /*
+     * the error code range 0x10000 0x10100 are reserved for Transliterator
+     */
+    U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
+    U_PARSE_ERROR_START = 0x10000,    /**< Start of Transliterator errors */
+    U_MALFORMED_RULE,                 /**< Elements of a rule are misplaced */
+    U_MALFORMED_SET,                  /**< A UnicodeSet pattern is invalid*/
+    U_MALFORMED_SYMBOL_REFERENCE,     /**< UNUSED as of ICU 2.4 */
+    U_MALFORMED_UNICODE_ESCAPE,       /**< A Unicode escape pattern is invalid*/
+    U_MALFORMED_VARIABLE_DEFINITION,  /**< A variable definition is invalid */
+    U_MALFORMED_VARIABLE_REFERENCE,   /**< A variable reference is invalid */
+    U_MISMATCHED_SEGMENT_DELIMITERS,  /**< UNUSED as of ICU 2.4 */
+    U_MISPLACED_ANCHOR_START,         /**< A start anchor appears at an illegal position */
+    U_MISPLACED_CURSOR_OFFSET,        /**< A cursor offset occurs at an illegal position */
+    U_MISPLACED_QUANTIFIER,           /**< A quantifier appears after a segment close delimiter */
+    U_MISSING_OPERATOR,               /**< A rule contains no operator */
+    U_MISSING_SEGMENT_CLOSE,          /**< UNUSED as of ICU 2.4 */
+    U_MULTIPLE_ANTE_CONTEXTS,         /**< More than one ante context */
+    U_MULTIPLE_CURSORS,               /**< More than one cursor */
+    U_MULTIPLE_POST_CONTEXTS,         /**< More than one post context */
+    U_TRAILING_BACKSLASH,             /**< A dangling backslash */
+    U_UNDEFINED_SEGMENT_REFERENCE,    /**< A segment reference does not correspond to a defined segment */
+    U_UNDEFINED_VARIABLE,             /**< A variable reference does not correspond to a defined variable */
+    U_UNQUOTED_SPECIAL,               /**< A special character was not quoted or escaped */
+    U_UNTERMINATED_QUOTE,             /**< A closing single quote is missing */
+    U_RULE_MASK_ERROR,                /**< A rule is hidden by an earlier more general rule */
+    U_MISPLACED_COMPOUND_FILTER,      /**< A compound filter is in an invalid location */
+    U_MULTIPLE_COMPOUND_FILTERS,      /**< More than one compound filter */
+    U_INVALID_RBT_SYNTAX,             /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
+    U_INVALID_PROPERTY_PATTERN,       /**< UNUSED as of ICU 2.4 */
+    U_MALFORMED_PRAGMA,               /**< A 'use' pragma is invlalid */
+    U_UNCLOSED_SEGMENT,               /**< A closing ')' is missing */
+    U_ILLEGAL_CHAR_IN_SEGMENT,        /**< UNUSED as of ICU 2.4 */
+    U_VARIABLE_RANGE_EXHAUSTED,       /**< Too many stand-ins generated for the given variable range */
+    U_VARIABLE_RANGE_OVERLAP,         /**< The variable range overlaps characters used in rules */
+    U_ILLEGAL_CHARACTER,              /**< A special character is outside its allowed context */
+    U_INTERNAL_TRANSLITERATOR_ERROR,  /**< Internal transliterator system error */
+    U_INVALID_ID,                     /**< A "::id" rule specifies an unknown transliterator */
+    U_INVALID_FUNCTION,               /**< A "&fn()" rule specifies an unknown transliterator */
+    U_PARSE_ERROR_LIMIT,              /**< The limit for Transliterator errors */
+
+    /*
+     * the error code range 0x10100 0x10200 are reserved for formatting API parsing error
+     */
+    U_UNEXPECTED_TOKEN=0x10100,       /**< Syntax error in format pattern */
+    U_FMT_PARSE_ERROR_START=0x10100,  /**< Start of format library errors */
+    U_MULTIPLE_DECIMAL_SEPARATORS,    /**< More than one decimal separator in number pattern */
+    U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
+    U_MULTIPLE_EXPONENTIAL_SYMBOLS,   /**< More than one exponent symbol in number pattern */
+    U_MALFORMED_EXPONENTIAL_PATTERN,  /**< Grouping symbol in exponent pattern */
+    U_MULTIPLE_PERCENT_SYMBOLS,       /**< More than one percent symbol in number pattern */
+    U_MULTIPLE_PERMILL_SYMBOLS,       /**< More than one permill symbol in number pattern */
+    U_MULTIPLE_PAD_SPECIFIERS,        /**< More than one pad symbol in number pattern */
+    U_PATTERN_SYNTAX_ERROR,           /**< Syntax error in format pattern */
+    U_ILLEGAL_PAD_POSITION,           /**< Pad symbol misplaced in number pattern */
+    U_UNMATCHED_BRACES,               /**< Braces do not match in message pattern */
+    U_UNSUPPORTED_PROPERTY,           /**< UNUSED as of ICU 2.4 */
+    U_UNSUPPORTED_ATTRIBUTE,          /**< UNUSED as of ICU 2.4 */
+    U_ARGUMENT_TYPE_MISMATCH,         /**< Argument name and argument index mismatch in MessageFormat functions */
+    U_DUPLICATE_KEYWORD,              /**< Duplicate keyword in PluralFormat */
+    U_UNDEFINED_KEYWORD,              /**< Undefined Plural keyword */
+    U_DEFAULT_KEYWORD_MISSING,        /**< Missing DEFAULT rule in plural rules */
+    U_DECIMAL_NUMBER_SYNTAX_ERROR,    /**< Decimal number syntax error */
+    U_FORMAT_INEXACT_ERROR,           /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
+    U_FMT_PARSE_ERROR_LIMIT,          /**< The limit for format library errors */
+
+    /*
+     * the error code range 0x10200 0x102ff are reserved for Break Iterator related error
+     */
+    U_BRK_INTERNAL_ERROR=0x10200,          /**< An internal error (bug) was detected.             */
+    U_BRK_ERROR_START=0x10200,             /**< Start of codes indicating Break Iterator failures */
+    U_BRK_HEX_DIGITS_EXPECTED,             /**< Hex digits expected as part of a escaped char in a rule. */
+    U_BRK_SEMICOLON_EXPECTED,              /**< Missing ';' at the end of a RBBI rule.            */
+    U_BRK_RULE_SYNTAX,                     /**< Syntax error in RBBI rule.                        */
+    U_BRK_UNCLOSED_SET,                    /**< UnicodeSet witing an RBBI rule missing a closing ']'.  */
+    U_BRK_ASSIGN_ERROR,                    /**< Syntax error in RBBI rule assignment statement.   */
+    U_BRK_VARIABLE_REDFINITION,            /**< RBBI rule $Variable redefined.                    */
+    U_BRK_MISMATCHED_PAREN,                /**< Mis-matched parentheses in an RBBI rule.          */
+    U_BRK_NEW_LINE_IN_QUOTED_STRING,       /**< Missing closing quote in an RBBI rule.            */
+    U_BRK_UNDEFINED_VARIABLE,              /**< Use of an undefined $Variable in an RBBI rule.    */
+    U_BRK_INIT_ERROR,                      /**< Initialization failure.  Probable missing ICU Data. */
+    U_BRK_RULE_EMPTY_SET,                  /**< Rule contains an empty Unicode Set.               */
+    U_BRK_UNRECOGNIZED_OPTION,             /**< !!option in RBBI rules not recognized.            */
+    U_BRK_MALFORMED_RULE_TAG,              /**< The {nnn} tag on a rule is mal formed             */
+    U_BRK_ERROR_LIMIT,                     /**< This must always be the last value to indicate the limit for Break Iterator failures */
+
+    /*
+     * The error codes in the range 0x10300-0x103ff are reserved for regular expression related errrs
+     */
+    U_REGEX_INTERNAL_ERROR=0x10300,       /**< An internal error (bug) was detected.              */
+    U_REGEX_ERROR_START=0x10300,          /**< Start of codes indicating Regexp failures          */
+    U_REGEX_RULE_SYNTAX,                  /**< Syntax error in regexp pattern.                    */
+    U_REGEX_INVALID_STATE,                /**< RegexMatcher in invalid state for requested operation */
+    U_REGEX_BAD_ESCAPE_SEQUENCE,          /**< Unrecognized backslash escape sequence in pattern  */
+    U_REGEX_PROPERTY_SYNTAX,              /**< Incorrect Unicode property                         */
+    U_REGEX_UNIMPLEMENTED,                /**< Use of regexp feature that is not yet implemented. */
+    U_REGEX_MISMATCHED_PAREN,             /**< Incorrectly nested parentheses in regexp pattern.  */
+    U_REGEX_NUMBER_TOO_BIG,               /**< Decimal number is too large.                       */
+    U_REGEX_BAD_INTERVAL,                 /**< Error in {min,max} interval                        */
+    U_REGEX_MAX_LT_MIN,                   /**< In {min,max}, max is less than min.                */
+    U_REGEX_INVALID_BACK_REF,             /**< Back-reference to a non-existent capture group.    */
+    U_REGEX_INVALID_FLAG,                 /**< Invalid value for match mode flags.                */
+    U_REGEX_LOOK_BEHIND_LIMIT,            /**< Look-Behind pattern matches must have a bounded maximum length.    */
+    U_REGEX_SET_CONTAINS_STRING,          /**< Regexps cannot have UnicodeSets containing strings.*/
+    U_REGEX_OCTAL_TOO_BIG,                /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
+    U_REGEX_MISSING_CLOSE_BRACKET,        /**< Missing closing bracket on a bracket expression.   */
+    U_REGEX_INVALID_RANGE,                /**< In a character range [x-y], x is greater than y.   */
+    U_REGEX_STACK_OVERFLOW,               /**< Regular expression backtrack stack overflow.       */
+    U_REGEX_TIME_OUT,                     /**< Maximum allowed match time exceeded                */
+    U_REGEX_STOPPED_BY_CALLER,            /**< Matching operation aborted by user callback fn.    */
+    U_REGEX_ERROR_LIMIT,                  /**< This must always be the last value to indicate the limit for regexp errors */
+
+    /*
+     * The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
+     */
+    U_IDNA_PROHIBITED_ERROR=0x10400,
+    U_IDNA_ERROR_START=0x10400,
+    U_IDNA_UNASSIGNED_ERROR,
+    U_IDNA_CHECK_BIDI_ERROR,
+    U_IDNA_STD3_ASCII_RULES_ERROR,
+    U_IDNA_ACE_PREFIX_ERROR,
+    U_IDNA_VERIFICATION_ERROR,
+    U_IDNA_LABEL_TOO_LONG_ERROR,
+    U_IDNA_ZERO_LENGTH_LABEL_ERROR,
+    U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
+    U_IDNA_ERROR_LIMIT,
+    /*
+     * Aliases for StringPrep
+     */
+    U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
+    U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
+    U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
+    
+    /*
+     * The error code in the range 0x10500-0x105ff are reserved for Plugin related error codes
+     */
+    U_PLUGIN_ERROR_START=0x10500,         /**< Start of codes indicating plugin failures */
+    U_PLUGIN_TOO_HIGH=0x10500,            /**< The plugin's level is too high to be loaded right now. */
+    U_PLUGIN_DIDNT_SET_LEVEL,             /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
+    U_PLUGIN_ERROR_LIMIT,                 /**< This must always be the last value to indicate the limit for plugin errors */
+
+    U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT      /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
+} UErrorCode;
+
+/* Use the following to determine if an UErrorCode represents */
+/* operational success or failure. */
+
+#ifdef __cplusplus
+    /**
+     * Does the error code indicate success?
+     * @stable ICU 2.0
+     */
+    static
+    inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
+    /**
+     * Does the error code indicate a failure?
+     * @stable ICU 2.0
+     */
+    static
+    inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
+#else
+    /**
+     * Does the error code indicate success?
+     * @stable ICU 2.0
+     */
+#   define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
+    /**
+     * Does the error code indicate a failure?
+     * @stable ICU 2.0
+     */
+#   define U_FAILURE(x) ((x)>U_ZERO_ERROR)
+#endif
+
+/**
+ * Return a string for a UErrorCode value.
+ * The string will be the same as the name of the error code constant
+ * in the UErrorCode enum above.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+u_errorName(UErrorCode code);
+
+
+#endif /* _UTYPES */
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uvernum.h b/src/core/basetypes/icu-ucsdet/include/unicode/uvernum.h
new file mode 100644
index 00000000..5af40e33
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uvernum.h
@@ -0,0 +1,170 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2000-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  uvernum.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Created by: Vladimir Weinstein
+*   Updated by: Steven R. Loomis
+*
+*/
+
+/**
+ * \file
+ * \brief C API: definitions of ICU version numbers
+ *
+ * This file is included by uversion.h and other files. This file contains only
+ * macros and definitions. The actual version numbers are defined here.
+ */
+
+ /*
+  * IMPORTANT: When updating version, the following things need to be done:
+  * source/common/unicode/uvernum.h - this file: update major, minor,
+  *        patchlevel, suffix, version, short version constants, namespace,
+  *                    renaming macro, and copyright
+  *
+  * The following files need to be updated as well, which can be done
+  *  by running the UNIX makefile target 'update-windows-makefiles' in icu/source.
+  *
+  *
+  * source/common/common.vcproj - update 'Output file name' on the link tab so
+  *                   that it contains the new major/minor combination
+  * source/i18n/i18n.vcproj - same as for the common.vcproj
+  * source/layout/layout.vcproj - same as for the common.vcproj
+  * source/layoutex/layoutex.vcproj - same
+  * source/stubdata/stubdata.vcproj - same as for the common.vcproj
+  * source/io/io.vcproj - same as for the common.vcproj
+  * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
+  *                            the new major/minor combination and the Unicode version.
+  */
+
+#ifndef UVERNUM_H
+#define UVERNUM_H
+
+/** The standard copyright notice that gets compiled into each library.
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING \
+  " Copyright (C) 2014, International Business Machines Corporation and others. All Rights Reserved. "
+
+/** The current ICU major version as an integer.
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.4
+ */
+#define U_ICU_VERSION_MAJOR_NUM 54
+
+/** The current ICU minor version as an integer.
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.6
+ */
+#define U_ICU_VERSION_MINOR_NUM 1
+
+/** The current ICU patchlevel version as an integer.
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.4
+ */
+#define U_ICU_VERSION_PATCHLEVEL_NUM 0
+
+/** The current ICU build level version as an integer.
+ *  This value is for use by ICU clients. It defaults to 0.
+ *  @stable ICU 4.0
+ */
+#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
+#define U_ICU_VERSION_BUILDLEVEL_NUM 0
+#endif
+
+/** Glued version suffix for renamers
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SUFFIX _54_mc
+
+/**
+ * \def U_DEF2_ICU_ENTRY_POINT_RENAME
+ * @internal
+ */
+/**
+ * \def U_DEF_ICU_ENTRY_POINT_RENAME
+ * @internal
+ */
+/** Glued version suffix function for renamers
+ *  This value will change in the subsequent releases of ICU.
+ *  If a custom suffix (such as matching library suffixes) is desired, this can be modified.
+ *  Note that if present, platform.h may contain an earlier definition of this macro.
+ *  \def U_ICU_ENTRY_POINT_RENAME
+ *  @stable ICU 4.2
+ */
+
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#ifdef U_HAVE_LIB_SUFFIX
+#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ##  z
+#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z)
+#define U_ICU_ENTRY_POINT_RENAME(x)    U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME)
+#else
+#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
+#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
+#define U_ICU_ENTRY_POINT_RENAME(x)    U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX)
+#endif
+#endif
+
+/** The current ICU library version as a dotted-decimal string. The patchlevel
+ *  only appears in this string if it non-zero.
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.4
+ */
+#define U_ICU_VERSION "54.1"
+
+/** The current ICU library major/minor version as a string without dots, for library name suffixes.
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SHORT "54"
+
+#ifndef U_HIDE_INTERNAL_API
+/** Data version in ICU4C.
+ * @internal ICU 4.4 Internal Use Only
+ **/
+#define U_ICU_DATA_VERSION "54.1"
+#endif  /* U_HIDE_INTERNAL_API */
+
+/*===========================================================================
+ * ICU collation framework version information
+ * Version info that can be obtained from a collator is affected by these
+ * numbers in a secret and magic way. Please use collator version as whole
+ *===========================================================================
+ */
+
+/**
+ * Collation runtime version (sort key generator, strcoll).
+ * If the version is different, sort keys for the same string could be different.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_RUNTIME_VERSION 8
+
+/**
+ * Collation builder code version.
+ * When this is different, the same tailoring might result
+ * in assigning different collation elements to code points.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_BUILDER_VERSION 9
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Constant 1.
+ * This was intended to be the version of collation tailorings,
+ * but instead the tailoring data carries a version number.
+ * @deprecated ICU 54
+ */
+#define UCOL_TAILORINGS_VERSION 1
+#endif  /* U_HIDE_DEPRECATED_API */
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/unicode/uversion.h b/src/core/basetypes/icu-ucsdet/include/unicode/uversion.h
new file mode 100644
index 00000000..74e30910
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/unicode/uversion.h
@@ -0,0 +1,193 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2000-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  uversion.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Created by: Vladimir Weinstein
+*
+*  Gets included by utypes.h and Windows .rc files
+*/
+
+/**
+ * \file
+ * \brief C API: API for accessing ICU version numbers. 
+ */
+/*===========================================================================*/
+/* Main ICU version information                                              */
+/*===========================================================================*/
+
+#ifndef UVERSION_H
+#define UVERSION_H
+
+#include "unicode/umachine.h"
+
+/* Actual version info lives in uvernum.h */
+#include "unicode/uvernum.h"
+
+/** Maximum length of the copyright string.
+ *  @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING_LENGTH  128
+
+/** An ICU version consists of up to 4 numbers from 0..255.
+ *  @stable ICU 2.4
+ */
+#define U_MAX_VERSION_LENGTH 4
+
+/** In a string, ICU version fields are delimited by dots.
+ *  @stable ICU 2.4
+ */
+#define U_VERSION_DELIMITER '.'
+
+/** The maximum length of an ICU version string.
+ *  @stable ICU 2.4
+ */
+#define U_MAX_VERSION_STRING_LENGTH 20
+
+/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
+ *  To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).
+ *  @stable ICU 2.4
+ */
+typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
+
+/*===========================================================================*/
+/* C++ namespace if supported. Versioned unless versioning is disabled.      */
+/*===========================================================================*/
+
+/**
+ * \def U_NAMESPACE_BEGIN
+ * This is used to begin a declaration of a public ICU C++ API.
+ * When not compiling for C++, it does nothing.
+ * When compiling for C++, it begins an extern "C++" linkage block (to protect
+ * against cases in which an external client includes ICU header files inside
+ * an extern "C" linkage block).
+ *
+ * It also begins a versioned-ICU-namespace block.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_END
+ * This is used to end a declaration of a public ICU C++ API.
+ * When not compiling for C++, it does nothing.
+ * When compiling for C++, it ends the extern "C++" block begun by
+ * U_NAMESPACE_BEGIN.
+ *
+ * It also ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_USE
+ * This is used to specify that the rest of the code uses the
+ * public ICU C++ API namespace.
+ * This is invoked by default; we recommend that you turn it off:
+ * See the "Recommended Build Options" section of the ICU4C readme
+ * (http://source.icu-project.org/repos/icu/icu/trunk/readme.html#RecBuild)
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_QUALIFIER
+ * This is used to qualify that a function or class is part of
+ * the public ICU C++ API namespace.
+ *
+ * This macro is unnecessary since ICU 49 requires namespace support.
+ * You can just use "icu::" instead.
+ * @stable ICU 2.4
+ */
+
+/* Define namespace symbols if the compiler supports it. */
+#ifdef __cplusplus
+#   if U_DISABLE_RENAMING
+#       define U_ICU_NAMESPACE icu
+        namespace U_ICU_NAMESPACE { }
+#   else
+#       define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu)
+        namespace U_ICU_NAMESPACE { }
+        namespace icu = U_ICU_NAMESPACE;
+#   endif
+
+#   define U_NAMESPACE_BEGIN extern "C++" { namespace U_ICU_NAMESPACE {
+#   define U_NAMESPACE_END } }
+#   define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
+#   define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
+
+#   ifndef U_USING_ICU_NAMESPACE
+#       define U_USING_ICU_NAMESPACE 1
+#   endif
+#   if U_USING_ICU_NAMESPACE
+        U_NAMESPACE_USE
+#   endif
+#else
+#   define U_NAMESPACE_BEGIN
+#   define U_NAMESPACE_END
+#   define U_NAMESPACE_USE
+#   define U_NAMESPACE_QUALIFIER
+#endif
+
+/*===========================================================================*/
+/* General version helper functions. Definitions in putil.c                  */
+/*===========================================================================*/
+
+/**
+ * Parse a string with dotted-decimal version information and
+ * fill in a UVersionInfo structure with the result.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The destination structure for the version information.
+ * @param versionString A string with dotted-decimal version information,
+ *                      with up to four non-negative number fields with
+ *                      values of up to 255 each.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+u_versionFromString(UVersionInfo versionArray, const char *versionString);
+
+/**
+ * Parse a Unicode string with dotted-decimal version information and
+ * fill in a UVersionInfo structure with the result.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The destination structure for the version information.
+ * @param versionString A Unicode string with dotted-decimal version
+ *                      information, with up to four non-negative number
+ *                      fields with values of up to 255 each.
+ * @stable ICU 4.2
+ */
+U_STABLE void U_EXPORT2
+u_versionFromUString(UVersionInfo versionArray, const UChar *versionString);
+
+
+/**
+ * Write a string with dotted-decimal version information according
+ * to the input UVersionInfo.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The version information to be written as a string.
+ * @param versionString A string buffer that will be filled in with
+ *                      a string corresponding to the numeric version
+ *                      information in versionArray.
+ *                      The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+u_versionToString(const UVersionInfo versionArray, char *versionString);
+
+/**
+ * Gets the ICU release version.  The version array stores the version information
+ * for ICU.  For example, release "1.3.31.2" is then represented as 0x01031F02.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray the version # information, the result will be filled in
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_getVersion(UVersionInfo versionArray);
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/uposixdefs.h b/src/core/basetypes/icu-ucsdet/include/uposixdefs.h
new file mode 100644
index 00000000..bd64d91a
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uposixdefs.h
@@ -0,0 +1,73 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2011-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  uposixdefs.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2011jul25
+*   created by: Markus W. Scherer
+*
+*   Common definitions for implementation files working with POSIX functions.
+*   *Important*: #include this file before any other header files!
+*/
+
+#ifndef __UPOSIXDEFS_H__
+#define __UPOSIXDEFS_H__
+
+/*
+ * Define _XOPEN_SOURCE for access to POSIX functions.
+ *
+ * We cannot use U_PLATFORM from platform.h/utypes.h because
+ * "The Open Group Base Specifications"
+ * chapter "2.2 The Compilation Environment" says:
+ * "In the compilation of an application that #defines a feature test macro
+ * specified by IEEE Std 1003.1-2001,
+ * no header defined by IEEE Std 1003.1-2001 shall be included prior to
+ * the definition of the feature test macro."
+ */
+#ifdef _XOPEN_SOURCE
+    /* Use the predefined value. */
+#else
+    /*
+     * Version 6.0:
+     * The Open Group Base Specifications Issue 6 (IEEE Std 1003.1, 2004 Edition)
+     * also known as
+     * SUSv3 = Open Group Single UNIX Specification, Version 3 (UNIX03)
+     *
+     * Note: This definition used to be in C source code (e.g., putil.c)
+     * and define _XOPEN_SOURCE to different values depending on __STDC_VERSION__.
+     * In C++ source code (e.g., putil.cpp), __STDC_VERSION__ is not defined at all.
+     */
+#   define _XOPEN_SOURCE 600
+#endif
+
+/*
+ * Make sure things like readlink and such functions work.
+ * Poorly upgraded Solaris machines can't have this defined.
+ * Cleanly installed Solaris can use this #define.
+ *
+ * z/OS needs this definition for timeval and to get usleep.
+ */
+#if !defined(_XOPEN_SOURCE_EXTENDED)
+#   define _XOPEN_SOURCE_EXTENDED 1
+#endif
+
+/*
+ * There is an issue with turning on _XOPEN_SOURCE_EXTENDED on certain platforms.
+ * A compatibility issue exists between turning on _XOPEN_SOURCE_EXTENDED and using
+ * standard C++ string class. As a result, standard C++ string class needs to be
+ * turned off for the follwing platforms:
+ *  -AIX/VACPP
+ *  -Solaris/GCC
+ */
+#if (U_PLATFORM == U_PF_AIX && !defined(__GNUC__)) || (U_PLATFORM == U_PF_SOLARIS && defined(__GNUC__))
+#   if _XOPEN_SOURCE_EXTENDED && !defined(U_HAVE_STD_STRING)
+#   define U_HAVE_STD_STRING 0
+#   endif
+#endif
+
+#endif  /* __UPOSIXDEFS_H__ */
diff --git a/src/core/basetypes/icu-ucsdet/include/uset_imp.h b/src/core/basetypes/icu-ucsdet/include/uset_imp.h
new file mode 100644
index 00000000..07a7381e
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/uset_imp.h
@@ -0,0 +1,60 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2004-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uset_imp.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2004sep07
+*   created by: Markus W. Scherer
+*
+*   Internal USet definitions.
+*/
+
+#ifndef __USET_IMP_H__
+#define __USET_IMP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+
+U_CDECL_BEGIN
+
+typedef void U_CALLCONV
+USetAdd(USet *set, UChar32 c);
+
+typedef void U_CALLCONV
+USetAddRange(USet *set, UChar32 start, UChar32 end);
+
+typedef void U_CALLCONV
+USetAddString(USet *set, const UChar *str, int32_t length);
+
+typedef void U_CALLCONV
+USetRemove(USet *set, UChar32 c);
+
+typedef void U_CALLCONV
+USetRemoveRange(USet *set, UChar32 start, UChar32 end);
+
+/**
+ * Interface for adding items to a USet, to keep low-level code from
+ * statically depending on the USet implementation.
+ * Calls will look like sa->add(sa->set, c);
+ */
+struct USetAdder {
+    USet *set;
+    USetAdd *add;
+    USetAddRange *addRange;
+    USetAddString *addString;
+    USetRemove *remove;
+    USetRemoveRange *removeRange;
+};
+typedef struct USetAdder USetAdder;
+
+U_CDECL_END
+
+#endif
+
diff --git a/src/core/basetypes/icu-ucsdet/include/ustr_imp.h b/src/core/basetypes/icu-ucsdet/include/ustr_imp.h
new file mode 100644
index 00000000..ee54d332
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/ustr_imp.h
@@ -0,0 +1,247 @@
+/*  
+**********************************************************************
+*   Copyright (C) 1999-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  ustr_imp.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001jan30
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __USTR_IMP_H__
+#define __USTR_IMP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uiter.h"
+#include "ucase.h"
+
+/** Simple declaration to avoid including unicode/ubrk.h. */
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+#   define UBRK_TYPEDEF_UBREAK_ITERATOR
+    typedef struct UBreakIterator UBreakIterator;
+#endif
+
+#ifndef U_COMPARE_IGNORE_CASE
+/* see also unorm.h */
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ */
+#define U_COMPARE_IGNORE_CASE       0x10000
+#endif
+
+/**
+ * Internal option for unorm_cmpEquivFold() for strncmp style.
+ * If set, checks for both string length and terminating NUL.
+ */
+#define _STRNCMP_STYLE 0x1000
+
+/**
+ * Compare two strings in code point order or code unit order.
+ * Works in strcmp style (both lengths -1),
+ * strncmp style (lengths equal and >=0, flag TRUE),
+ * and memcmp/UnicodeString style (at least one length >=0).
+ */
+U_CFUNC int32_t U_EXPORT2
+uprv_strCompare(const UChar *s1, int32_t length1,
+                const UChar *s2, int32_t length2,
+                UBool strncmpStyle, UBool codePointOrder);
+
+/**
+ * Internal API, used by u_strcasecmp() etc.
+ * Compare strings case-insensitively,
+ * in code point order or code unit order.
+ */
+U_CFUNC int32_t
+u_strcmpFold(const UChar *s1, int32_t length1,
+             const UChar *s2, int32_t length2,
+             uint32_t options,
+             UErrorCode *pErrorCode);
+
+/**
+ * Are the Unicode properties loaded?
+ * This must be used before internal functions are called that do
+ * not perform this check.
+ * Generate a debug assertion failure if data is not loaded.
+ */
+U_CFUNC UBool
+uprv_haveProperties(UErrorCode *pErrorCode);
+
+/**
+  * Load the Unicode property data.
+  * Intended primarily for use from u_init().
+  * Has no effect if property data is already loaded.
+  * NOT thread safe.
+  */
+/*U_CFUNC int8_t
+uprv_loadPropsData(UErrorCode *errorCode);*/
+
+/*
+ * Internal string casing functions implementing
+ * ustring.h/ustrcase.c and UnicodeString case mapping functions.
+ */
+
+struct UCaseMap {
+    const UCaseProps *csp;
+#if !UCONFIG_NO_BREAK_ITERATION
+    UBreakIterator *iter;  /* We adopt the iterator, so we own it. */
+#endif
+    char locale[32];
+    int32_t locCache;
+    uint32_t options;
+};
+
+#ifndef __UCASEMAP_H__
+typedef struct UCaseMap UCaseMap;
+#endif
+
+#if UCONFIG_NO_BREAK_ITERATION
+#   define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 }
+#else
+#   define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
+#endif
+
+U_CFUNC void
+ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
+
+#ifndef U_STRING_CASE_MAPPER_DEFINED
+#define U_STRING_CASE_MAPPER_DEFINED
+
+/**
+ * String case mapping function type, used by ustrcase_map().
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * src and dest must not overlap.
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(const UCaseMap *csm,
+                  UChar *dest, int32_t destCapacity,
+                  const UChar *src, int32_t srcLength,
+                  UErrorCode *pErrorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToLower(const UCaseMap *csm,
+                         UChar *dest, int32_t destCapacity,
+                         const UChar *src, int32_t srcLength,
+                         UErrorCode *pErrorCode);
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToUpper(const UCaseMap *csm,
+                         UChar *dest, int32_t destCapacity,
+                         const UChar *src, int32_t srcLength,
+                         UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToTitle(const UCaseMap *csm,
+                         UChar *dest, int32_t destCapacity,
+                         const UChar *src, int32_t srcLength,
+                         UErrorCode *pErrorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalFold(const UCaseMap *csm,
+                      UChar *dest, int32_t destCapacity,
+                      const UChar *src, int32_t srcLength,
+                      UErrorCode *pErrorCode);
+
+/**
+ * Implements argument checking and buffer handling
+ * for string case mapping as a common function.
+ */
+U_CFUNC int32_t
+ustrcase_map(const UCaseMap *csm,
+             UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             UStringCaseMapper *stringCaseMapper,
+             UErrorCode *pErrorCode);
+
+/**
+ * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
+ * UTF-8 version of UStringCaseMapper.
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * src and dest must not overlap.
+ */
+typedef int32_t U_CALLCONV
+UTF8CaseMapper(const UCaseMap *csm,
+               uint8_t *dest, int32_t destCapacity,
+               const uint8_t *src, int32_t srcLength,
+               UErrorCode *pErrorCode);
+
+/** Implements UTF8CaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
+         uint8_t *dest, int32_t destCapacity,
+         const uint8_t *src, int32_t srcLength,
+         UErrorCode *pErrorCode);
+
+/**
+ * Implements argument checking and buffer handling
+ * for UTF-8 string case mapping as a common function.
+ */
+U_CFUNC int32_t
+ucasemap_mapUTF8(const UCaseMap *csm,
+                 uint8_t *dest, int32_t destCapacity,
+                 const uint8_t *src, int32_t srcLength,
+                 UTF8CaseMapper *stringCaseMapper,
+                 UErrorCode *pErrorCode);
+
+U_CAPI int32_t U_EXPORT2 
+ustr_hashUCharsN(const UChar *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2 
+ustr_hashCharsN(const char *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashICharsN(const char *str, int32_t length);
+
+/**
+ * NUL-terminate a UChar * string if possible.
+ * If length  < destCapacity then NUL-terminate.
+ * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING.
+ * If length  > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR.
+ *
+ * @param dest Destination buffer, can be NULL if destCapacity==0.
+ * @param destCapacity Number of UChars available at dest.
+ * @param length Number of UChars that were (to be) written to dest.
+ * @param pErrorCode ICU error code.
+ * @return length
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * NUL-terminate a char * string if possible.
+ * Same as u_terminateUChars() but for a different string type.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * NUL-terminate a UChar32 * string if possible.
+ * Same as u_terminateUChars() but for a different string type.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * NUL-terminate a wchar_t * string if possible.
+ * Same as u_terminateUChars() but for a different string type.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/include/utracimp.h b/src/core/basetypes/icu-ucsdet/include/utracimp.h
new file mode 100644
index 00000000..317fbe35
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/include/utracimp.h
@@ -0,0 +1,384 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utracimp.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003aug06
+*   created by: Markus W. Scherer
+*
+*   Internal header for ICU tracing/logging.
+*
+*
+*   Various notes:
+*   - using a trace level variable to only call trace functions
+*     when the level is sufficient
+*   - using the same variable for tracing on/off to never make a function
+*     call when off
+*   - the function number is put into a local variable by the entry macro
+*     and used implicitly to avoid copy&paste/typing mistakes by the developer
+*   - the application must call utrace_setFunctions() and pass in
+*     implementations for the trace functions
+*   - ICU trace macros call ICU functions that route through the function
+*     pointers if they have been set;
+*     this avoids an indirection at the call site
+*     (which would cost more code for another check and for the indirection)
+*
+*   ### TODO Issues:
+*   - Verify that va_list is portable among compilers for the same platform.
+*     va_list should be portable because printf() would fail otherwise!
+*   - Should enum values like UTraceLevel be passed into int32_t-type arguments,
+*     or should enum types be used?
+*/
+
+#ifndef __UTRACIMP_H__
+#define __UTRACIMP_H__
+
+#include "unicode/utrace.h"
+#include <stdarg.h>
+
+U_CDECL_BEGIN
+
+/**
+ * \var utrace_level
+ * Trace level variable. Negative for "off".
+ * Use only via UTRACE_ macros.
+ * @internal
+ */
+#ifdef UTRACE_IMPL
+U_EXPORT int32_t
+#else
+U_CFUNC U_COMMON_API int32_t
+#endif
+utrace_level;
+
+
+/** 
+ *   Traced Function Exit return types.  
+ *   Flags indicating the number and types of varargs included in a call
+ *   to a UTraceExit function.
+ *   Bits 0-3:  The function return type.  First variable param.
+ *   Bit    4:  Flag for presence of U_ErrorCode status param.
+ *   @internal
+ */
+typedef enum UTraceExitVal {
+    /** The traced function returns no value  @internal */
+    UTRACE_EXITV_NONE   = 0,
+    /** The traced function returns an int32_t, or compatible, type.  @internal */
+    UTRACE_EXITV_I32    = 1,
+    /** The traced function returns a pointer  @internal */
+    UTRACE_EXITV_PTR    = 2,
+    /** The traced function returns a UBool  @internal */
+    UTRACE_EXITV_BOOL   = 3,
+    /** Mask to extract the return type values from a UTraceExitVal  @internal */
+    UTRACE_EXITV_MASK   = 0xf,
+    /** Bit indicating that the traced function includes a UErrorCode parameter  @internal */
+    UTRACE_EXITV_STATUS = 0x10
+} UTraceExitVal;
+
+/**
+ * Trace function for the entry point of a function.
+ * Do not use directly, use UTRACE_ENTRY instead.
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+utrace_entry(int32_t fnNumber);
+
+/**
+ * Trace function for each exit point of a function.
+ * Do not use directly, use UTRACE_EXIT* instead.
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @param returnType The type of the value returned by the function.
+ * @param errorCode The UErrorCode value at function exit. See UTRACE_EXIT.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+utrace_exit(int32_t fnNumber, int32_t returnType, ...);
+
+
+/**
+ * Trace function used inside functions that have a UTRACE_ENTRY() statement.
+ * Do not use directly, use UTRACE_DATAX() macros instead.
+ *
+ * @param utraceFnNumber The number of the current function, from the local
+ *        variable of the same name.
+ * @param level The trace level for this message.
+ * @param fmt The trace format string.
+ *
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+utrace_data(int32_t utraceFnNumber, int32_t level, const char *fmt, ...);
+
+U_CDECL_END
+
+#if U_ENABLE_TRACING
+
+/**
+ * Boolean expression to see if ICU tracing is turned on
+ * to at least the specified level.
+ * @internal
+ */
+#define UTRACE_LEVEL(level) (utrace_getLevel()>=(level))
+
+/**
+  *  Flag bit in utraceFnNumber, the local variable added to each function 
+  *  with tracing code to contains the function number.
+  *
+  *  Set the flag if the function's entry is traced, which will cause the
+  *  function's exit to also be traced.  utraceFnNumber is uncoditionally 
+  *  set at entry, whether or not the entry is traced, so that it will
+  *  always be available for error trace output.
+  *  @internal
+  */            
+#define UTRACE_TRACED_ENTRY 0x80000000
+
+/**
+ * Trace statement for the entry point of a function.
+ * Stores the function number in a local variable.
+ * In C code, must be placed immediately after the last variable declaration.
+ * Must be matched with UTRACE_EXIT() at all function exit points.
+ *
+ * Tracing should start with UTRACE_ENTRY after checking for
+ * U_FAILURE at function entry, so that if a function returns immediately
+ * because of a pre-existing error condition, it does not show up in the trace,
+ * consistent with ICU's error handling model.
+ *
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @internal
+ */
+#define UTRACE_ENTRY(fnNumber) \
+    int32_t utraceFnNumber=(fnNumber); \
+    if(utrace_getLevel()>=UTRACE_INFO) { \
+        utrace_entry(fnNumber); \
+        utraceFnNumber |= UTRACE_TRACED_ENTRY; \
+    }
+
+
+/**
+ * Trace statement for the entry point of open and close functions.
+ * Produces trace output at a less verbose setting than plain UTRACE_ENTRY
+ * Stores the function number in a local variable.
+ * In C code, must be placed immediately after the last variable declaration.
+ * Must be matched with UTRACE_EXIT() at all function exit points.
+ *
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @internal
+ */
+#define UTRACE_ENTRY_OC(fnNumber) \
+    int32_t utraceFnNumber=(fnNumber); \
+    if(utrace_getLevel()>=UTRACE_OPEN_CLOSE) { \
+        utrace_entry(fnNumber); \
+        utraceFnNumber |= UTRACE_TRACED_ENTRY; \
+    }
+
+/**
+ * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
+ * statement.
+ *
+ * @param errorCode The function's ICU UErrorCode value at function exit,
+ *                  or U_ZERO_ERROR if the function does not use a UErrorCode.
+ *                  0==U_ZERO_ERROR indicates success,
+ *                  positive values an error (see u_errorName()),
+ *                  negative values an informational status.
+ *
+ * @internal
+ */
+#define UTRACE_EXIT() \
+    {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+        utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_NONE); \
+    }}
+
+/**
+ * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
+ * statement, and that returns a value.
+ *
+ * @param val       The function's return value, int32_t or comatible type.
+ *
+ * @internal 
+ */
+#define UTRACE_EXIT_VALUE(val) \
+    {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+        utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_I32, val); \
+    }}
+
+#define UTRACE_EXIT_STATUS(status) \
+    {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+        utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_STATUS, status); \
+    }}
+
+#define UTRACE_EXIT_VALUE_STATUS(val, status) \
+    {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+        utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS), val, status); \
+    }}
+
+#define UTRACE_EXIT_PTR_STATUS(ptr, status) \
+    {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+        utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS), ptr, status); \
+    }}
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes no data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA0(level, fmt) \
+    if(UTRACE_LEVEL(level)) { \
+        utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt)); \
+    }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes one data argument.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA1(level, fmt, a) \
+    if(UTRACE_LEVEL(level)) { \
+        utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a)); \
+    }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes two data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA2(level, fmt, a, b) \
+    if(UTRACE_LEVEL(level)) { \
+        utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a), (b)); \
+    }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes three data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA3(level, fmt, a, b, c) \
+    if(UTRACE_LEVEL(level)) { \
+        utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c)); \
+    }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes four data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA4(level, fmt, a, b, c, d) \
+    if(UTRACE_LEVEL(level)) { \
+        utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d)); \
+    }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes five data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA5(level, fmt, a, b, c, d, e) \
+    if(UTRACE_LEVEL(level)) { \
+        utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e)); \
+    }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes six data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f) \
+    if(UTRACE_LEVEL(level)) { \
+        utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f)); \
+    }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes seven data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g) \
+    if(UTRACE_LEVEL(level)) { \
+        utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g)); \
+    }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes eight data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h) \
+    if(UTRACE_LEVEL(level)) { \
+        utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h)); \
+    }
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes nine data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @internal
+ */
+#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i) \
+    if(UTRACE_LEVEL(level)) { \
+        utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h), (i)); \
+    }
+
+#else
+
+/*
+ * When tracing is disabled, the following macros become empty
+ */
+
+#define UTRACE_LEVEL(level) 0
+#define UTRACE_ENTRY(fnNumber)
+#define UTRACE_ENTRY_OC(fnNumber)
+#define UTRACE_EXIT()
+#define UTRACE_EXIT_VALUE(val)
+#define UTRACE_EXIT_STATUS(status)
+#define UTRACE_EXIT_VALUE_STATUS(val, status)
+#define UTRACE_EXIT_PTR_STATUS(ptr, status)
+#define UTRACE_DATA0(level, fmt)
+#define UTRACE_DATA1(level, fmt, a)
+#define UTRACE_DATA2(level, fmt, a, b)
+#define UTRACE_DATA3(level, fmt, a, b, c)
+#define UTRACE_DATA4(level, fmt, a, b, c, d)
+#define UTRACE_DATA5(level, fmt, a, b, c, d, e)
+#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f)
+#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g)
+#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h)
+#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i)
+
+#endif
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/inputext.cpp b/src/core/basetypes/icu-ucsdet/inputext.cpp
new file mode 100644
index 00000000..e5a8ee18
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/inputext.cpp
@@ -0,0 +1,164 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2009, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "inputext.h"
+
+#include "cmemory.h"
+#include "cstring.h"
+
+#include <string.h>
+
+U_NAMESPACE_BEGIN
+
+#define BUFFER_SIZE 8192
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+
+InputText::InputText(UErrorCode &status)
+    : fInputBytes(NEW_ARRAY(uint8_t, BUFFER_SIZE)), // The text to be checked.  Markup will have been
+                                                 //   removed if appropriate.
+      fByteStats(NEW_ARRAY(int16_t, 256)),       // byte frequency statistics for the input text.
+                                                 //   Value is percent, not absolute.
+      fDeclaredEncoding(0),
+      fRawInput(0),
+      fRawLength(0)
+{
+    if (fInputBytes == NULL || fByteStats == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+InputText::~InputText()
+{
+    DELETE_ARRAY(fDeclaredEncoding);
+    DELETE_ARRAY(fByteStats);
+    DELETE_ARRAY(fInputBytes);
+}
+
+void InputText::setText(const char *in, int32_t len)
+{
+    fInputLen  = 0;
+    fC1Bytes   = FALSE;
+    fRawInput  = (const uint8_t *) in;
+    fRawLength = len == -1? (int32_t)uprv_strlen(in) : len;
+}
+
+void InputText::setDeclaredEncoding(const char* encoding, int32_t len)
+{
+    if(encoding) {
+        if (len == -1) {
+            len = (int32_t)uprv_strlen(encoding);
+        }
+
+        len += 1;     // to make place for the \0 at the end.
+        uprv_free(fDeclaredEncoding);
+        fDeclaredEncoding = NEW_ARRAY(char, len);
+        uprv_strncpy(fDeclaredEncoding, encoding, len);
+    }
+}
+
+UBool InputText::isSet() const 
+{
+    return fRawInput != NULL;
+}
+
+/**
+*  MungeInput - after getting a set of raw input data to be analyzed, preprocess
+*               it by removing what appears to be html markup.
+* 
+* @internal
+*/
+void InputText::MungeInput(UBool fStripTags) {
+    int     srci = 0;
+    int     dsti = 0;
+    uint8_t b;
+    bool    inMarkup = FALSE;
+    int32_t openTags = 0;
+    int32_t badTags  = 0;
+
+    //
+    //  html / xml markup stripping.
+    //     quick and dirty, not 100% accurate, but hopefully good enough, statistically.
+    //     discard everything within < brackets >
+    //     Count how many total '<' and illegal (nested) '<' occur, so we can make some
+    //     guess as to whether the input was actually marked up at all.
+    // TODO: Think about how this interacts with EBCDIC charsets that are detected.
+    if (fStripTags) {
+        for (srci = 0; srci < fRawLength && dsti < BUFFER_SIZE; srci += 1) {
+            b = fRawInput[srci];
+
+            if (b == (uint8_t)0x3C) { /* Check for the ASCII '<' */
+                if (inMarkup) {
+                    badTags += 1;
+                }
+
+                inMarkup = TRUE;
+                openTags += 1;
+            }
+
+            if (! inMarkup) {
+                fInputBytes[dsti++] = b;
+            }
+
+            if (b == (uint8_t)0x3E) { /* Check for the ASCII '>' */
+                inMarkup = FALSE;
+            }
+        }
+
+        fInputLen = dsti;
+    }
+
+    //
+    //  If it looks like this input wasn't marked up, or if it looks like it's
+    //    essentially nothing but markup abandon the markup stripping.
+    //    Detection will have to work on the unstripped input.
+    //
+    if (openTags<5 || openTags/5 < badTags || 
+        (fInputLen < 100 && fRawLength>600))
+    {
+        int32_t limit = fRawLength;
+
+        if (limit > BUFFER_SIZE) {
+            limit = BUFFER_SIZE;
+        }
+
+        for (srci=0; srci<limit; srci++) {
+            fInputBytes[srci] = fRawInput[srci];
+        }
+
+        fInputLen = srci;
+    }
+
+    //
+    // Tally up the byte occurence statistics.
+    // These are available for use by the various detectors.
+    //
+
+    uprv_memset(fByteStats, 0, (sizeof fByteStats[0]) * 256);
+
+    for (srci = 0; srci < fInputLen; srci += 1) {
+        fByteStats[fInputBytes[srci]] += 1;
+    }
+
+    for (int32_t i = 0x80; i <= 0x9F; i += 1) {
+        if (fByteStats[i] != 0) {
+            fC1Bytes = TRUE;
+            break;
+        }
+    }
+}
+
+U_NAMESPACE_END
+#endif
+
diff --git a/src/core/basetypes/icu-ucsdet/uarrsort.c b/src/core/basetypes/icu-ucsdet/uarrsort.c
new file mode 100644
index 00000000..22c76972
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/uarrsort.c
@@ -0,0 +1,283 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uarrsort.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003aug04
+*   created by: Markus W. Scherer
+*
+*   Internal function for sorting arrays.
+*/
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "uarrsort.h"
+
+enum {
+    /**
+     * "from Knuth"
+     *
+     * A binary search over 8 items performs 4 comparisons:
+     * log2(8)=3 to subdivide, +1 to check for equality.
+     * A linear search over 8 items on average also performs 4 comparisons.
+     */
+    MIN_QSORT=9,
+    STACK_ITEM_SIZE=200
+};
+
+/* UComparator convenience implementations ---------------------------------- */
+
+U_CAPI int32_t U_EXPORT2
+uprv_uint16Comparator(const void *context, const void *left, const void *right) {
+    return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right;
+}
+
+U_CAPI int32_t U_EXPORT2
+uprv_int32Comparator(const void *context, const void *left, const void *right) {
+    return *(const int32_t *)left - *(const int32_t *)right;
+}
+
+U_CAPI int32_t U_EXPORT2
+uprv_uint32Comparator(const void *context, const void *left, const void *right) {
+    uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right;
+
+    /* compare directly because (l-r) would overflow the int32_t result */
+    if(l<r) {
+        return -1;
+    } else if(l==r) {
+        return 0;
+    } else /* l>r */ {
+        return 1;
+    }
+}
+
+/* Insertion sort using binary search --------------------------------------- */
+
+U_CAPI int32_t U_EXPORT2
+uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize,
+                        UComparator *cmp, const void *context) {
+    int32_t start=0;
+    UBool found=FALSE;
+
+    /* Binary search until we get down to a tiny sub-array. */
+    while((limit-start)>=MIN_QSORT) {
+        int32_t i=(start+limit)/2;
+        int32_t diff=cmp(context, item, array+i*itemSize);
+        if(diff==0) {
+            /*
+             * Found the item. We look for the *last* occurrence of such
+             * an item, for stable sorting.
+             * If we knew that there will be only few equal items,
+             * we could break now and enter the linear search.
+             * However, if there are many equal items, then it should be
+             * faster to continue with the binary search.
+             * It seems likely that we either have all unique items
+             * (where found will never become TRUE in the insertion sort)
+             * or potentially many duplicates.
+             */
+            found=TRUE;
+            start=i+1;
+        } else if(diff<0) {
+            limit=i;
+        } else {
+            start=i;
+        }
+    }
+
+    /* Linear search over the remaining tiny sub-array. */
+    while(start<limit) {
+        int32_t diff=cmp(context, item, array+start*itemSize);
+        if(diff==0) {
+            found=TRUE;
+        } else if(diff<0) {
+            break;
+        }
+        ++start;
+    }
+    return found ? (start-1) : ~start;
+}
+
+static void
+doInsertionSort(char *array, int32_t length, int32_t itemSize,
+                UComparator *cmp, const void *context, void *pv) {
+    int32_t j;
+
+    for(j=1; j<length; ++j) {
+        char *item=array+j*itemSize;
+        int32_t insertionPoint=uprv_stableBinarySearch(array, j, item, itemSize, cmp, context);
+        if(insertionPoint<0) {
+            insertionPoint=~insertionPoint;
+        } else {
+            ++insertionPoint;  /* one past the last equal item */
+        }
+        if(insertionPoint<j) {
+            char *dest=array+insertionPoint*itemSize;
+            uprv_memcpy(pv, item, itemSize);  /* v=array[j] */
+            uprv_memmove(dest+itemSize, dest, (j-insertionPoint)*itemSize);
+            uprv_memcpy(dest, pv, itemSize);  /* array[insertionPoint]=v */
+        }
+    }
+}
+
+static void
+insertionSort(char *array, int32_t length, int32_t itemSize,
+              UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
+    UAlignedMemory v[STACK_ITEM_SIZE/sizeof(UAlignedMemory)+1];
+    void *pv;
+
+    /* allocate an intermediate item variable (v) */
+    if(itemSize<=STACK_ITEM_SIZE) {
+        pv=v;
+    } else {
+        pv=uprv_malloc(itemSize);
+        if(pv==NULL) {
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+
+    doInsertionSort(array, length, itemSize, cmp, context, pv);
+
+    if(pv!=v) {
+        uprv_free(pv);
+    }
+}
+
+/* QuickSort ---------------------------------------------------------------- */
+
+/*
+ * This implementation is semi-recursive:
+ * It recurses for the smaller sub-array to shorten the recursion depth,
+ * and loops for the larger sub-array.
+ *
+ * Loosely after QuickSort algorithms in
+ * Niklaus Wirth
+ * Algorithmen und Datenstrukturen mit Modula-2
+ * B.G. Teubner Stuttgart
+ * 4. Auflage 1986
+ * ISBN 3-519-02260-5
+ */
+static void
+subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize,
+             UComparator *cmp, const void *context,
+             void *px, void *pw) {
+    int32_t left, right;
+
+    /* start and left are inclusive, limit and right are exclusive */
+    do {
+        if((start+MIN_QSORT)>=limit) {
+            doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px);
+            break;
+        }
+
+        left=start;
+        right=limit;
+
+        /* x=array[middle] */
+        uprv_memcpy(px, array+((start+limit)/2)*itemSize, itemSize);
+
+        do {
+            while(/* array[left]<x */
+                  cmp(context, array+left*itemSize, px)<0
+            ) {
+                ++left;
+            }
+            while(/* x<array[right-1] */
+                  cmp(context, px, array+(right-1)*itemSize)<0
+            ) {
+                --right;
+            }
+
+            /* swap array[left] and array[right-1] via w; ++left; --right */
+            if(left<right) {
+                --right;
+
+                if(left<right) {
+                    uprv_memcpy(pw, array+left*itemSize, itemSize);
+                    uprv_memcpy(array+left*itemSize, array+right*itemSize, itemSize);
+                    uprv_memcpy(array+right*itemSize, pw, itemSize);
+                }
+
+                ++left;
+            }
+        } while(left<right);
+
+        /* sort sub-arrays */
+        if((right-start)<(limit-left)) {
+            /* sort [start..right[ */
+            if(start<(right-1)) {
+                subQuickSort(array, start, right, itemSize, cmp, context, px, pw);
+            }
+
+            /* sort [left..limit[ */
+            start=left;
+        } else {
+            /* sort [left..limit[ */
+            if(left<(limit-1)) {
+                subQuickSort(array, left, limit, itemSize, cmp, context, px, pw);
+            }
+
+            /* sort [start..right[ */
+            limit=right;
+        }
+    } while(start<(limit-1));
+}
+
+static void
+quickSort(char *array, int32_t length, int32_t itemSize,
+            UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
+    UAlignedMemory xw[(2*STACK_ITEM_SIZE)/sizeof(UAlignedMemory)+1];
+    void *p;
+
+    /* allocate two intermediate item variables (x and w) */
+    if(itemSize<=STACK_ITEM_SIZE) {
+        p=xw;
+    } else {
+        p=uprv_malloc(2*itemSize);
+        if(p==NULL) {
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+
+    subQuickSort(array, 0, length, itemSize,
+                 cmp, context, p, (char *)p+itemSize);
+
+    if(p!=xw) {
+        uprv_free(p);
+    }
+}
+
+/* uprv_sortArray() API ----------------------------------------------------- */
+
+/*
+ * Check arguments, select an appropriate implementation,
+ * cast the array to char * so that array+i*itemSize works.
+ */
+U_CAPI void U_EXPORT2
+uprv_sortArray(void *array, int32_t length, int32_t itemSize,
+               UComparator *cmp, const void *context,
+               UBool sortStable, UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return;
+    }
+    if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if(length<=1) {
+        return;
+    } else if(length<MIN_QSORT || sortStable) {
+        insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode);
+    } else {
+        quickSort((char *)array, length, itemSize, cmp, context, pErrorCode);
+    }
+}
diff --git a/src/core/basetypes/icu-ucsdet/ucln_cmn.cpp b/src/core/basetypes/icu-ucsdet/ucln_cmn.cpp
new file mode 100644
index 00000000..2480c9d3
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/ucln_cmn.cpp
@@ -0,0 +1,111 @@
+/*
+******************************************************************************
+* Copyright (C) 2001-2014, International Business Machines
+*                Corporation and others. All Rights Reserved.
+******************************************************************************
+*   file name:  ucln_cmn.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001July05
+*   created by: George Rhoten
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uclean.h"
+#include "cmemory.h"
+#include "mutex.h"
+#include "uassert.h"
+#include "ucln.h"
+#include "ucln_cmn.h"
+#include "utracimp.h"
+#include "umutex.h"
+
+/**  Auto-client for UCLN_COMMON **/
+#define UCLN_TYPE_IS_COMMON
+#include "ucln_imp.h"
+
+static cleanupFunc *gCommonCleanupFunctions[UCLN_COMMON_COUNT];
+static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON];
+
+
+/************************************************
+ The cleanup order is important in this function.
+ Please be sure that you have read ucln.h
+ ************************************************/
+U_CAPI void U_EXPORT2
+u_cleanup(void)
+{
+    UTRACE_ENTRY_OC(UTRACE_U_CLEANUP);
+    umtx_lock(NULL);     /* Force a memory barrier, so that we are sure to see   */
+    umtx_unlock(NULL);   /*   all state left around by any other threads.        */
+
+    ucln_lib_cleanup();
+
+    cmemory_cleanup();       /* undo any heap functions set by u_setMemoryFunctions(). */
+    UTRACE_EXIT();           /* Must be before utrace_cleanup(), which turns off tracing. */
+/*#if U_ENABLE_TRACING*/
+    utrace_cleanup();
+/*#endif*/
+}
+
+U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType) 
+{
+    if (gLibCleanupFunctions[libType])
+    {
+        gLibCleanupFunctions[libType]();
+        gLibCleanupFunctions[libType] = NULL;
+    }
+}
+
+U_CFUNC void
+ucln_common_registerCleanup(ECleanupCommonType type,
+                            cleanupFunc *func)
+{
+    U_ASSERT(UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT);
+    if (UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT)
+    {
+        icu::Mutex m;     // See ticket 10295 for discussion.
+        gCommonCleanupFunctions[type] = func;
+    }
+#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
+    ucln_registerAutomaticCleanup();
+#endif
+}
+
+// Note: ucln_registerCleanup() is called with the ICU global mutex locked.
+//       Be aware if adding anything to the function.
+//       See ticket 10295 for discussion.
+
+U_CAPI void U_EXPORT2
+ucln_registerCleanup(ECleanupLibraryType type,
+                     cleanupFunc *func)
+{
+    U_ASSERT(UCLN_START < type && type < UCLN_COMMON);
+    if (UCLN_START < type && type < UCLN_COMMON)
+    {
+        gLibCleanupFunctions[type] = func;
+    }
+}
+
+U_CFUNC UBool ucln_lib_cleanup(void) {
+    int32_t libType = UCLN_START;
+    int32_t commonFunc = UCLN_COMMON_START;
+
+    for (libType++; libType<UCLN_COMMON; libType++) {
+        ucln_cleanupOne(static_cast<ECleanupLibraryType>(libType));
+    }
+
+    for (commonFunc++; commonFunc<UCLN_COMMON_COUNT; commonFunc++) {
+        if (gCommonCleanupFunctions[commonFunc])
+        {
+            gCommonCleanupFunctions[commonFunc]();
+            gCommonCleanupFunctions[commonFunc] = NULL;
+        }
+    }
+#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
+    ucln_unRegisterAutomaticCleanup();
+#endif
+    return TRUE;
+}
diff --git a/src/core/basetypes/icu-ucsdet/ucln_in.cpp b/src/core/basetypes/icu-ucsdet/ucln_in.cpp
new file mode 100644
index 00000000..431d4316
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/ucln_in.cpp
@@ -0,0 +1,63 @@
+/*
+******************************************************************************
+*                                                                            *
+* Copyright (C) 2001-2014, International Business Machines                   *
+*                Corporation and others. All Rights Reserved.                *
+*                                                                            *
+******************************************************************************
+*   file name:  ucln_in.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001July05
+*   created by: George Rhoten
+*/
+
+#include "ucln.h"
+#include "ucln_in.h"
+#include "mutex.h"
+#include "uassert.h"
+
+/**  Auto-client for UCLN_I18N **/
+#define UCLN_TYPE UCLN_I18N
+#include "ucln_imp.h"
+
+/* Leave this copyright notice here! It needs to go somewhere in this library. */
+static const char copyright[] = U_COPYRIGHT_STRING;
+
+static cleanupFunc *gCleanupFunctions[UCLN_I18N_COUNT];
+
+static UBool i18n_cleanup(void)
+{
+    int32_t libType = UCLN_I18N_START;
+    (void)copyright;   /* Suppress unused variable warning with clang. */
+
+    while (++libType<UCLN_I18N_COUNT) {
+        if (gCleanupFunctions[libType])
+        {
+            gCleanupFunctions[libType]();
+            gCleanupFunctions[libType] = NULL;
+        }
+    }
+#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
+    ucln_unRegisterAutomaticCleanup();
+#endif
+    return TRUE;
+}
+
+void ucln_i18n_registerCleanup(ECleanupI18NType type,
+                               cleanupFunc *func) {
+    U_ASSERT(UCLN_I18N_START < type && type < UCLN_I18N_COUNT);
+    {
+        icu::Mutex m;   // See ticket 10295 for discussion.
+        ucln_registerCleanup(UCLN_I18N, i18n_cleanup);
+        if (UCLN_I18N_START < type && type < UCLN_I18N_COUNT) {
+            gCleanupFunctions[type] = func;
+        }
+    }
+#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
+    ucln_registerAutomaticCleanup();
+#endif
+}
+
diff --git a/src/core/basetypes/icu-ucsdet/ucsdet.cpp b/src/core/basetypes/icu-ucsdet/ucsdet.cpp
new file mode 100644
index 00000000..b42758d5
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/ucsdet.cpp
@@ -0,0 +1,207 @@
+/*
+ ********************************************************************************
+ *   Copyright (C) 2005-2013, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ ********************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+#include "unicode/ucsdet.h"
+#include "csdetect.h"
+#include "csmatch.h"
+#include "csrsbcs.h"
+#include "csrmbcs.h"
+#include "csrutf8.h"
+#include "csrucode.h"
+#include "csr2022.h"
+
+#include "cmemory.h"
+
+U_NAMESPACE_USE
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+
+U_CDECL_BEGIN
+
+U_CAPI UCharsetDetector * U_EXPORT2
+ucsdet_open(UErrorCode   *status)
+{
+    if(U_FAILURE(*status)) {
+        return 0;
+    }
+
+    CharsetDetector* csd = new CharsetDetector(*status);
+
+    if (U_FAILURE(*status)) {
+        delete csd;
+        csd = NULL;
+    }
+
+    return (UCharsetDetector *) csd;
+}
+
+U_CAPI void U_EXPORT2
+ucsdet_close(UCharsetDetector *ucsd)
+{
+    CharsetDetector *csd = (CharsetDetector *) ucsd;
+    delete csd;
+}
+
+U_CAPI void U_EXPORT2
+ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return;
+    }
+
+    ((CharsetDetector *) ucsd)->setText(textIn, len);
+}
+
+U_CAPI const char * U_EXPORT2
+ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    return ((CharsetMatch *) ucsm)->getName();
+}
+
+U_CAPI int32_t U_EXPORT2
+ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return 0;
+    }
+
+    return ((CharsetMatch *) ucsm)->getConfidence();
+}
+
+U_CAPI const char * U_EXPORT2
+ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    return ((CharsetMatch *) ucsm)->getLanguage();
+}
+
+U_CAPI const UCharsetMatch * U_EXPORT2
+ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    return (const UCharsetMatch *) ((CharsetDetector *) ucsd)->detect(*status);
+}
+
+U_CAPI void U_EXPORT2
+ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return;
+    }
+
+    ((CharsetDetector *) ucsd)->setDeclaredEncoding(encoding,length);
+}
+
+U_CAPI const UCharsetMatch**
+ucsdet_detectAll(UCharsetDetector *ucsd,
+                 int32_t *maxMatchesFound, UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    CharsetDetector *csd = (CharsetDetector *) ucsd;
+
+    return (const UCharsetMatch**)csd->detectAll(*maxMatchesFound,*status);
+}
+
+// U_CAPI  const char * U_EXPORT2
+// ucsdet_getDetectableCharsetName(const UCharsetDetector *csd, int32_t index, UErrorCode *status)
+// {
+//     if(U_FAILURE(*status)) {
+//         return 0;
+//     }
+//     return csd->getCharsetName(index,*status);
+// }
+
+// U_CAPI  int32_t U_EXPORT2
+// ucsdet_getDetectableCharsetsCount(const UCharsetDetector *csd, UErrorCode *status)
+// {
+//     if(U_FAILURE(*status)) {
+//         return -1;
+//     }
+//     return UCharsetDetector::getDetectableCount();
+// }
+
+U_CAPI  UBool U_EXPORT2
+ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
+{
+    // todo: could use an error return...
+    if (ucsd == NULL) {
+        return FALSE;
+    }
+
+    return ((CharsetDetector *) ucsd)->getStripTagsFlag();
+}
+
+U_CAPI  UBool U_EXPORT2
+ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
+{
+    // todo: could use an error return...
+    if (ucsd == NULL) {
+        return FALSE;
+    }
+
+    CharsetDetector *csd = (CharsetDetector *) ucsd;
+    UBool prev = csd->getStripTagsFlag();
+
+    csd->setStripTagsFlag(filter);
+
+    return prev;
+}
+
+/*
+U_CAPI  int32_t U_EXPORT2
+ucsdet_getUChars(const UCharsetMatch *ucsm,
+                 UChar *buf, int32_t cap, UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return 0;
+    }
+
+    return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status);
+}
+*/
+
+U_CAPI void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
+{
+    ((CharsetDetector *)ucsd)->setDetectableCharset(encoding, enabled, *status);
+}
+
+U_CAPI  UEnumeration * U_EXPORT2
+ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
+{
+    return CharsetDetector::getAllDetectableCharsets(*status);
+}
+
+U_DRAFT UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status)
+{
+    return ((CharsetDetector *)ucsd)->getDetectableCharsets(*status);
+}
+
+U_CDECL_END
+
+
+#endif
diff --git a/src/core/basetypes/icu-ucsdet/udataswp.c b/src/core/basetypes/icu-ucsdet/udataswp.c
new file mode 100644
index 00000000..06fe85bc
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/udataswp.c
@@ -0,0 +1,471 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  udataswp.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003jun05
+*   created by: Markus W. Scherer
+*
+*   Definitions for ICU data transformations for different platforms,
+*   changing between big- and little-endian data and/or between
+*   charset families (ASCII<->EBCDIC).
+*/
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+#include "unicode/udata.h" /* UDataInfo */
+#include "ucmndata.h" /* DataHeader */
+#include "cmemory.h"
+#include "udataswp.h"
+
+/* swapping primitives ------------------------------------------------------ */
+
+static int32_t U_CALLCONV
+uprv_swapArray16(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    const uint16_t *p;
+    uint16_t *q;
+    int32_t count;
+    uint16_t x;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and swapping */
+    p=(const uint16_t *)inData;
+    q=(uint16_t *)outData;
+    count=length/2;
+    while(count>0) {
+        x=*p++;
+        *q++=(uint16_t)((x<<8)|(x>>8));
+        --count;
+    }
+
+    return length;
+}
+
+static int32_t U_CALLCONV
+uprv_copyArray16(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if(length>0 && inData!=outData) {
+        uprv_memcpy(outData, inData, length);
+    }
+    return length;
+}
+
+static int32_t U_CALLCONV
+uprv_swapArray32(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    const uint32_t *p;
+    uint32_t *q;
+    int32_t count;
+    uint32_t x;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and swapping */
+    p=(const uint32_t *)inData;
+    q=(uint32_t *)outData;
+    count=length/4;
+    while(count>0) {
+        x=*p++;
+        *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
+        --count;
+    }
+
+    return length;
+}
+
+static int32_t U_CALLCONV
+uprv_copyArray32(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if(length>0 && inData!=outData) {
+        uprv_memcpy(outData, inData, length);
+    }
+    return length;
+}
+
+static int32_t U_CALLCONV
+uprv_swapArray64(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    const uint64_t *p;
+    uint64_t *q;
+    int32_t count;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and swapping */
+    p=(const uint64_t *)inData;
+    q=(uint64_t *)outData;
+    count=length/8;
+    while(count>0) {
+        uint64_t x=*p++;
+        x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)|
+            ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56);
+        *q++=x;
+        --count;
+    }
+
+    return length;
+}
+
+static int32_t U_CALLCONV
+uprv_copyArray64(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if(length>0 && inData!=outData) {
+        uprv_memcpy(outData, inData, length);
+    }
+    return length;
+}
+
+static uint16_t U_CALLCONV
+uprv_readSwapUInt16(uint16_t x) {
+    return (uint16_t)((x<<8)|(x>>8));
+}
+
+static uint16_t U_CALLCONV
+uprv_readDirectUInt16(uint16_t x) {
+    return x;
+}
+
+static uint32_t U_CALLCONV
+uprv_readSwapUInt32(uint32_t x) {
+    return (uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
+}
+
+static uint32_t U_CALLCONV
+uprv_readDirectUInt32(uint32_t x) {
+    return x;
+}
+
+static void U_CALLCONV
+uprv_writeSwapUInt16(uint16_t *p, uint16_t x) {
+    *p=(uint16_t)((x<<8)|(x>>8));
+}
+
+static void U_CALLCONV
+uprv_writeDirectUInt16(uint16_t *p, uint16_t x) {
+    *p=x;
+}
+
+static void U_CALLCONV
+uprv_writeSwapUInt32(uint32_t *p, uint32_t x) {
+    *p=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
+}
+
+static void U_CALLCONV
+uprv_writeDirectUInt32(uint32_t *p, uint32_t x) {
+    *p=x;
+}
+
+U_CAPI int16_t U_EXPORT2
+udata_readInt16(const UDataSwapper *ds, int16_t x) {
+    return (int16_t)ds->readUInt16((uint16_t)x);
+}
+
+U_CAPI int32_t U_EXPORT2
+udata_readInt32(const UDataSwapper *ds, int32_t x) {
+    return (int32_t)ds->readUInt32((uint32_t)x);
+}
+
+/**
+ * Swap a block of invariant, NUL-terminated strings, but not padding
+ * bytes after the last string.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swapInvStringBlock(const UDataSwapper *ds,
+                         const void *inData, int32_t length, void *outData,
+                         UErrorCode *pErrorCode) {
+    const char *inChars;
+    int32_t stringsLength;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* reduce the strings length to not include bytes after the last NUL */
+    inChars=(const char *)inData;
+    stringsLength=length;
+    while(stringsLength>0 && inChars[stringsLength-1]!=0) {
+        --stringsLength;
+    }
+
+    /* swap up to the last NUL */
+    ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode);
+
+    /* copy the bytes after the last NUL */
+    if(inData!=outData && length>stringsLength) {
+        uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength);
+    }
+
+    /* return the length including padding bytes */
+    if(U_SUCCESS(*pErrorCode)) {
+        return length;
+    } else {
+        return 0;
+    }
+}
+
+U_CAPI void U_EXPORT2
+udata_printError(const UDataSwapper *ds,
+                 const char *fmt,
+                 ...) {
+    va_list args;
+
+    if(ds->printError!=NULL) {
+        va_start(args, fmt);
+        ds->printError(ds->printErrorContext, fmt, args);
+        va_end(args);
+    }
+}
+
+/* swap a data header ------------------------------------------------------- */
+
+U_CAPI int32_t U_EXPORT2
+udata_swapDataHeader(const UDataSwapper *ds,
+                     const void *inData, int32_t length, void *outData,
+                     UErrorCode *pErrorCode) {
+    const DataHeader *pHeader;
+    uint16_t headerSize, infoSize;
+
+    /* argument checking */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* check minimum length and magic bytes */
+    pHeader=(const DataHeader *)inData;
+    if( (length>=0 && length<sizeof(DataHeader)) ||
+        pHeader->dataHeader.magic1!=0xda ||
+        pHeader->dataHeader.magic2!=0x27 ||
+        pHeader->info.sizeofUChar!=2
+    ) {
+        udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n");
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    headerSize=ds->readUInt16(pHeader->dataHeader.headerSize);
+    infoSize=ds->readUInt16(pHeader->info.size);
+
+    if( headerSize<sizeof(DataHeader) ||
+        infoSize<sizeof(UDataInfo) ||
+        headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
+        (length>=0 && length<headerSize)
+    ) {
+        udata_printError(ds, "udata_swapDataHeader(): header size mismatch - headerSize %d infoSize %d length %d\n",
+                         headerSize, infoSize, length);
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    if(length>0) {
+        DataHeader *outHeader;
+        const char *s;
+        int32_t maxLength;
+
+        /* Most of the fields are just bytes and need no swapping. */
+        if(inData!=outData) {
+            uprv_memcpy(outData, inData, headerSize);
+        }
+        outHeader=(DataHeader *)outData;
+
+        outHeader->info.isBigEndian = ds->outIsBigEndian;
+        outHeader->info.charsetFamily = ds->outCharset;
+
+        /* swap headerSize */
+        ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode);
+
+        /* swap UDataInfo size and reservedWord */
+        ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode);
+
+        /* swap copyright statement after the UDataInfo */
+        infoSize+=sizeof(pHeader->dataHeader);
+        s=(const char *)inData+infoSize;
+        maxLength=headerSize-infoSize;
+        /* get the length of the string */
+        for(length=0; length<maxLength && s[length]!=0; ++length) {}
+        /* swap the string contents */
+        ds->swapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode);
+    }
+
+    return headerSize;
+}
+
+/* API functions ------------------------------------------------------------ */
+
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
+                  UBool outIsBigEndian, uint8_t outCharset,
+                  UErrorCode *pErrorCode) {
+    UDataSwapper *swapper;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    /* allocate the swapper */
+    swapper=uprv_malloc(sizeof(UDataSwapper));
+    if(swapper==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    uprv_memset(swapper, 0, sizeof(UDataSwapper));
+
+    /* set values and functions pointers according to in/out parameters */
+    swapper->inIsBigEndian=inIsBigEndian;
+    swapper->inCharset=inCharset;
+    swapper->outIsBigEndian=outIsBigEndian;
+    swapper->outCharset=outCharset;
+
+    swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16;
+    swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32;
+
+    swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt16 : uprv_writeSwapUInt16;
+    swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32;
+
+    swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic;
+
+    if(inIsBigEndian==outIsBigEndian) {
+        swapper->swapArray16=uprv_copyArray16;
+        swapper->swapArray32=uprv_copyArray32;
+        swapper->swapArray64=uprv_copyArray64;
+    } else {
+        swapper->swapArray16=uprv_swapArray16;
+        swapper->swapArray32=uprv_swapArray32;
+        swapper->swapArray64=uprv_swapArray64;
+    }
+
+    if(inCharset==U_ASCII_FAMILY) {
+        swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii;
+    } else /* U_EBCDIC_FAMILY */ {
+        swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic;
+    }
+
+    return swapper;
+}
+
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapperForInputData(const void *data, int32_t length,
+                              UBool outIsBigEndian, uint8_t outCharset,
+                              UErrorCode *pErrorCode) {
+    const DataHeader *pHeader;
+    uint16_t headerSize, infoSize;
+    UBool inIsBigEndian;
+    int8_t inCharset;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if( data==NULL ||
+        (length>=0 && length<sizeof(DataHeader)) ||
+        outCharset>U_EBCDIC_FAMILY
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    pHeader=(const DataHeader *)data;
+    if( (length>=0 && length<sizeof(DataHeader)) ||
+        pHeader->dataHeader.magic1!=0xda ||
+        pHeader->dataHeader.magic2!=0x27 ||
+        pHeader->info.sizeofUChar!=2
+    ) {
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    inIsBigEndian=(UBool)pHeader->info.isBigEndian;
+    inCharset=pHeader->info.charsetFamily;
+
+    if(inIsBigEndian==U_IS_BIG_ENDIAN) {
+        headerSize=pHeader->dataHeader.headerSize;
+        infoSize=pHeader->info.size;
+    } else {
+        headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize);
+        infoSize=uprv_readSwapUInt16(pHeader->info.size);
+    }
+
+    if( headerSize<sizeof(DataHeader) ||
+        infoSize<sizeof(UDataInfo) ||
+        headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
+        (length>=0 && length<headerSize)
+    ) {
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    return udata_openSwapper(inIsBigEndian, inCharset, outIsBigEndian, outCharset, pErrorCode);
+}
+
+U_CAPI void U_EXPORT2
+udata_closeSwapper(UDataSwapper *ds) {
+    uprv_free(ds);
+}
diff --git a/src/core/basetypes/icu-ucsdet/uenum.c b/src/core/basetypes/icu-ucsdet/uenum.c
new file mode 100644
index 00000000..9a3d9e14
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/uenum.c
@@ -0,0 +1,187 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uenum.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:2
+*
+*   created on: 2002jul08
+*   created by: Vladimir Weinstein
+*/
+
+#include "unicode/putil.h"
+#include "uenumimp.h"
+#include "cmemory.h"
+
+/* Layout of the baseContext buffer. */
+typedef struct {
+    int32_t len;  /* number of bytes available starting at 'data' */
+    char    data; /* actual data starts here */
+} _UEnumBuffer;
+
+/* Extra bytes to allocate in the baseContext buffer. */
+static const int32_t PAD = 8;
+
+/* Return a pointer to the baseContext buffer, possibly allocating
+   or reallocating it if at least 'capacity' bytes are not available. */
+static void* _getBuffer(UEnumeration* en, int32_t capacity) {
+
+    if (en->baseContext != NULL) {
+        if (((_UEnumBuffer*) en->baseContext)->len < capacity) {
+            capacity += PAD;
+            en->baseContext = uprv_realloc(en->baseContext,
+                                           sizeof(int32_t) + capacity);
+            if (en->baseContext == NULL) {
+                return NULL;
+            }
+            ((_UEnumBuffer*) en->baseContext)->len = capacity;
+        }
+    } else {
+        capacity += PAD;
+        en->baseContext = uprv_malloc(sizeof(int32_t) + capacity);
+        if (en->baseContext == NULL) {
+            return NULL;
+        }
+        ((_UEnumBuffer*) en->baseContext)->len = capacity;
+    }
+    
+    return (void*) & ((_UEnumBuffer*) en->baseContext)->data;
+}
+
+U_CAPI void U_EXPORT2
+uenum_close(UEnumeration* en)
+{
+    if (en) {
+        if (en->close != NULL) {
+            if (en->baseContext) {
+                uprv_free(en->baseContext);
+            }
+            en->close(en);
+        } else { /* this seems dangerous, but we better kill the object */
+            uprv_free(en);
+        }
+    }
+}
+
+U_CAPI int32_t U_EXPORT2
+uenum_count(UEnumeration* en, UErrorCode* status)
+{
+    if (!en || U_FAILURE(*status)) {
+        return -1;
+    }
+    if (en->count != NULL) {
+        return en->count(en, status);
+    } else {
+        *status = U_UNSUPPORTED_ERROR;
+        return -1;
+    }
+}
+
+/* Don't call this directly. Only uenum_unext should be calling this. */
+U_CAPI const UChar* U_EXPORT2
+uenum_unextDefault(UEnumeration* en,
+            int32_t* resultLength,
+            UErrorCode* status)
+{
+    UChar *ustr = NULL;
+    int32_t len = 0;
+    if (en->next != NULL) {
+        const char *cstr = en->next(en, &len, status);
+        if (cstr != NULL) {
+            ustr = (UChar*) _getBuffer(en, (len+1) * sizeof(UChar));
+            if (ustr == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+            } else {
+                u_charsToUChars(cstr, ustr, len+1);
+            }
+        }
+    } else {
+        *status = U_UNSUPPORTED_ERROR;
+    }
+    if (resultLength) {
+        *resultLength = len;
+    }
+    return ustr;
+}
+
+/* Don't call this directly. Only uenum_next should be calling this. */
+U_CAPI const char* U_EXPORT2
+uenum_nextDefault(UEnumeration* en,
+            int32_t* resultLength,
+            UErrorCode* status)
+{
+    if (en->uNext != NULL) {
+        char *tempCharVal;
+        const UChar *tempUCharVal = en->uNext(en, resultLength, status);
+        if (tempUCharVal == NULL) {
+            return NULL;
+        }
+        tempCharVal = (char*)
+            _getBuffer(en, (*resultLength+1) * sizeof(char));
+        if (!tempCharVal) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+        u_UCharsToChars(tempUCharVal, tempCharVal, *resultLength + 1);
+        return tempCharVal;
+    } else {
+        *status = U_UNSUPPORTED_ERROR;
+        return NULL;
+    }
+}
+
+U_CAPI const UChar* U_EXPORT2
+uenum_unext(UEnumeration* en,
+            int32_t* resultLength,
+            UErrorCode* status)
+{
+    if (!en || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (en->uNext != NULL) {
+        return en->uNext(en, resultLength, status);
+    } else {
+        *status = U_UNSUPPORTED_ERROR;
+        return NULL;
+    }
+}
+
+U_CAPI const char* U_EXPORT2
+uenum_next(UEnumeration* en,
+          int32_t* resultLength,
+          UErrorCode* status)
+{
+    if (!en || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (en->next != NULL) {
+        if (resultLength != NULL) {
+            return en->next(en, resultLength, status);
+        }
+        else {
+            int32_t dummyLength=0;
+            return en->next(en, &dummyLength, status);
+        }
+    } else {
+        *status = U_UNSUPPORTED_ERROR;
+        return NULL;
+    }
+}
+
+U_CAPI void U_EXPORT2
+uenum_reset(UEnumeration* en, UErrorCode* status)
+{
+    if (!en || U_FAILURE(*status)) {
+        return;
+    }
+    if (en->reset != NULL) {
+        en->reset(en, status);
+    } else {
+        *status = U_UNSUPPORTED_ERROR;
+    }
+}
diff --git a/src/core/basetypes/icu-ucsdet/uinvchar.c b/src/core/basetypes/icu-ucsdet/uinvchar.c
new file mode 100644
index 00000000..f874edd9
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/uinvchar.c
@@ -0,0 +1,611 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uinvchar.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:2
+*
+*   created on: 2004sep14
+*   created by: Markus W. Scherer
+*
+*   Functions for handling invariant characters, moved here from putil.c
+*   for better modularization.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "udataswp.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "uinvchar.h"
+
+/* invariant-character handling --------------------------------------------- */
+
+/*
+ * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
+ * appropriately for most EBCDIC codepages.
+ *
+ * They currently also map most other ASCII graphic characters,
+ * appropriately for codepages 37 and 1047.
+ * Exceptions: The characters for []^ have different codes in 37 & 1047.
+ * Both versions are mapped to ASCII.
+ *
+ *    ASCII 37 1047
+ * [     5B BA   AD
+ * ]     5D BB   BD
+ * ^     5E B0   5F
+ *
+ * There are no mappings for variant characters from Unicode to EBCDIC.
+ *
+ * Currently, C0 control codes are also included in these maps.
+ * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
+ * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
+ * but there is no mapping for ASCII LF back to EBCDIC.
+ *
+ *    ASCII EBCDIC S/390-OE
+ * LF    0A     25       15
+ * NEL   85     15       25
+ *
+ * The maps below explicitly exclude the variant
+ * control and graphical characters that are in ASCII-based
+ * codepages at 0x80 and above.
+ * "No mapping" is expressed by mapping to a 00 byte.
+ *
+ * These tables do not establish a converter or a codepage.
+ */
+
+static const uint8_t asciiFromEbcdic[256]={
+    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
+    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
+
+    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
+    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
+    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
+
+    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
+    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
+
+    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const uint8_t ebcdicFromAscii[256]={
+    0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
+    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
+
+    0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
+    0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
+    0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+    0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,
+
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Same as asciiFromEbcdic[] except maps all letters to lowercase. */
+static const uint8_t lowercaseAsciiFromEbcdic[256]={
+    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
+    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
+
+    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
+    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
+    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
+
+    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
+    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
+
+    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/*
+ * Bit sets indicating which characters of the ASCII repertoire
+ * (by ASCII/Unicode code) are "invariant".
+ * See utypes.h for more details.
+ *
+ * As invariant are considered the characters of the ASCII repertoire except
+ * for the following:
+ * 21  '!' <exclamation mark>
+ * 23  '#' <number sign>
+ * 24  '$' <dollar sign>
+ *
+ * 40  '@' <commercial at>
+ *
+ * 5b  '[' <left bracket>
+ * 5c  '\' <backslash>
+ * 5d  ']' <right bracket>
+ * 5e  '^' <circumflex>
+ *
+ * 60  '`' <grave accent>
+ *
+ * 7b  '{' <left brace>
+ * 7c  '|' <vertical line>
+ * 7d  '}' <right brace>
+ * 7e  '~' <tilde>
+ */
+static const uint32_t invariantChars[4]={
+    0xfffffbff, /* 00..1f but not 0a */
+    0xffffffe5, /* 20..3f but not 21 23 24 */
+    0x87fffffe, /* 40..5f but not 40 5b..5e */
+    0x87fffffe  /* 60..7f but not 60 7b..7e */
+};
+
+/*
+ * test unsigned types (or values known to be non-negative) for invariant characters,
+ * tests ASCII-family character values
+ */
+#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)
+
+/* test signed types for invariant characters, adds test for positive values */
+#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+#define CHAR_TO_UCHAR(c) c
+#define UCHAR_TO_CHAR(c) c
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u]
+#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u]
+#else
+#   error U_CHARSET_FAMILY is not valid
+#endif
+
+
+U_CAPI void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, int32_t length) {
+    UChar u;
+    uint8_t c;
+
+    /*
+     * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
+     * For EBCDIC systems, this works for characters with codes from
+     * codepages 37 and 1047 or compatible.
+     */
+    while(length>0) {
+        c=(uint8_t)(*cs++);
+        u=(UChar)CHAR_TO_UCHAR(c);
+        U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */
+        *us++=u;
+        --length;
+    }
+}
+
+U_CAPI void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
+    UChar u;
+
+    while(length>0) {
+        u=*us++;
+        if(!UCHAR_IS_INVARIANT(u)) {
+            U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
+            u=0;
+        }
+        *cs++=(char)UCHAR_TO_CHAR(u);
+        --length;
+    }
+}
+
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantString(const char *s, int32_t length) {
+    uint8_t c;
+
+    for(;;) {
+        if(length<0) {
+            /* NUL-terminated */
+            c=(uint8_t)*s++;
+            if(c==0) {
+                break;
+            }
+        } else {
+            /* count length */
+            if(length==0) {
+                break;
+            }
+            --length;
+            c=(uint8_t)*s++;
+            if(c==0) {
+                continue; /* NUL is invariant */
+            }
+        }
+        /* c!=0 now, one branch below checks c==0 for variant characters */
+
+        /*
+         * no assertions here because these functions are legitimately called
+         * for strings with variant characters
+         */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+        if(!UCHAR_IS_INVARIANT(c)) {
+            return FALSE; /* found a variant char */
+        }
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+        c=CHAR_TO_UCHAR(c);
+        if(c==0 || !UCHAR_IS_INVARIANT(c)) {
+            return FALSE; /* found a variant char */
+        }
+#else
+#   error U_CHARSET_FAMILY is not valid
+#endif
+    }
+    return TRUE;
+}
+
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantUString(const UChar *s, int32_t length) {
+    UChar c;
+
+    for(;;) {
+        if(length<0) {
+            /* NUL-terminated */
+            c=*s++;
+            if(c==0) {
+                break;
+            }
+        } else {
+            /* count length */
+            if(length==0) {
+                break;
+            }
+            --length;
+            c=*s++;
+        }
+
+        /*
+         * no assertions here because these functions are legitimately called
+         * for strings with variant characters
+         */
+        if(!UCHAR_IS_INVARIANT(c)) {
+            return FALSE; /* found a variant char */
+        }
+    }
+    return TRUE;
+}
+
+/* UDataSwapFn implementations used in udataswp.c ------- */
+
+/* convert ASCII to EBCDIC and verify that all characters are invariant */
+U_CAPI int32_t U_EXPORT2
+uprv_ebcdicFromAscii(const UDataSwapper *ds,
+                     const void *inData, int32_t length, void *outData,
+                     UErrorCode *pErrorCode) {
+    const uint8_t *s;
+    uint8_t *t;
+    uint8_t c;
+
+    int32_t count;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and swapping */
+    s=(const uint8_t *)inData;
+    t=(uint8_t *)outData;
+    count=length;
+    while(count>0) {
+        c=*s++;
+        if(!UCHAR_IS_INVARIANT(c)) {
+            udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
+                             length, length-count);
+            *pErrorCode=U_INVALID_CHAR_FOUND;
+            return 0;
+        }
+        *t++=ebcdicFromAscii[c];
+        --count;
+    }
+
+    return length;
+}
+
+/* this function only checks and copies ASCII strings without conversion */
+U_CFUNC int32_t
+uprv_copyAscii(const UDataSwapper *ds,
+               const void *inData, int32_t length, void *outData,
+               UErrorCode *pErrorCode) {
+    const uint8_t *s;
+    uint8_t c;
+
+    int32_t count;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and checking */
+    s=(const uint8_t *)inData;
+    count=length;
+    while(count>0) {
+        c=*s++;
+        if(!UCHAR_IS_INVARIANT(c)) {
+            udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
+                             length, length-count);
+            *pErrorCode=U_INVALID_CHAR_FOUND;
+            return 0;
+        }
+        --count;
+    }
+
+    if(length>0 && inData!=outData) {
+        uprv_memcpy(outData, inData, length);
+    }
+
+    return length;
+}
+
+/* convert EBCDIC to ASCII and verify that all characters are invariant */
+U_CFUNC int32_t
+uprv_asciiFromEbcdic(const UDataSwapper *ds,
+                     const void *inData, int32_t length, void *outData,
+                     UErrorCode *pErrorCode) {
+    const uint8_t *s;
+    uint8_t *t;
+    uint8_t c;
+
+    int32_t count;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 ||  (length>0 && outData==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and swapping */
+    s=(const uint8_t *)inData;
+    t=(uint8_t *)outData;
+    count=length;
+    while(count>0) {
+        c=*s++;
+        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
+            udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
+                             length, length-count);
+            *pErrorCode=U_INVALID_CHAR_FOUND;
+            return 0;
+        }
+        *t++=c;
+        --count;
+    }
+
+    return length;
+}
+
+/* this function only checks and copies EBCDIC strings without conversion */
+U_CFUNC int32_t
+uprv_copyEbcdic(const UDataSwapper *ds,
+                const void *inData, int32_t length, void *outData,
+                UErrorCode *pErrorCode) {
+    const uint8_t *s;
+    uint8_t c;
+
+    int32_t count;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and checking */
+    s=(const uint8_t *)inData;
+    count=length;
+    while(count>0) {
+        c=*s++;
+        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
+            udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
+                             length, length-count);
+            *pErrorCode=U_INVALID_CHAR_FOUND;
+            return 0;
+        }
+        --count;
+    }
+
+    if(length>0 && inData!=outData) {
+        uprv_memcpy(outData, inData, length);
+    }
+
+    return length;
+}
+
+/* compare invariant strings; variant characters compare less than others and unlike each other */
+U_CFUNC int32_t
+uprv_compareInvAscii(const UDataSwapper *ds,
+                     const char *outString, int32_t outLength,
+                     const UChar *localString, int32_t localLength) {
+    int32_t minLength;
+    UChar32 c1, c2;
+    uint8_t c;
+
+    if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
+        return 0;
+    }
+
+    if(outLength<0) {
+        outLength=(int32_t)uprv_strlen(outString);
+    }
+    if(localLength<0) {
+        localLength=u_strlen(localString);
+    }
+
+    minLength= outLength<localLength ? outLength : localLength;
+
+    while(minLength>0) {
+        c=(uint8_t)*outString++;
+        if(UCHAR_IS_INVARIANT(c)) {
+            c1=c;
+        } else {
+            c1=-1;
+        }
+
+        c2=*localString++;
+        if(!UCHAR_IS_INVARIANT(c2)) {
+            c2=-2;
+        }
+
+        if((c1-=c2)!=0) {
+            return c1;
+        }
+
+        --minLength;
+    }
+
+    /* strings start with same prefix, compare lengths */
+    return outLength-localLength;
+}
+
+U_CFUNC int32_t
+uprv_compareInvEbcdic(const UDataSwapper *ds,
+                      const char *outString, int32_t outLength,
+                      const UChar *localString, int32_t localLength) {
+    int32_t minLength;
+    UChar32 c1, c2;
+    uint8_t c;
+
+    if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
+        return 0;
+    }
+
+    if(outLength<0) {
+        outLength=(int32_t)uprv_strlen(outString);
+    }
+    if(localLength<0) {
+        localLength=u_strlen(localString);
+    }
+
+    minLength= outLength<localLength ? outLength : localLength;
+
+    while(minLength>0) {
+        c=(uint8_t)*outString++;
+        if(c==0) {
+            c1=0;
+        } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
+            /* c1 is set */
+        } else {
+            c1=-1;
+        }
+
+        c2=*localString++;
+        if(!UCHAR_IS_INVARIANT(c2)) {
+            c2=-2;
+        }
+
+        if((c1-=c2)!=0) {
+            return c1;
+        }
+
+        --minLength;
+    }
+
+    /* strings start with same prefix, compare lengths */
+    return outLength-localLength;
+}
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
+    int32_t c1, c2;
+
+    for(;; ++s1, ++s2) {
+        c1=(uint8_t)*s1;
+        c2=(uint8_t)*s2;
+        if(c1!=c2) {
+            if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
+                c1=-(int32_t)(uint8_t)*s1;
+            }
+            if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
+                c2=-(int32_t)(uint8_t)*s2;
+            }
+            return c1-c2;
+        } else if(c1==0) {
+            return 0;
+        }
+    }
+}
+
+U_CAPI char U_EXPORT2
+uprv_ebcdicToLowercaseAscii(char c) {
+    return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
+}
+
+U_INTERNAL uint8_t* U_EXPORT2
+uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
+{
+  uint8_t *orig_dst = dst;
+
+  if(n==-1) { 
+    n = uprv_strlen((const char*)src)+1; /* copy NUL */
+  }
+  /* copy non-null */
+  while(*src && n>0) {
+    *(dst++) = asciiFromEbcdic[*(src++)];
+    n--;
+  }
+  /* pad */
+  while(n>0) {
+    *(dst++) = 0;
+    n--;
+  }
+  return orig_dst;
+}
+
+U_INTERNAL uint8_t* U_EXPORT2
+uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
+{
+  uint8_t *orig_dst = dst;
+
+  if(n==-1) { 
+    n = uprv_strlen((const char*)src)+1; /* copy NUL */
+  }
+  /* copy non-null */
+  while(*src && n>0) {
+    char ch = ebcdicFromAscii[*(src++)];
+    if(ch == 0) {
+      ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
+    }
+    *(dst++) = ch;
+    n--;
+  }
+  /* pad */
+  while(n>0) {
+    *(dst++) = 0;
+    n--;
+  }
+  return orig_dst;
+}
+
diff --git a/src/core/basetypes/icu-ucsdet/umutex.cpp b/src/core/basetypes/icu-ucsdet/umutex.cpp
new file mode 100644
index 00000000..0c1fdc80
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/umutex.cpp
@@ -0,0 +1,392 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File umutex.cpp
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   04/02/97    aliu        Creation.
+*   04/07/99    srl         updated
+*   05/13/99    stephen     Changed to umutex (from cmutex).
+*   11/22/99    aliu        Make non-global mutex autoinitialize [j151]
+******************************************************************************
+*/
+
+#include "umutex.h"
+
+#include "unicode/utypes.h"
+#include "uassert.h"
+#include "cmemory.h"
+
+
+// The ICU global mutex. Used when ICU implementation code passes NULL for the mutex pointer.
+static UMutex   globalMutex = U_MUTEX_INITIALIZER;
+
+/*
+ * ICU Mutex wrappers.  Wrap operating system mutexes, giving the rest of ICU a
+ * platform independent set of mutex operations.  For internal ICU use only.
+ */
+
+#if defined(U_USER_MUTEX_CPP)
+// Build time user mutex hook: #include "U_USER_MUTEX_CPP"
+#include U_MUTEX_XSTR(U_USER_MUTEX_CPP)
+
+#elif U_PLATFORM_HAS_WIN32_API
+
+//-------------------------------------------------------------------------------------------
+//
+//    Windows Specific Definitions
+//
+//        Note: Cygwin (and possibly others) have both WIN32 and POSIX.
+//              Prefer Win32 in these cases.  (Win32 comes ahead in the #if chain)
+//
+//-------------------------------------------------------------------------------------------
+
+#if defined U_NO_PLATFORM_ATOMICS
+#error ICU on Win32 requires support for low level atomic operations.
+// Visual Studio, gcc, clang are OK. Shouldn't get here.
+#endif
+
+
+// This function is called when a test of a UInitOnce::fState reveals that
+//   initialization has not completed, that we either need to call the
+//   function on this thread, or wait for some other thread to complete.
+//
+// The actual call to the init function is made inline by template code
+//   that knows the C++ types involved. This function returns TRUE if
+//   the caller needs to call the Init function.
+//
+
+U_NAMESPACE_BEGIN
+
+U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &uio) {
+    for (;;) {
+        int32_t previousState = InterlockedCompareExchange(
+#if (U_PLATFORM == U_PF_MINGW) || (U_PLATFORM == U_PF_CYGWIN) || defined(__clang__)
+           (LONG volatile *) // this is the type given in the API doc for this function.
+#endif
+            &uio.fState,  //  Destination
+            1,            //  Exchange Value
+            0);           //  Compare value
+
+        if (previousState == 0) {
+            return true;   // Caller will next call the init function.
+                           // Current state == 1.
+        } else if (previousState == 2) {
+            // Another thread already completed the initialization.
+            //   We can simply return FALSE, indicating no
+            //   further action is needed by the caller.
+            return FALSE;
+        } else {
+            // Another thread is currently running the initialization.
+            // Wait until it completes.
+            do {
+                Sleep(1);
+                previousState = umtx_loadAcquire(uio.fState);
+            } while (previousState == 1);
+        }
+    }
+}
+
+// This function is called by the thread that ran an initialization function,
+// just after completing the function.
+//
+//   success: True:  the inialization succeeded. No further calls to the init
+//                   function will be made.
+//            False: the initializtion failed. The next call to umtx_initOnce()
+//                   will retry the initialization.
+
+U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce &uio) {
+    umtx_storeRelease(uio.fState, 2);
+}
+
+U_NAMESPACE_END
+
+static void winMutexInit(CRITICAL_SECTION *cs) {
+    InitializeCriticalSection(cs);
+    return;
+}
+
+U_CAPI void  U_EXPORT2
+umtx_lock(UMutex *mutex) {
+    if (mutex == NULL) {
+        mutex = &globalMutex;
+    }
+    CRITICAL_SECTION *cs = &mutex->fCS;
+    umtx_initOnce(mutex->fInitOnce, winMutexInit, cs);
+    EnterCriticalSection(cs);
+}
+
+U_CAPI void  U_EXPORT2
+umtx_unlock(UMutex* mutex)
+{
+    if (mutex == NULL) {
+        mutex = &globalMutex;
+    }
+    LeaveCriticalSection(&mutex->fCS);
+}
+
+
+U_CAPI void U_EXPORT2
+umtx_condBroadcast(UConditionVar *condition) {
+    // We require that the associated mutex be held by the caller,
+    //  so access to fWaitCount is protected and safe. No other thread can
+    //  call condWait() while we are here.
+    if (condition->fWaitCount == 0) {
+        return;
+    }
+    ResetEvent(condition->fExitGate);
+    SetEvent(condition->fEntryGate);
+}
+
+U_CAPI void U_EXPORT2
+umtx_condSignal(UConditionVar *condition) {
+    // Function not implemented. There is no immediate requirement from ICU to have it.
+    // Once ICU drops support for Windows XP and Server 2003, ICU Condition Variables will be
+    // changed to be thin wrappers on native Windows CONDITION_VARIABLEs, and this function
+    // becomes trivial to provide.
+    U_ASSERT(FALSE);
+}
+
+U_CAPI void U_EXPORT2
+umtx_condWait(UConditionVar *condition, UMutex *mutex) {
+    if (condition->fEntryGate == NULL) {
+        // Note: because the associated mutex must be locked when calling
+        //       wait, we know that there can not be multiple threads
+        //       running here with the same condition variable.
+        //       Meaning that lazy initialization is safe.
+        U_ASSERT(condition->fExitGate == NULL);
+        condition->fEntryGate = CreateEvent(NULL,   // Security Attributes
+                                            TRUE,   // Manual Reset
+                                            FALSE,  // Initially reset
+                                            NULL);  // Name.
+        U_ASSERT(condition->fEntryGate != NULL);
+        condition->fExitGate = CreateEvent(NULL, TRUE, TRUE, NULL);
+        U_ASSERT(condition->fExitGate != NULL);
+    }
+
+    condition->fWaitCount++;
+    umtx_unlock(mutex);
+    WaitForSingleObject(condition->fEntryGate, INFINITE); 
+    umtx_lock(mutex);
+    condition->fWaitCount--;
+    if (condition->fWaitCount == 0) {
+        // All threads that were waiting at the entry gate have woken up
+        // and moved through. Shut the entry gate and open the exit gate.
+        ResetEvent(condition->fEntryGate);
+        SetEvent(condition->fExitGate);
+    } else {
+        umtx_unlock(mutex);
+        WaitForSingleObject(condition->fExitGate, INFINITE);
+        umtx_lock(mutex);
+    }
+}
+
+
+#elif U_PLATFORM_IMPLEMENTS_POSIX
+
+//-------------------------------------------------------------------------------------------
+//
+//  POSIX specific definitions
+//
+//-------------------------------------------------------------------------------------------
+
+# include <pthread.h>
+
+// Each UMutex consists of a pthread_mutex_t.
+// All are statically initialized and ready for use.
+// There is no runtime mutex initialization code needed.
+
+U_CAPI void  U_EXPORT2
+umtx_lock(UMutex *mutex) {
+    if (mutex == NULL) {
+        mutex = &globalMutex;
+    }
+    int sysErr = pthread_mutex_lock(&mutex->fMutex);
+    (void)sysErr;   // Suppress unused variable warnings.
+    U_ASSERT(sysErr == 0);
+}
+
+
+U_CAPI void  U_EXPORT2
+umtx_unlock(UMutex* mutex)
+{
+    if (mutex == NULL) {
+        mutex = &globalMutex;
+    }
+    int sysErr = pthread_mutex_unlock(&mutex->fMutex);
+    (void)sysErr;   // Suppress unused variable warnings.
+    U_ASSERT(sysErr == 0);
+}
+
+
+U_CAPI void U_EXPORT2
+umtx_condWait(UConditionVar *cond, UMutex *mutex) {
+    if (mutex == NULL) {
+        mutex = &globalMutex;
+    }
+    int sysErr = pthread_cond_wait(&cond->fCondition, &mutex->fMutex);
+    (void)sysErr;
+    U_ASSERT(sysErr == 0);
+}
+
+U_CAPI void U_EXPORT2
+umtx_condBroadcast(UConditionVar *cond) {
+    int sysErr = pthread_cond_broadcast(&cond->fCondition);
+    (void)sysErr;
+    U_ASSERT(sysErr == 0);
+}
+
+U_CAPI void U_EXPORT2
+umtx_condSignal(UConditionVar *cond) {
+    int sysErr = pthread_cond_signal(&cond->fCondition);
+    (void)sysErr;
+    U_ASSERT(sysErr == 0);
+}
+
+
+
+U_NAMESPACE_BEGIN
+
+static pthread_mutex_t initMutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t initCondition = PTHREAD_COND_INITIALIZER;
+
+
+// This function is called when a test of a UInitOnce::fState reveals that
+//   initialization has not completed, that we either need to call the
+//   function on this thread, or wait for some other thread to complete.
+//
+// The actual call to the init function is made inline by template code
+//   that knows the C++ types involved. This function returns TRUE if
+//   the caller needs to call the Init function.
+//
+U_COMMON_API UBool U_EXPORT2
+umtx_initImplPreInit(UInitOnce &uio) {
+    pthread_mutex_lock(&initMutex);
+    int32_t state = uio.fState;
+    if (state == 0) {
+        umtx_storeRelease(uio.fState, 1);
+        pthread_mutex_unlock(&initMutex);
+        return TRUE;   // Caller will next call the init function.
+    } else {
+        while (uio.fState == 1) {
+            // Another thread is currently running the initialization.
+            // Wait until it completes.
+            pthread_cond_wait(&initCondition, &initMutex);
+        }
+        pthread_mutex_unlock(&initMutex);
+        U_ASSERT(uio.fState == 2);
+        return FALSE;
+    }
+}
+
+
+
+// This function is called by the thread that ran an initialization function,
+// just after completing the function.
+//   Some threads may be waiting on the condition, requiring the broadcast wakeup.
+//   Some threads may be racing to test the fState variable outside of the mutex,
+//   requiring the use of store/release when changing its value.
+
+U_COMMON_API void U_EXPORT2
+umtx_initImplPostInit(UInitOnce &uio) {
+    pthread_mutex_lock(&initMutex);
+    umtx_storeRelease(uio.fState, 2);
+    pthread_cond_broadcast(&initCondition);
+    pthread_mutex_unlock(&initMutex);
+}
+
+U_NAMESPACE_END
+
+// End of POSIX specific umutex implementation.
+
+#else  // Platform #define chain.
+
+#error Unknown Platform
+
+#endif  // Platform #define chain.
+
+
+//-------------------------------------------------------------------------------
+//
+//   Atomic Operations, out-of-line versions.
+//                      These are conditional, only defined if better versions
+//                      were not available for the platform.
+//
+//                      These versions are platform neutral.
+//
+//--------------------------------------------------------------------------------
+
+#if defined U_NO_PLATFORM_ATOMICS
+static UMutex   gIncDecMutex = U_MUTEX_INITIALIZER;
+
+U_NAMESPACE_BEGIN
+
+U_COMMON_API int32_t U_EXPORT2
+umtx_atomic_inc(u_atomic_int32_t *p)  {
+    int32_t retVal;
+    umtx_lock(&gIncDecMutex);
+    retVal = ++(*p);
+    umtx_unlock(&gIncDecMutex);
+    return retVal;
+}
+
+
+U_COMMON_API int32_t U_EXPORT2
+umtx_atomic_dec(u_atomic_int32_t *p) {
+    int32_t retVal;
+    umtx_lock(&gIncDecMutex);
+    retVal = --(*p);
+    umtx_unlock(&gIncDecMutex);
+    return retVal;
+}
+
+U_COMMON_API int32_t U_EXPORT2
+umtx_loadAcquire(u_atomic_int32_t &var) {
+    int32_t val = var;
+    umtx_lock(&gIncDecMutex);
+    umtx_unlock(&gIncDecMutex);
+    return val;
+}
+
+U_COMMON_API void U_EXPORT2
+umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
+    umtx_lock(&gIncDecMutex);
+    umtx_unlock(&gIncDecMutex);
+    var = val;
+}
+
+U_NAMESPACE_END
+#endif
+
+//--------------------------------------------------------------------------
+//
+//  Deprecated functions for setting user mutexes.
+//
+//--------------------------------------------------------------------------
+
+U_DEPRECATED void U_EXPORT2
+u_setMutexFunctions(const void * /*context */, UMtxInitFn *, UMtxFn *,
+                    UMtxFn *,  UMtxFn *, UErrorCode *status) {
+    if (U_SUCCESS(*status)) {
+        *status = U_UNSUPPORTED_ERROR;
+    }
+    return;
+}
+
+
+
+U_DEPRECATED void U_EXPORT2
+u_setAtomicIncDecFunctions(const void * /*context */, UMtxAtomicFn *, UMtxAtomicFn *,
+                           UErrorCode *status) {
+    if (U_SUCCESS(*status)) {
+        *status = U_UNSUPPORTED_ERROR;
+    }
+    return;
+}
diff --git a/src/core/basetypes/icu-ucsdet/uobject.cpp b/src/core/basetypes/icu-ucsdet/uobject.cpp
new file mode 100644
index 00000000..900e0345
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/uobject.cpp
@@ -0,0 +1,103 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2002-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  uobject.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002jun26
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/uobject.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+#if U_OVERRIDE_CXX_ALLOCATION
+
+/*
+ * Default implementation of UMemory::new/delete
+ * using uprv_malloc() and uprv_free().
+ *
+ * For testing, this is used together with a list of imported symbols to verify
+ * that ICU is not using the global ::new and ::delete operators.
+ *
+ * These operators can be implemented like this or any other appropriate way
+ * when customizing ICU for certain environments.
+ * Whenever ICU is customized in binary incompatible ways please be sure
+ * to use library name suffixes to distinguish such libraries from
+ * the standard build.
+ *
+ * Instead of just modifying these C++ new/delete operators, it is usually best
+ * to modify the uprv_malloc()/uprv_free()/uprv_realloc() functions in cmemory.c.
+ *
+ * Memory test on Windows/MSVC 6:
+ * The global operators new and delete look as follows:
+ *   04F 00000000 UNDEF  notype ()    External     | ??2@YAPAXI@Z (void * __cdecl operator new(unsigned int))
+ *   03F 00000000 UNDEF  notype ()    External     | ??3@YAXPAX@Z (void __cdecl operator delete(void *))
+ *
+ * These lines are from output generated by the MSVC 6 tool dumpbin with
+ * dumpbin /symbols *.obj
+ *
+ * ??2@YAPAXI@Z and ??3@YAXPAX@Z are the linker symbols in the .obj
+ * files and are imported from msvcrtd.dll (in a debug build).
+ *
+ * Make sure that with the UMemory operators new and delete defined these two symbols
+ * do not appear in the dumpbin /symbols output for the ICU libraries!
+ *
+ * If such a symbol appears in the output then look in the preceding lines in the output
+ * for which file and function calls the global new or delete operator,
+ * and replace with uprv_malloc/uprv_free.
+ */
+
+void * U_EXPORT2 UMemory::operator new(size_t size) U_NO_THROW {
+    return uprv_malloc(size);
+}
+
+void U_EXPORT2 UMemory::operator delete(void *p) U_NO_THROW {
+    if(p!=NULL) {
+        uprv_free(p);
+    }
+}
+
+void * U_EXPORT2 UMemory::operator new[](size_t size) U_NO_THROW {
+    return uprv_malloc(size);
+}
+
+void U_EXPORT2 UMemory::operator delete[](void *p) U_NO_THROW {
+    if(p!=NULL) {
+        uprv_free(p);
+    }
+}
+
+#if U_HAVE_DEBUG_LOCATION_NEW
+void * U_EXPORT2 UMemory::operator new(size_t size, const char* /*file*/, int /*line*/) U_NO_THROW {
+    return UMemory::operator new(size);
+}
+
+void U_EXPORT2 UMemory::operator delete(void* p, const char* /*file*/, int /*line*/) U_NO_THROW {
+    UMemory::operator delete(p);
+}
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+
+
+#endif
+
+UObject::~UObject() {}
+
+UClassID UObject::getDynamicClassID() const { return NULL; }
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj) {
+    delete static_cast<UObject *>(obj);
+}
diff --git a/src/core/basetypes/icu-ucsdet/ustring.cpp b/src/core/basetypes/icu-ucsdet/ustring.cpp
new file mode 100644
index 00000000..40d23c06
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/ustring.cpp
@@ -0,0 +1,1516 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1998-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File ustring.cpp
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   12/07/98    bertrand    Creation.
+******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "cstring.h"
+#include "cwchar.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+
+/* ANSI string.h - style functions ------------------------------------------ */
+
+/* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
+#define U_BMP_MAX 0xffff
+
+/* Forward binary string search functions ----------------------------------- */
+
+/*
+ * Test if a substring match inside a string is at code point boundaries.
+ * All pointers refer to the same buffer.
+ * The limit pointer may be NULL, all others must be real pointers.
+ */
+static inline UBool
+isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
+    if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
+        /* the leading edge of the match is in the middle of a surrogate pair */
+        return FALSE;
+    }
+    if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) {
+        /* the trailing edge of the match is in the middle of a surrogate pair */
+        return FALSE;
+    }
+    return TRUE;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length,
+               const UChar *sub, int32_t subLength) {
+    const UChar *start, *p, *q, *subLimit;
+    UChar c, cs, cq;
+
+    if(sub==NULL || subLength<-1) {
+        return (UChar *)s;
+    }
+    if(s==NULL || length<-1) {
+        return NULL;
+    }
+
+    start=s;
+
+    if(length<0 && subLength<0) {
+        /* both strings are NUL-terminated */
+        if((cs=*sub++)==0) {
+            return (UChar *)s;
+        }
+        if(*sub==0 && !U16_IS_SURROGATE(cs)) {
+            /* the substring consists of a single, non-surrogate BMP code point */
+            return u_strchr(s, cs);
+        }
+
+        while((c=*s++)!=0) {
+            if(c==cs) {
+                /* found first substring UChar, compare rest */
+                p=s;
+                q=sub;
+                for(;;) {
+                    if((cq=*q)==0) {
+                        if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
+                            return (UChar *)(s-1); /* well-formed match */
+                        } else {
+                            break; /* no match because surrogate pair is split */
+                        }
+                    }
+                    if((c=*p)==0) {
+                        return NULL; /* no match, and none possible after s */
+                    }
+                    if(c!=cq) {
+                        break; /* no match */
+                    }
+                    ++p;
+                    ++q;
+                }
+            }
+        }
+
+        /* not found */
+        return NULL;
+    }
+
+    if(subLength<0) {
+        subLength=u_strlen(sub);
+    }
+    if(subLength==0) {
+        return (UChar *)s;
+    }
+
+    /* get sub[0] to search for it fast */
+    cs=*sub++;
+    --subLength;
+    subLimit=sub+subLength;
+
+    if(subLength==0 && !U16_IS_SURROGATE(cs)) {
+        /* the substring consists of a single, non-surrogate BMP code point */
+        return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
+    }
+
+    if(length<0) {
+        /* s is NUL-terminated */
+        while((c=*s++)!=0) {
+            if(c==cs) {
+                /* found first substring UChar, compare rest */
+                p=s;
+                q=sub;
+                for(;;) {
+                    if(q==subLimit) {
+                        if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
+                            return (UChar *)(s-1); /* well-formed match */
+                        } else {
+                            break; /* no match because surrogate pair is split */
+                        }
+                    }
+                    if((c=*p)==0) {
+                        return NULL; /* no match, and none possible after s */
+                    }
+                    if(c!=*q) {
+                        break; /* no match */
+                    }
+                    ++p;
+                    ++q;
+                }
+            }
+        }
+    } else {
+        const UChar *limit, *preLimit;
+
+        /* subLength was decremented above */
+        if(length<=subLength) {
+            return NULL; /* s is shorter than sub */
+        }
+
+        limit=s+length;
+
+        /* the substring must start before preLimit */
+        preLimit=limit-subLength;
+
+        while(s!=preLimit) {
+            c=*s++;
+            if(c==cs) {
+                /* found first substring UChar, compare rest */
+                p=s;
+                q=sub;
+                for(;;) {
+                    if(q==subLimit) {
+                        if(isMatchAtCPBoundary(start, s-1, p, limit)) {
+                            return (UChar *)(s-1); /* well-formed match */
+                        } else {
+                            break; /* no match because surrogate pair is split */
+                        }
+                    }
+                    if(*p!=*q) {
+                        break; /* no match */
+                    }
+                    ++p;
+                    ++q;
+                }
+            }
+        }
+    }
+
+    /* not found */
+    return NULL;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring) {
+    return u_strFindFirst(s, -1, substring, -1);
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c) {
+    if(U16_IS_SURROGATE(c)) {
+        /* make sure to not find half of a surrogate pair */
+        return u_strFindFirst(s, -1, &c, 1);
+    } else {
+        UChar cs;
+
+        /* trivial search for a BMP code point */
+        for(;;) {
+            if((cs=*s)==c) {
+                return (UChar *)s;
+            }
+            if(cs==0) {
+                return NULL;
+            }
+            ++s;
+        }
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strchr32(const UChar *s, UChar32 c) {
+    if((uint32_t)c<=U_BMP_MAX) {
+        /* find BMP code point */
+        return u_strchr(s, (UChar)c);
+    } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
+        /* find supplementary code point as surrogate pair */
+        UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+        while((cs=*s++)!=0) {
+            if(cs==lead && *s==trail) {
+                return (UChar *)(s-1);
+            }
+        }
+        return NULL;
+    } else {
+        /* not a Unicode code point, not findable */
+        return NULL;
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count) {
+    if(count<=0) {
+        return NULL; /* no string */
+    } else if(U16_IS_SURROGATE(c)) {
+        /* make sure to not find half of a surrogate pair */
+        return u_strFindFirst(s, count, &c, 1);
+    } else {
+        /* trivial search for a BMP code point */
+        const UChar *limit=s+count;
+        do {
+            if(*s==c) {
+                return (UChar *)s;
+            }
+        } while(++s!=limit);
+        return NULL;
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count) {
+    if((uint32_t)c<=U_BMP_MAX) {
+        /* find BMP code point */
+        return u_memchr(s, (UChar)c, count);
+    } else if(count<2) {
+        /* too short for a surrogate pair */
+        return NULL;
+    } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
+        /* find supplementary code point as surrogate pair */
+        const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
+        UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+        do {
+            if(*s==lead && *(s+1)==trail) {
+                return (UChar *)s;
+            }
+        } while(++s!=limit);
+        return NULL;
+    } else {
+        /* not a Unicode code point, not findable */
+        return NULL;
+    }
+}
+
+/* Backward binary string search functions ---------------------------------- */
+
+U_CAPI UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length,
+              const UChar *sub, int32_t subLength) {
+    const UChar *start, *limit, *p, *q, *subLimit;
+    UChar c, cs;
+
+    if(sub==NULL || subLength<-1) {
+        return (UChar *)s;
+    }
+    if(s==NULL || length<-1) {
+        return NULL;
+    }
+
+    /*
+     * This implementation is more lazy than the one for u_strFindFirst():
+     * There is no special search code for NUL-terminated strings.
+     * It does not seem to be worth it for searching substrings to
+     * search forward and find all matches like in u_strrchr() and similar.
+     * Therefore, we simply get both string lengths and search backward.
+     *
+     * markus 2002oct23
+     */
+
+    if(subLength<0) {
+        subLength=u_strlen(sub);
+    }
+    if(subLength==0) {
+        return (UChar *)s;
+    }
+
+    /* get sub[subLength-1] to search for it fast */
+    subLimit=sub+subLength;
+    cs=*(--subLimit);
+    --subLength;
+
+    if(subLength==0 && !U16_IS_SURROGATE(cs)) {
+        /* the substring consists of a single, non-surrogate BMP code point */
+        return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
+    }
+
+    if(length<0) {
+        length=u_strlen(s);
+    }
+
+    /* subLength was decremented above */
+    if(length<=subLength) {
+        return NULL; /* s is shorter than sub */
+    }
+
+    start=s;
+    limit=s+length;
+
+    /* the substring must start no later than s+subLength */
+    s+=subLength;
+
+    while(s!=limit) {
+        c=*(--limit);
+        if(c==cs) {
+            /* found last substring UChar, compare rest */
+            p=limit;
+            q=subLimit;
+            for(;;) {
+                if(q==sub) {
+                    if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
+                        return (UChar *)p; /* well-formed match */
+                    } else {
+                        break; /* no match because surrogate pair is split */
+                    }
+                }
+                if(*(--p)!=*(--q)) {
+                    break; /* no match */
+                }
+            }
+        }
+    }
+
+    /* not found */
+    return NULL;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring) {
+    return u_strFindLast(s, -1, substring, -1);
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c) {
+    if(U16_IS_SURROGATE(c)) {
+        /* make sure to not find half of a surrogate pair */
+        return u_strFindLast(s, -1, &c, 1);
+    } else {
+        const UChar *result=NULL;
+        UChar cs;
+
+        /* trivial search for a BMP code point */
+        for(;;) {
+            if((cs=*s)==c) {
+                result=s;
+            }
+            if(cs==0) {
+                return (UChar *)result;
+            }
+            ++s;
+        }
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c) {
+    if((uint32_t)c<=U_BMP_MAX) {
+        /* find BMP code point */
+        return u_strrchr(s, (UChar)c);
+    } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
+        /* find supplementary code point as surrogate pair */
+        const UChar *result=NULL;
+        UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+        while((cs=*s++)!=0) {
+            if(cs==lead && *s==trail) {
+                result=s-1;
+            }
+        }
+        return (UChar *)result;
+    } else {
+        /* not a Unicode code point, not findable */
+        return NULL;
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count) {
+    if(count<=0) {
+        return NULL; /* no string */
+    } else if(U16_IS_SURROGATE(c)) {
+        /* make sure to not find half of a surrogate pair */
+        return u_strFindLast(s, count, &c, 1);
+    } else {
+        /* trivial search for a BMP code point */
+        const UChar *limit=s+count;
+        do {
+            if(*(--limit)==c) {
+                return (UChar *)limit;
+            }
+        } while(s!=limit);
+        return NULL;
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
+    if((uint32_t)c<=U_BMP_MAX) {
+        /* find BMP code point */
+        return u_memrchr(s, (UChar)c, count);
+    } else if(count<2) {
+        /* too short for a surrogate pair */
+        return NULL;
+    } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
+        /* find supplementary code point as surrogate pair */
+        const UChar *limit=s+count-1;
+        UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+        do {
+            if(*limit==trail && *(limit-1)==lead) {
+                return (UChar *)(limit-1);
+            }
+        } while(s!=--limit);
+        return NULL;
+    } else {
+        /* not a Unicode code point, not findable */
+        return NULL;
+    }
+}
+
+/* Tokenization functions --------------------------------------------------- */
+
+/*
+ * Match each code point in a string against each code point in the matchSet.
+ * Return the index of the first string code point that
+ * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
+ * Return -(string length)-1 if there is no such code point.
+ */
+static int32_t
+_matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
+    int32_t matchLen, matchBMPLen, strItr, matchItr;
+    UChar32 stringCh, matchCh;
+    UChar c, c2;
+
+    /* first part of matchSet contains only BMP code points */
+    matchBMPLen = 0;
+    while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
+        ++matchBMPLen;
+    }
+
+    /* second part of matchSet contains BMP and supplementary code points */
+    matchLen = matchBMPLen;
+    while(matchSet[matchLen] != 0) {
+        ++matchLen;
+    }
+
+    for(strItr = 0; (c = string[strItr]) != 0;) {
+        ++strItr;
+        if(U16_IS_SINGLE(c)) {
+            if(polarity) {
+                for(matchItr = 0; matchItr < matchLen; ++matchItr) {
+                    if(c == matchSet[matchItr]) {
+                        return strItr - 1; /* one matches */
+                    }
+                }
+            } else {
+                for(matchItr = 0; matchItr < matchLen; ++matchItr) {
+                    if(c == matchSet[matchItr]) {
+                        goto endloop;
+                    }
+                }
+                return strItr - 1; /* none matches */
+            }
+        } else {
+            /*
+             * No need to check for string length before U16_IS_TRAIL
+             * because c2 could at worst be the terminating NUL.
+             */
+            if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
+                ++strItr;
+                stringCh = U16_GET_SUPPLEMENTARY(c, c2);
+            } else {
+                stringCh = c; /* unpaired trail surrogate */
+            }
+
+            if(polarity) {
+                for(matchItr = matchBMPLen; matchItr < matchLen;) {
+                    U16_NEXT(matchSet, matchItr, matchLen, matchCh);
+                    if(stringCh == matchCh) {
+                        return strItr - U16_LENGTH(stringCh); /* one matches */
+                    }
+                }
+            } else {
+                for(matchItr = matchBMPLen; matchItr < matchLen;) {
+                    U16_NEXT(matchSet, matchItr, matchLen, matchCh);
+                    if(stringCh == matchCh) {
+                        goto endloop;
+                    }
+                }
+                return strItr - U16_LENGTH(stringCh); /* none matches */
+            }
+        }
+endloop:
+        /* wish C had continue with labels like Java... */;
+    }
+
+    /* Didn't find it. */
+    return -strItr-1;
+}
+
+/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
+U_CAPI UChar * U_EXPORT2
+u_strpbrk(const UChar *string, const UChar *matchSet)
+{
+    int32_t idx = _matchFromSet(string, matchSet, TRUE);
+    if(idx >= 0) {
+        return (UChar *)string + idx;
+    } else {
+        return NULL;
+    }
+}
+
+/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
+U_CAPI int32_t U_EXPORT2
+u_strcspn(const UChar *string, const UChar *matchSet)
+{
+    int32_t idx = _matchFromSet(string, matchSet, TRUE);
+    if(idx >= 0) {
+        return idx;
+    } else {
+        return -idx - 1; /* == u_strlen(string) */
+    }
+}
+
+/* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
+U_CAPI int32_t U_EXPORT2
+u_strspn(const UChar *string, const UChar *matchSet)
+{
+    int32_t idx = _matchFromSet(string, matchSet, FALSE);
+    if(idx >= 0) {
+        return idx;
+    } else {
+        return -idx - 1; /* == u_strlen(string) */
+    }
+}
+
+/* ----- Text manipulation functions --- */
+
+U_CAPI UChar* U_EXPORT2
+u_strtok_r(UChar    *src, 
+     const UChar    *delim,
+           UChar   **saveState)
+{
+    UChar *tokSource;
+    UChar *nextToken;
+    uint32_t nonDelimIdx;
+
+    /* If saveState is NULL, the user messed up. */
+    if (src != NULL) {
+        tokSource = src;
+        *saveState = src; /* Set to "src" in case there are no delimiters */
+    }
+    else if (*saveState) {
+        tokSource = *saveState;
+    }
+    else {
+        /* src == NULL && *saveState == NULL */
+        /* This shouldn't happen. We already finished tokenizing. */
+        return NULL;
+    }
+
+    /* Skip initial delimiters */
+    nonDelimIdx = u_strspn(tokSource, delim);
+    tokSource = &tokSource[nonDelimIdx];
+
+    if (*tokSource) {
+        nextToken = u_strpbrk(tokSource, delim);
+        if (nextToken != NULL) {
+            /* Create a token */
+            *(nextToken++) = 0;
+            *saveState = nextToken;
+            return tokSource;
+        }
+        else if (*saveState) {
+            /* Return the last token */
+            *saveState = NULL;
+            return tokSource;
+        }
+    }
+    else {
+        /* No tokens were found. Only delimiters were left. */
+        *saveState = NULL;
+    }
+    return NULL;
+}
+
+/* Miscellaneous functions -------------------------------------------------- */
+
+U_CAPI UChar* U_EXPORT2
+u_strcat(UChar     *dst, 
+    const UChar     *src)
+{
+    UChar *anchor = dst;            /* save a pointer to start of dst */
+
+    while(*dst != 0) {              /* To end of first string          */
+        ++dst;
+    }
+    while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
+    }
+
+    return anchor;
+}
+
+U_CAPI UChar*  U_EXPORT2
+u_strncat(UChar     *dst, 
+     const UChar     *src, 
+     int32_t     n ) 
+{
+    if(n > 0) {
+        UChar *anchor = dst;            /* save a pointer to start of dst */
+
+        while(*dst != 0) {              /* To end of first string          */
+            ++dst;
+        }
+        while((*dst = *src) != 0) {     /* copy string 2 over              */
+            ++dst;
+            if(--n == 0) {
+                *dst = 0;
+                break;
+            }
+            ++src;
+        }
+
+        return anchor;
+    } else {
+        return dst;
+    }
+}
+
+/* ----- Text property functions --- */
+
+U_CAPI int32_t   U_EXPORT2
+u_strcmp(const UChar *s1, 
+    const UChar *s2) 
+{
+    UChar  c1, c2;
+
+    for(;;) {
+        c1=*s1++;
+        c2=*s2++;
+        if (c1 != c2 || c1 == 0) {
+            break;
+        }
+    }
+    return (int32_t)c1 - (int32_t)c2;
+}
+
+U_CFUNC int32_t U_EXPORT2
+uprv_strCompare(const UChar *s1, int32_t length1,
+                const UChar *s2, int32_t length2,
+                UBool strncmpStyle, UBool codePointOrder) {
+    const UChar *start1, *start2, *limit1, *limit2;
+    UChar c1, c2;
+
+    /* setup for fix-up */
+    start1=s1;
+    start2=s2;
+
+    /* compare identical prefixes - they do not need to be fixed up */
+    if(length1<0 && length2<0) {
+        /* strcmp style, both NUL-terminated */
+        if(s1==s2) {
+            return 0;
+        }
+
+        for(;;) {
+            c1=*s1;
+            c2=*s2;
+            if(c1!=c2) {
+                break;
+            }
+            if(c1==0) {
+                return 0;
+            }
+            ++s1;
+            ++s2;
+        }
+
+        /* setup for fix-up */
+        limit1=limit2=NULL;
+    } else if(strncmpStyle) {
+        /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
+        if(s1==s2) {
+            return 0;
+        }
+
+        limit1=start1+length1;
+
+        for(;;) {
+            /* both lengths are same, check only one limit */
+            if(s1==limit1) {
+                return 0;
+            }
+
+            c1=*s1;
+            c2=*s2;
+            if(c1!=c2) {
+                break;
+            }
+            if(c1==0) {
+                return 0;
+            }
+            ++s1;
+            ++s2;
+        }
+
+        /* setup for fix-up */
+        limit2=start2+length1; /* use length1 here, too, to enforce assumption */
+    } else {
+        /* memcmp/UnicodeString style, both length-specified */
+        int32_t lengthResult;
+
+        if(length1<0) {
+            length1=u_strlen(s1);
+        }
+        if(length2<0) {
+            length2=u_strlen(s2);
+        }
+
+        /* limit1=start1+min(lenght1, length2) */
+        if(length1<length2) {
+            lengthResult=-1;
+            limit1=start1+length1;
+        } else if(length1==length2) {
+            lengthResult=0;
+            limit1=start1+length1;
+        } else /* length1>length2 */ {
+            lengthResult=1;
+            limit1=start1+length2;
+        }
+
+        if(s1==s2) {
+            return lengthResult;
+        }
+
+        for(;;) {
+            /* check pseudo-limit */
+            if(s1==limit1) {
+                return lengthResult;
+            }
+
+            c1=*s1;
+            c2=*s2;
+            if(c1!=c2) {
+                break;
+            }
+            ++s1;
+            ++s2;
+        }
+
+        /* setup for fix-up */
+        limit1=start1+length1;
+        limit2=start2+length2;
+    }
+
+    /* if both values are in or above the surrogate range, fix them up */
+    if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
+        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
+        if(
+            (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
+            (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
+        ) {
+            /* part of a surrogate pair, leave >=d800 */
+        } else {
+            /* BMP code point - may be surrogate code point - make <d800 */
+            c1-=0x2800;
+        }
+
+        if(
+            (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
+            (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
+        ) {
+            /* part of a surrogate pair, leave >=d800 */
+        } else {
+            /* BMP code point - may be surrogate code point - make <d800 */
+            c2-=0x2800;
+        }
+    }
+
+    /* now c1 and c2 are in the requested (code unit or code point) order */
+    return (int32_t)c1-(int32_t)c2;
+}
+
+/*
+ * Compare two strings as presented by UCharIterators.
+ * Use code unit or code point order.
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
+    UChar32 c1, c2;
+
+    /* argument checking */
+    if(iter1==NULL || iter2==NULL) {
+        return 0; /* bad arguments */
+    }
+    if(iter1==iter2) {
+        return 0; /* identical iterators */
+    }
+
+    /* reset iterators to start? */
+    iter1->move(iter1, 0, UITER_START);
+    iter2->move(iter2, 0, UITER_START);
+
+    /* compare identical prefixes - they do not need to be fixed up */
+    for(;;) {
+        c1=iter1->next(iter1);
+        c2=iter2->next(iter2);
+        if(c1!=c2) {
+            break;
+        }
+        if(c1==-1) {
+            return 0;
+        }
+    }
+
+    /* if both values are in or above the surrogate range, fix them up */
+    if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
+        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
+        if(
+            (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
+            (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
+        ) {
+            /* part of a surrogate pair, leave >=d800 */
+        } else {
+            /* BMP code point - may be surrogate code point - make <d800 */
+            c1-=0x2800;
+        }
+
+        if(
+            (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
+            (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
+        ) {
+            /* part of a surrogate pair, leave >=d800 */
+        } else {
+            /* BMP code point - may be surrogate code point - make <d800 */
+            c2-=0x2800;
+        }
+    }
+
+    /* now c1 and c2 are in the requested (code unit or code point) order */
+    return (int32_t)c1-(int32_t)c2;
+}
+
+#if 0
+/*
+ * u_strCompareIter() does not leave the iterators _on_ the different units.
+ * This is possible but would cost a few extra indirect function calls to back
+ * up if the last unit (c1 or c2 respectively) was >=0.
+ *
+ * Consistently leaving them _behind_ the different units is not an option
+ * because the current "unit" is the end of the string if that is reached,
+ * and in such a case the iterator does not move.
+ * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
+ * of their strings. Calling previous() on each does not move them to where
+ * the comparison fails.
+ *
+ * So the simplest semantics is to not define where the iterators end up.
+ *
+ * The following fragment is part of what would need to be done for backing up.
+ */
+void fragment {
+        /* iff a surrogate is part of a surrogate pair, leave >=d800 */
+        if(c1<=0xdbff) {
+            if(!U16_IS_TRAIL(iter1->current(iter1))) {
+                /* lead surrogate code point - make <d800 */
+                c1-=0x2800;
+            }
+        } else if(c1<=0xdfff) {
+            int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
+            iter1->previous(iter1); /* ==c1 */
+            if(!U16_IS_LEAD(iter1->previous(iter1))) {
+                /* trail surrogate code point - make <d800 */
+                c1-=0x2800;
+            }
+            /* go back to behind where the difference is */
+            iter1->move(iter1, idx, UITER_ZERO);
+        } else /* 0xe000<=c1<=0xffff */ {
+            /* BMP code point - make <d800 */
+            c1-=0x2800;
+        }
+}
+#endif
+
+U_CAPI int32_t U_EXPORT2
+u_strCompare(const UChar *s1, int32_t length1,
+             const UChar *s2, int32_t length2,
+             UBool codePointOrder) {
+    /* argument checking */
+    if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
+        return 0;
+    }
+    return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
+}
+
+/* String compare in code point order - u_strcmp() compares in code unit order. */
+U_CAPI int32_t U_EXPORT2
+u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
+    return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
+}
+
+U_CAPI int32_t   U_EXPORT2
+u_strncmp(const UChar     *s1, 
+     const UChar     *s2, 
+     int32_t     n) 
+{
+    if(n > 0) {
+        int32_t rc;
+        for(;;) {
+            rc = (int32_t)*s1 - (int32_t)*s2;
+            if(rc != 0 || *s1 == 0 || --n == 0) {
+                return rc;
+            }
+            ++s1;
+            ++s2;
+        }
+    } else {
+        return 0;
+    }
+}
+
+U_CAPI int32_t U_EXPORT2
+u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
+    return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
+}
+
+U_CAPI UChar* U_EXPORT2
+u_strcpy(UChar     *dst, 
+    const UChar     *src) 
+{
+    UChar *anchor = dst;            /* save a pointer to start of dst */
+
+    while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
+    }
+
+    return anchor;
+}
+
+U_CAPI UChar*  U_EXPORT2
+u_strncpy(UChar     *dst, 
+     const UChar     *src, 
+     int32_t     n) 
+{
+    UChar *anchor = dst;            /* save a pointer to start of dst */
+
+    /* copy string 2 over */
+    while(n > 0 && (*(dst++) = *(src++)) != 0) {
+        --n;
+    }
+
+    return anchor;
+}
+
+U_CAPI int32_t   U_EXPORT2
+u_strlen(const UChar *s) 
+{
+#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
+    return (int32_t)uprv_wcslen(s);
+#else
+    const UChar *t = s;
+    while(*t != 0) {
+      ++t;
+    }
+    return t - s;
+#endif
+}
+
+U_CAPI int32_t U_EXPORT2
+u_countChar32(const UChar *s, int32_t length) {
+    int32_t count;
+
+    if(s==NULL || length<-1) {
+        return 0;
+    }
+
+    count=0;
+    if(length>=0) {
+        while(length>0) {
+            ++count;
+            if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) {
+                s+=2;
+                length-=2;
+            } else {
+                ++s;
+                --length;
+            }
+        }
+    } else /* length==-1 */ {
+        UChar c;
+
+        for(;;) {
+            if((c=*s++)==0) {
+                break;
+            }
+            ++count;
+
+            /*
+             * sufficient to look ahead one because of UTF-16;
+             * safe to look ahead one because at worst that would be the terminating NUL
+             */
+            if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
+                ++s;
+            }
+        }
+    }
+    return count;
+}
+
+U_CAPI UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
+
+    if(number<0) {
+        return TRUE;
+    }
+    if(s==NULL || length<-1) {
+        return FALSE;
+    }
+
+    if(length==-1) {
+        /* s is NUL-terminated */
+        UChar c;
+
+        /* count code points until they exceed */
+        for(;;) {
+            if((c=*s++)==0) {
+                return FALSE;
+            }
+            if(number==0) {
+                return TRUE;
+            }
+            if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
+                ++s;
+            }
+            --number;
+        }
+    } else {
+        /* length>=0 known */
+        const UChar *limit;
+        int32_t maxSupplementary;
+
+        /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
+        if(((length+1)/2)>number) {
+            return TRUE;
+        }
+
+        /* check if s does not even contain enough UChars */
+        maxSupplementary=length-number;
+        if(maxSupplementary<=0) {
+            return FALSE;
+        }
+        /* there are maxSupplementary=length-number more UChars than asked-for code points */
+
+        /*
+         * count code points until they exceed and also check that there are
+         * no more than maxSupplementary supplementary code points (UChar pairs)
+         */
+        limit=s+length;
+        for(;;) {
+            if(s==limit) {
+                return FALSE;
+            }
+            if(number==0) {
+                return TRUE;
+            }
+            if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
+                ++s;
+                if(--maxSupplementary<=0) {
+                    /* too many pairs - too few code points */
+                    return FALSE;
+                }
+            }
+            --number;
+        }
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memcpy(UChar *dest, const UChar *src, int32_t count) {
+    if(count > 0) {
+        uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR);
+    }
+    return dest;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memmove(UChar *dest, const UChar *src, int32_t count) {
+    if(count > 0) {
+        uprv_memmove(dest, src, count*U_SIZEOF_UCHAR);
+    }
+    return dest;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memset(UChar *dest, UChar c, int32_t count) {
+    if(count > 0) {
+        UChar *ptr = dest;
+        UChar *limit = dest + count;
+
+        while (ptr < limit) {
+            *(ptr++) = c;
+        }
+    }
+    return dest;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
+    if(count > 0) {
+        const UChar *limit = buf1 + count;
+        int32_t result;
+
+        while (buf1 < limit) {
+            result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
+            if (result != 0) {
+                return result;
+            }
+            buf1++;
+            buf2++;
+        }
+    }
+    return 0;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
+    return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
+}
+
+/* u_unescape & support fns ------------------------------------------------- */
+
+/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
+static const UChar UNESCAPE_MAP[] = {
+    /*"   0x22, 0x22 */
+    /*'   0x27, 0x27 */
+    /*?   0x3F, 0x3F */
+    /*\   0x5C, 0x5C */
+    /*a*/ 0x61, 0x07,
+    /*b*/ 0x62, 0x08,
+    /*e*/ 0x65, 0x1b,
+    /*f*/ 0x66, 0x0c,
+    /*n*/ 0x6E, 0x0a,
+    /*r*/ 0x72, 0x0d,
+    /*t*/ 0x74, 0x09,
+    /*v*/ 0x76, 0x0b
+};
+enum { UNESCAPE_MAP_LENGTH = sizeof(UNESCAPE_MAP) / sizeof(UNESCAPE_MAP[0]) };
+
+/* Convert one octal digit to a numeric value 0..7, or -1 on failure */
+static int8_t _digit8(UChar c) {
+    if (c >= 0x0030 && c <= 0x0037) {
+        return (int8_t)(c - 0x0030);
+    }
+    return -1;
+}
+
+/* Convert one hex digit to a numeric value 0..F, or -1 on failure */
+static int8_t _digit16(UChar c) {
+    if (c >= 0x0030 && c <= 0x0039) {
+        return (int8_t)(c - 0x0030);
+    }
+    if (c >= 0x0041 && c <= 0x0046) {
+        return (int8_t)(c - (0x0041 - 10));
+    }
+    if (c >= 0x0061 && c <= 0x0066) {
+        return (int8_t)(c - (0x0061 - 10));
+    }
+    return -1;
+}
+
+/* Parse a single escape sequence.  Although this method deals in
+ * UChars, it does not use C++ or UnicodeString.  This allows it to
+ * be used from C contexts. */
+U_CAPI UChar32 U_EXPORT2
+u_unescapeAt(UNESCAPE_CHAR_AT charAt,
+             int32_t *offset,
+             int32_t length,
+             void *context) {
+
+    int32_t start = *offset;
+    UChar c;
+    UChar32 result = 0;
+    int8_t n = 0;
+    int8_t minDig = 0;
+    int8_t maxDig = 0;
+    int8_t bitsPerDigit = 4; 
+    int8_t dig;
+    int32_t i;
+    UBool braces = FALSE;
+
+    /* Check that offset is in range */
+    if (*offset < 0 || *offset >= length) {
+        goto err;
+    }
+
+    /* Fetch first UChar after '\\' */
+    c = charAt((*offset)++, context);
+
+    /* Convert hexadecimal and octal escapes */
+    switch (c) {
+    case 0x0075 /*'u'*/:
+        minDig = maxDig = 4;
+        break;
+    case 0x0055 /*'U'*/:
+        minDig = maxDig = 8;
+        break;
+    case 0x0078 /*'x'*/:
+        minDig = 1;
+        if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
+            ++(*offset);
+            braces = TRUE;
+            maxDig = 8;
+        } else {
+            maxDig = 2;
+        }
+        break;
+    default:
+        dig = _digit8(c);
+        if (dig >= 0) {
+            minDig = 1;
+            maxDig = 3;
+            n = 1; /* Already have first octal digit */
+            bitsPerDigit = 3;
+            result = dig;
+        }
+        break;
+    }
+    if (minDig != 0) {
+        while (*offset < length && n < maxDig) {
+            c = charAt(*offset, context);
+            dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
+            if (dig < 0) {
+                break;
+            }
+            result = (result << bitsPerDigit) | dig;
+            ++(*offset);
+            ++n;
+        }
+        if (n < minDig) {
+            goto err;
+        }
+        if (braces) {
+            if (c != 0x7D /*}*/) {
+                goto err;
+            }
+            ++(*offset);
+        }
+        if (result < 0 || result >= 0x110000) {
+            goto err;
+        }
+        /* If an escape sequence specifies a lead surrogate, see if
+         * there is a trail surrogate after it, either as an escape or
+         * as a literal.  If so, join them up into a supplementary.
+         */
+        if (*offset < length && U16_IS_LEAD(result)) {
+            int32_t ahead = *offset + 1;
+            c = charAt(*offset, context);
+            if (c == 0x5C /*'\\'*/ && ahead < length) {
+                c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
+            }
+            if (U16_IS_TRAIL(c)) {
+                *offset = ahead;
+                result = U16_GET_SUPPLEMENTARY(result, c);
+            }
+        }
+        return result;
+    }
+
+    /* Convert C-style escapes in table */
+    for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
+        if (c == UNESCAPE_MAP[i]) {
+            return UNESCAPE_MAP[i+1];
+        } else if (c < UNESCAPE_MAP[i]) {
+            break;
+        }
+    }
+
+    /* Map \cX to control-X: X & 0x1F */
+    if (c == 0x0063 /*'c'*/ && *offset < length) {
+        c = charAt((*offset)++, context);
+        if (U16_IS_LEAD(c) && *offset < length) {
+            UChar c2 = charAt(*offset, context);
+            if (U16_IS_TRAIL(c2)) {
+                ++(*offset);
+                c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
+            }
+        }
+        return 0x1F & c;
+    }
+
+    /* If no special forms are recognized, then consider
+     * the backslash to generically escape the next character.
+     * Deal with surrogate pairs. */
+    if (U16_IS_LEAD(c) && *offset < length) {
+        UChar c2 = charAt(*offset, context);
+        if (U16_IS_TRAIL(c2)) {
+            ++(*offset);
+            return U16_GET_SUPPLEMENTARY(c, c2);
+        }
+    }
+    return c;
+
+ err:
+    /* Invalid escape sequence */
+    *offset = start; /* Reset to initial value */
+    return (UChar32)0xFFFFFFFF;
+}
+
+/* u_unescapeAt() callback to return a UChar from a char* */
+static UChar U_CALLCONV
+_charPtr_charAt(int32_t offset, void *context) {
+    UChar c16;
+    /* It would be more efficient to access the invariant tables
+     * directly but there is no API for that. */
+    u_charsToUChars(((char*) context) + offset, &c16, 1);
+    return c16;
+}
+
+/* Append an escape-free segment of the text; used by u_unescape() */
+static void _appendUChars(UChar *dest, int32_t destCapacity,
+                          const char *src, int32_t srcLen) {
+    if (destCapacity < 0) {
+        destCapacity = 0;
+    }
+    if (srcLen > destCapacity) {
+        srcLen = destCapacity;
+    }
+    u_charsToUChars(src, dest, srcLen);
+}
+
+/* Do an invariant conversion of char* -> UChar*, with escape parsing */
+U_CAPI int32_t U_EXPORT2
+u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
+    const char *segment = src;
+    int32_t i = 0;
+    char c;
+
+    while ((c=*src) != 0) {
+        /* '\\' intentionally written as compiler-specific
+         * character constant to correspond to compiler-specific
+         * char* constants. */
+        if (c == '\\') {
+            int32_t lenParsed = 0;
+            UChar32 c32;
+            if (src != segment) {
+                if (dest != NULL) {
+                    _appendUChars(dest + i, destCapacity - i,
+                                  segment, (int32_t)(src - segment));
+                }
+                i += (int32_t)(src - segment);
+            }
+            ++src; /* advance past '\\' */
+            c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
+            if (lenParsed == 0) {
+                goto err;
+            }
+            src += lenParsed; /* advance past escape seq. */
+            if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
+                U16_APPEND_UNSAFE(dest, i, c32);
+            } else {
+                i += U16_LENGTH(c32);
+            }
+            segment = src;
+        } else {
+            ++src;
+        }
+    }
+    if (src != segment) {
+        if (dest != NULL) {
+            _appendUChars(dest + i, destCapacity - i,
+                          segment, (int32_t)(src - segment));
+        }
+        i += (int32_t)(src - segment);
+    }
+    if (dest != NULL && i < destCapacity) {
+        dest[i] = 0;
+    }
+    return i;
+
+ err:
+    if (dest != NULL && destCapacity > 0) {
+        *dest = 0;
+    }
+    return 0;
+}
+
+/* NUL-termination of strings ----------------------------------------------- */
+
+/**
+ * NUL-terminate a string no matter what its type.
+ * Set warning and error codes accordingly.
+ */
+#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode)      \
+    if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {                    \
+        /* not a public function, so no complete argument checking */   \
+                                                                        \
+        if(length<0) {                                                  \
+            /* assume that the caller handles this */                   \
+        } else if(length<destCapacity) {                                \
+            /* NUL-terminate the string, the NUL fits */                \
+            dest[length]=0;                                             \
+            /* unset the not-terminated warning but leave all others */ \
+            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {          \
+                *pErrorCode=U_ZERO_ERROR;                               \
+            }                                                           \
+        } else if(length==destCapacity) {                               \
+            /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
+            *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;                \
+        } else /* length>destCapacity */ {                              \
+            /* even the string itself did not fit - set an error code */ \
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;                        \
+        }                                                               \
+    }
+
+U_CAPI int32_t U_EXPORT2
+u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+    return length;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+    return length;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+    return length;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+    return length;
+}
+
+// Compute the hash code for a string -------------------------------------- ***
+
+// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
+// on UHashtable code.
+
+/*
+  Compute the hash by iterating sparsely over about 32 (up to 63)
+  characters spaced evenly through the string.  For each character,
+  multiply the previous hash value by a prime number and add the new
+  character in, like a linear congruential random number generator,
+  producing a pseudorandom deterministic value well distributed over
+  the output range. [LIU]
+*/
+
+#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
+    uint32_t hash = 0;                        \
+    const TYPE *p = (const TYPE*) STR;        \
+    if (p != NULL) {                          \
+        int32_t len = (int32_t)(STRLEN);      \
+        int32_t inc = ((len - 32) / 32) + 1;  \
+        const TYPE *limit = p + len;          \
+        while (p<limit) {                     \
+            hash = (hash * 37) + DEREF;       \
+            p += inc;                         \
+        }                                     \
+    }                                         \
+    return static_cast<int32_t>(hash)
+
+/* Used by UnicodeString to compute its hashcode - Not public API. */
+U_CAPI int32_t U_EXPORT2
+ustr_hashUCharsN(const UChar *str, int32_t length) {
+    STRING_HASH(UChar, str, length, *p);
+}
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashCharsN(const char *str, int32_t length) {
+    STRING_HASH(uint8_t, str, length, *p);
+}
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashICharsN(const char *str, int32_t length) {
+    STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
+}
diff --git a/src/core/basetypes/icu-ucsdet/utrace.c b/src/core/basetypes/icu-ucsdet/utrace.c
new file mode 100644
index 00000000..c2f36c48
--- /dev/null
+++ b/src/core/basetypes/icu-ucsdet/utrace.c
@@ -0,0 +1,488 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2003-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  utrace.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*/
+
+#define   UTRACE_IMPL
+#include "unicode/utrace.h"
+#include "utracimp.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+
+
+static UTraceEntry     *pTraceEntryFunc = NULL;
+static UTraceExit      *pTraceExitFunc  = NULL;
+static UTraceData      *pTraceDataFunc  = NULL;
+static const void      *gTraceContext   = NULL;
+
+U_EXPORT int32_t
+utrace_level = UTRACE_ERROR;
+
+U_CAPI void U_EXPORT2
+utrace_entry(int32_t fnNumber) {
+    if (pTraceEntryFunc != NULL) {
+        (*pTraceEntryFunc)(gTraceContext, fnNumber);
+    }
+}
+
+
+static const char gExitFmt[]             = "Returns.";
+static const char gExitFmtValue[]        = "Returns %d.";
+static const char gExitFmtStatus[]       = "Returns.  Status = %d.";
+static const char gExitFmtValueStatus[]  = "Returns %d.  Status = %d.";
+static const char gExitFmtPtrStatus[]    = "Returns %d.  Status = %p.";
+
+U_CAPI void U_EXPORT2
+utrace_exit(int32_t fnNumber, int32_t returnType, ...) {
+    if (pTraceExitFunc != NULL) {
+        va_list     args;
+        const char *fmt;
+
+        switch (returnType) {
+        case 0:
+            fmt = gExitFmt;
+            break;
+        case UTRACE_EXITV_I32:
+            fmt = gExitFmtValue;
+            break;
+        case UTRACE_EXITV_STATUS:
+            fmt = gExitFmtStatus;
+            break;
+        case UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS:
+            fmt = gExitFmtValueStatus;
+            break;
+        case UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS:
+            fmt = gExitFmtPtrStatus;
+            break;
+        default:
+            U_ASSERT(FALSE);
+            fmt = gExitFmt;
+        }
+
+        va_start(args, returnType);
+        (*pTraceExitFunc)(gTraceContext, fnNumber, fmt, args);
+        va_end(args);
+    }
+}
+ 
+
+ 
+U_CAPI void U_EXPORT2 
+utrace_data(int32_t fnNumber, int32_t level, const char *fmt, ...) {
+    if (pTraceDataFunc != NULL) {
+           va_list args;
+           va_start(args, fmt ); 
+           (*pTraceDataFunc)(gTraceContext, fnNumber, level, fmt, args);
+           va_end(args);
+    }
+}
+
+
+static void outputChar(char c, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
+    int32_t i;
+    /* Check whether a start of line indenting is needed.  Three cases:
+     *   1.  At the start of the first line  (output index == 0).
+     *   2.  At the start of subsequent lines  (preceeding char in buffer == '\n')
+     *   3.  When preflighting buffer len (buffer capacity is exceeded), when
+     *       a \n is output.  Ideally we wouldn't do the indent until the following char
+     *       is received, but that won't work because there's no place to remember that
+     *       the preceding char was \n.  Meaning that we may overstimate the
+     *       buffer size needed.  No harm done.
+     */
+    if (*outIx==0 ||   /* case 1. */
+        (c!='\n' && c!=0 && *outIx < capacity && outBuf[(*outIx)-1]=='\n') ||  /* case 2. */
+        (c=='\n' && *outIx>=capacity))    /* case 3 */
+    {
+        /* At the start of a line.  Indent. */
+        for(i=0; i<indent; i++) {
+            if (*outIx < capacity) {
+                outBuf[*outIx] = ' ';
+            }
+            (*outIx)++;
+        }
+    }
+
+    if (*outIx < capacity) {
+        outBuf[*outIx] = c;
+    }
+    if (c != 0) {
+        /* Nulls only appear as end-of-string terminators.  Move them to the output
+         *  buffer, but do not update the length of the buffer, so that any
+         *  following output will overwrite the null. */
+        (*outIx)++;
+    }
+}
+
+static void outputHexBytes(int64_t val, int32_t charsToOutput,
+                           char *outBuf, int32_t *outIx, int32_t capacity) {
+    static const char gHexChars[] = "0123456789abcdef";
+    int32_t shiftCount;
+    for  (shiftCount=(charsToOutput-1)*4; shiftCount >= 0; shiftCount-=4) {
+        char c = gHexChars[(val >> shiftCount) & 0xf];
+        outputChar(c, outBuf, outIx, capacity, 0);
+    }
+}
+
+/* Output a pointer value in hex.  Work with any size of pointer   */
+static void outputPtrBytes(void *val, char *outBuf, int32_t *outIx, int32_t capacity) {
+    int32_t  i;
+    int32_t  incVal = 1;              /* +1 for big endian, -1 for little endian          */
+    char     *p     = (char *)&val;   /* point to current byte to output in the ptr val  */
+
+#if !U_IS_BIG_ENDIAN
+    /* Little Endian.  Move p to most significant end of the value      */
+    incVal = -1;
+    p += sizeof(void *) - 1;
+#endif
+
+    /* Loop through the bytes of the ptr as it sits in memory, from 
+     * most significant to least significant end                    */
+    for (i=0; i<sizeof(void *); i++) {
+        outputHexBytes(*p, 2, outBuf, outIx, capacity);
+        p += incVal;
+    }
+}
+
+static void outputString(const char *s, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
+    int32_t i = 0;
+    char    c;
+    if (s==NULL) {
+        s = "*NULL*";
+    }
+    do {
+        c = s[i++];
+        outputChar(c, outBuf, outIx, capacity, indent);
+    } while (c != 0);
+}
+        
+
+
+static void outputUString(const UChar *s, int32_t len, 
+                          char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
+    int32_t i = 0;
+    UChar   c;
+    if (s==NULL) {
+        outputString(NULL, outBuf, outIx, capacity, indent);
+        return;
+    }
+
+    for (i=0; i<len || len==-1; i++) {
+        c = s[i];
+        outputHexBytes(c, 4, outBuf, outIx, capacity);
+        outputChar(' ', outBuf, outIx, capacity, indent);
+        if (len == -1 && c==0) {
+            break;
+        }
+    }
+}
+        
+U_CAPI int32_t U_EXPORT2
+utrace_vformat(char *outBuf, int32_t capacity, int32_t indent, const char *fmt, va_list args) {
+    int32_t   outIx  = 0;
+    int32_t   fmtIx  = 0;
+    char      fmtC;
+    char      c;
+    int32_t   intArg;
+    int64_t   longArg = 0;
+    char      *ptrArg;
+
+    /*   Loop runs once for each character in the format string.
+     */
+    for (;;) {
+        fmtC = fmt[fmtIx++];
+        if (fmtC != '%') {
+            /* Literal character, not part of a %sequence.  Just copy it to the output. */
+            outputChar(fmtC, outBuf, &outIx, capacity, indent);
+            if (fmtC == 0) {
+                /* We hit the null that terminates the format string.
+                 * This is the normal (and only) exit from the loop that
+                 * interprets the format
+                 */
+                break;
+            }
+            continue;
+        }
+
+        /* We encountered a '%'.  Pick up the following format char */
+        fmtC = fmt[fmtIx++];
+
+        switch (fmtC) {
+        case 'c':
+            /* single 8 bit char   */
+            c = (char)va_arg(args, int32_t);
+            outputChar(c, outBuf, &outIx, capacity, indent);
+            break;
+
+        case 's':
+            /* char * string, null terminated.  */
+            ptrArg = va_arg(args, char *);
+            outputString((const char *)ptrArg, outBuf, &outIx, capacity, indent);
+            break;
+
+        case 'S':
+            /* UChar * string, with length, len==-1 for null terminated. */
+            ptrArg = va_arg(args, void *);             /* Ptr    */
+            intArg =(int32_t)va_arg(args, int32_t);    /* Length */
+            outputUString((const UChar *)ptrArg, intArg, outBuf, &outIx, capacity, indent);
+            break;
+
+        case 'b':
+            /*  8 bit int  */
+            intArg = va_arg(args, int);
+            outputHexBytes(intArg, 2, outBuf, &outIx, capacity);
+            break;
+
+        case 'h':
+            /*  16 bit int  */
+            intArg = va_arg(args, int);
+            outputHexBytes(intArg, 4, outBuf, &outIx, capacity);
+            break;
+
+        case 'd':
+            /*  32 bit int  */
+            intArg = va_arg(args, int);
+            outputHexBytes(intArg, 8, outBuf, &outIx, capacity);
+            break;
+
+        case 'l':
+            /*  64 bit long  */
+            longArg = va_arg(args, int64_t);
+            outputHexBytes(longArg, 16, outBuf, &outIx, capacity);
+            break;
+            
+        case 'p':
+            /*  Pointers.   */
+            ptrArg = va_arg(args, void *);
+            outputPtrBytes(ptrArg, outBuf, &outIx, capacity);
+            break;
+
+        case 0:
+            /* Single '%' at end of fmt string.  Output as literal '%'.   
+             * Back up index into format string so that the terminating null will be
+             * re-fetched in the outer loop, causing it to terminate.
+             */
+            outputChar('%', outBuf, &outIx, capacity, indent);
+            fmtIx--;
+            break;
+
+        case 'v':
+            {
+                /* Vector of values, e.g. %vh */
+                char     vectorType;
+                int32_t  vectorLen;
+                const char   *i8Ptr;
+                int16_t  *i16Ptr;
+                int32_t  *i32Ptr;
+                int64_t  *i64Ptr;
+                void     **ptrPtr;
+                int32_t   charsToOutput = 0;
+                int32_t   i;
+                
+                vectorType = fmt[fmtIx];    /* b, h, d, l, p, etc. */
+                if (vectorType != 0) {
+                    fmtIx++;
+                }
+                i8Ptr = (const char *)va_arg(args, void*);
+                i16Ptr = (int16_t *)i8Ptr;
+                i32Ptr = (int32_t *)i8Ptr;
+                i64Ptr = (int64_t *)i8Ptr;
+                ptrPtr = (void **)i8Ptr;
+                vectorLen =(int32_t)va_arg(args, int32_t);
+                if (ptrPtr == NULL) {
+                    outputString("*NULL* ", outBuf, &outIx, capacity, indent);
+                } else {
+                    for (i=0; i<vectorLen || vectorLen==-1; i++) { 
+                        switch (vectorType) {
+                        case 'b':
+                            charsToOutput = 2;
+                            longArg = *i8Ptr++;
+                            break;
+                        case 'h':
+                            charsToOutput = 4;
+                            longArg = *i16Ptr++;
+                            break;
+                        case 'd':
+                            charsToOutput = 8;
+                            longArg = *i32Ptr++;
+                            break;
+                        case 'l':
+                            charsToOutput = 16;
+                            longArg = *i64Ptr++;
+                            break;
+                        case 'p':
+                            charsToOutput = 0;
+                            outputPtrBytes(*ptrPtr, outBuf, &outIx, capacity);
+                            longArg = *ptrPtr==NULL? 0: 1;    /* test for null terminated array. */
+                            ptrPtr++;
+                            break;
+                        case 'c':
+                            charsToOutput = 0;
+                            outputChar(*i8Ptr, outBuf, &outIx, capacity, indent);
+                            longArg = *i8Ptr;    /* for test for null terminated array. */
+                            i8Ptr++;
+                            break;
+                        case 's':
+                            charsToOutput = 0;
+                            outputString(*ptrPtr, outBuf, &outIx, capacity, indent);
+                            outputChar('\n', outBuf, &outIx, capacity, indent);
+                            longArg = *ptrPtr==NULL? 0: 1;   /* for test for null term. array. */
+                            ptrPtr++;
+                            break;
+
+                        case 'S':
+                            charsToOutput = 0;
+                            outputUString((const UChar *)*ptrPtr, -1, outBuf, &outIx, capacity, indent);
+                            outputChar('\n', outBuf, &outIx, capacity, indent);
+                            longArg = *ptrPtr==NULL? 0: 1;   /* for test for null term. array. */
+                            ptrPtr++;
+                            break;
+
+                            
+                        }
+                        if (charsToOutput > 0) {
+                            outputHexBytes(longArg, charsToOutput, outBuf, &outIx, capacity);
+                            outputChar(' ', outBuf, &outIx, capacity, indent);
+                        }
+                        if (vectorLen == -1 && longArg == 0) {
+                            break;
+                        }
+                    }
+                }
+                outputChar('[', outBuf, &outIx, capacity, indent);
+                outputHexBytes(vectorLen, 8, outBuf, &outIx, capacity);
+                outputChar(']', outBuf, &outIx, capacity, indent);
+            }
+            break;
+
+
+        default:
+            /* %. in format string, where . is some character not in the set
+             *    of recognized format chars.  Just output it as if % wasn't there.
+             *    (Covers "%%" outputing a single '%')
+             */
+             outputChar(fmtC, outBuf, &outIx, capacity, indent);
+        }
+    }
+    outputChar(0, outBuf, &outIx, capacity, indent);  /* Make sure that output is null terminated  */
+    return outIx + 1;     /* outIx + 1 because outIx does not increment when outputing final null. */
+}
+
+
+
+
+U_CAPI int32_t U_EXPORT2
+utrace_format(char *outBuf, int32_t capacity,
+                int32_t indent, const char *fmt,  ...) {
+    int32_t retVal;
+    va_list args;
+    va_start(args, fmt ); 
+    retVal = utrace_vformat(outBuf, capacity, indent, fmt, args);
+    va_end(args);
+    return retVal;
+}
+
+
+U_CAPI void U_EXPORT2
+utrace_setFunctions(const void *context,
+                    UTraceEntry *e, UTraceExit *x, UTraceData *d) {
+    pTraceEntryFunc = e;
+    pTraceExitFunc  = x;
+    pTraceDataFunc  = d;
+    gTraceContext   = context;
+}
+
+
+U_CAPI void U_EXPORT2
+utrace_getFunctions(const void **context,
+                    UTraceEntry **e, UTraceExit **x, UTraceData **d) {
+    *e = pTraceEntryFunc;
+    *x = pTraceExitFunc;
+    *d = pTraceDataFunc;
+    *context = gTraceContext;
+}
+
+U_CAPI void U_EXPORT2
+utrace_setLevel(int32_t level) {
+    if (level < UTRACE_OFF) {
+        level = UTRACE_OFF;
+    }
+    if (level > UTRACE_VERBOSE) {
+        level = UTRACE_VERBOSE;
+    }
+    utrace_level = level;
+}
+
+U_CAPI int32_t U_EXPORT2
+utrace_getLevel() {
+    return utrace_level;
+}
+
+
+U_CFUNC UBool 
+utrace_cleanup() {
+    pTraceEntryFunc = NULL;
+    pTraceExitFunc  = NULL;
+    pTraceDataFunc  = NULL;
+    utrace_level    = UTRACE_OFF;
+    gTraceContext   = NULL;
+    return TRUE;
+}
+
+
+static const char * const
+trFnName[] = {
+    "u_init",
+    "u_cleanup",
+    NULL
+};
+
+
+static const char * const
+trConvNames[] = {
+    "ucnv_open",
+    "ucnv_openPackage",
+    "ucnv_openAlgorithmic",
+    "ucnv_clone",
+    "ucnv_close",
+    "ucnv_flushCache",
+    "ucnv_load",
+    "ucnv_unload",
+    NULL
+};
+
+    
+static const char * const
+trCollNames[] = {
+    "ucol_open",
+    "ucol_close",
+    "ucol_strcoll",
+    "ucol_getSortKey",
+    "ucol_getLocale",
+    "ucol_nextSortKeyPart",
+    "ucol_strcollIter",
+    "ucol_openFromShortString",
+    "ucol_strcollUTF8",
+    NULL
+};
+
+                
+U_CAPI const char * U_EXPORT2
+utrace_functionName(int32_t fnNumber) {
+    if(UTRACE_FUNCTION_START <= fnNumber && fnNumber < UTRACE_FUNCTION_LIMIT) {
+        return trFnName[fnNumber];
+    } else if(UTRACE_CONVERSION_START <= fnNumber && fnNumber < UTRACE_CONVERSION_LIMIT) {
+        return trConvNames[fnNumber - UTRACE_CONVERSION_START];
+    } else if(UTRACE_COLLATION_START <= fnNumber && fnNumber < UTRACE_COLLATION_LIMIT){
+        return trCollNames[fnNumber - UTRACE_COLLATION_START];
+    } else {
+        return "[BOGUS Trace Function Number]";
+    }
+}
+
diff --git a/unittest/data/summary/input/1015-windows-1252.eml b/unittest/data/summary/input/1015-windows-1252.eml
new file mode 100644
index 00000000..78e102ae
--- /dev/null
+++ b/unittest/data/summary/input/1015-windows-1252.eml
@@ -0,0 +1,19 @@
+From 0@xxx 2014-12-12 22:59:42 +0000
+From: "Test test" <test@test.net>
+Content-Type: text/plain;charset=windows-1252
+Content-Transfer-Encoding: quoted-printable
+Mime-Version: 1.0 (1.0)
+Subject: Re: Test test
+To: "Test test" <test@test.net>
+Message-Id: <E5D5C2EB-305B-4577-8D3F-F61D1167D52E@test.net>
+Date: Fri, 01 Dec 2014 23:59:42 +0100
+
+Thanks, Lena. So I guess I have an issue with Siri on one of my iPads, whic=
+h is why the microphone wasn=92t showing up.
+
+...
+
+[Description: Description: cid:image001.png@01CD286C.D0001580]
+=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=
+=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=96=
+=96=96=96=96
diff --git a/unittest/data/summary/input/1021-chinese.eml b/unittest/data/summary/input/1021-chinese.eml
new file mode 100644
index 00000000..93c36b4a
--- /dev/null
+++ b/unittest/data/summary/input/1021-chinese.eml
@@ -0,0 +1,64 @@
+HMM_SOURCE_IP:10.62.8.17:58409.173036363
+HMM_ATTACHE_NUM:0000
+HMM_SOURCE_TYPE:SMTP
+Received: from smtpbg328.qq.com (as7.inner-189.cn [10.62.8.17])
+    by 189.cn (HERMES) with ESMTP id 9DAD6260004
+    for <wujingyou@189.cn>; Wed, 24 Dec 2014 04:09:34 +0800 (CST)
+Received: from smtpbg328.qq.com ([14.17.43.160])
+    by 189CN(MEDUSA 10.62.8.17) with ESMTP id 1419365374.28408 for wujingyou@189.cn
+    ;
+    Wed Dec 24 04:09:36 2014
+0/X-Total-Score: -120:
+2/X-Total-Score: 0:
+3/X-Total-Score: 0:
+X-FILTER-SCORE: to=<5254545257515a595451536152595a4f848f>, score=<1419365376ttttpttftt9tpf9Oa7wdLtZZZFZZ6ZZjZF6jmU1+KNZZ>  
+X-REAL-FROM:wujyou@qq.com
+X-Receive-IP:14.17.43.160 wujyou@qq.com
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=qq.com; s=s201307;
+    t=1419365374; bh=+L9WKWb/TZ/WN/hsH9zWebN7pLH2WpJWpe8JNnjkHzU=;
+    h=X-QQ-FEAT:X-QQ-SSF:X-QQ-WAPMAIL:X-QQ-BUSINESS-ORIGIN:
+     X-Originating-IP:X-QQ-STYLE:X-QQ-mid:From:To:Subject:Mime-Version:Content-Type:Content-Transfer-Encoding:Date:
+     X-Priority:Message-ID:X-QQ-MIME:X-Mailer:X-QQ-Mailer:
+     X-QQ-SENDSIZE;
+    b=TOFY9TbuFdUSu607LkA8z4siu7aRWfyPRfKR4Zdwjq2VDklCS4pOnYd3K7gUnU+Rw
+     rpJvAGZllbkcuDJM/2YB7SPb3yYH8Z55KnTfUPsm8aQxrXABv8OnOJmoU9zhFt2o9p
+     kfxuV+sdaT4MQJI/WXjN24W7Aq+zTTx3SBmcnONo=
+X-QQ-FEAT: zaIfg0hwV2r5ipAJ//zZfBFZvJON7nl/P/xc0g9oL/Y=
+X-QQ-SSF: 00010000000000F0
+X-QQ-WAPMAIL: 1
+X-QQ-BUSINESS-ORIGIN: 2
+X-Originating-IP: 120.237.120.44
+X-QQ-STYLE: 
+X-QQ-mid: wapmail39t1419365373t592355
+From: "=?gb18030?B?z/LX8w==?=" <wujyou@qq.com>
+To: "=?gb18030?B?MTg509M=?=" <wujingyou@189.cn>
+Subject: =?gb18030?B?uf65/g==?=
+Mime-Version: 1.0
+Content-Type: multipart/alternative;
+    boundary="----=_NextPart_5499CBFD_09046CF8_7A47AD55"
+Content-Transfer-Encoding: 8Bit
+Date: Wed, 24 Dec 2014 04:09:33 +0800
+X-Priority: 3
+Message-ID: <tencent_768EB412354CD73039FB412B@qq.com>
+X-QQ-MIME: TCMime 1.0 by Tencent
+X-Mailer: QQMail 2.x
+X-QQ-Mailer: QQMail 2.x
+X-QQ-SENDSIZE: 520
+
+This is a multi-part message in MIME format.
+
+------=_NextPart_5499CBFD_09046CF8_7A47AD55
+Content-Type: text/plain;
+    charset="gb18030"
+Content-Transfer-Encoding: base64
+
+uf65/g==
+
+------=_NextPart_5499CBFD_09046CF8_7A47AD55
+Content-Type: text/html;
+    charset="gb18030"
+Content-Transfer-Encoding: base64
+
+uf65/g==
+
+------=_NextPart_5499CBFD_09046CF8_7A47AD55--
diff --git a/unittest/data/summary/output/1015-windows-1252.txt b/unittest/data/summary/output/1015-windows-1252.txt
new file mode 100644
index 00000000..1682a73e
--- /dev/null
+++ b/unittest/data/summary/output/1015-windows-1252.txt
@@ -0,0 +1,12 @@
+From:Test test<test@test.net>
+To:Test test<test@test.net>
+Subject:Test test
+Date:December 1, 2014 at 2:59:42 PM PST
+
+
+Thanks, Lena. So I guess I have an issue with Siri on one of my iPads, which is why the microphone wasn’t showing up.
+
+...
+
+[Description: Description: cid:image001.png@01CD286C.D0001580]
+––––––––––––––––––––––––––––––––––––––––––––––––––––––
diff --git a/unittest/data/summary/output/1021-chinese.txt b/unittest/data/summary/output/1021-chinese.txt
new file mode 100644
index 00000000..ad7c9e84
--- /dev/null
+++ b/unittest/data/summary/output/1021-chinese.txt
@@ -0,0 +1,7 @@
+From:"向左"<wujyou@qq.com>
+To:"189佑"<wujingyou@189.cn>
+Subject:哈哈
+Date:December 23, 2014 at 12:09:33 PM PST
+
+
+哈哈
diff --git a/unittest/data/summary/output/2458-encoding.txt b/unittest/data/summary/output/2458-encoding.txt
index 923cad9c..62bd4fb7 100644
--- a/unittest/data/summary/output/2458-encoding.txt
+++ b/unittest/data/summary/output/2458-encoding.txt
@@ -12,28 +12,28 @@ E-Mail
 Cevap: Cevap : Yurt içi iPhone 4 Alanlar - Alamayanlar -(Fiyat-Taksit-Bayi-Stok) bilgi versin lütfen
 ~~~~~~~~~~~~~~
 
-Genpa&#39;dan 11ocakta form doldurdum hala arayacaklar Türkcellden tanýdýk buldum ilk partide alýyorum inþallah
+Genpa&#39;dan 11ocakta form doldurdum hala arayacaklar Türkcellden tanıdık buldum ilk partide alıyorum inşallah
 
 
------------ Mesajýn Sonu -------------
+----------- Mesajın Sonu -------------
 
-Bu mesaj aþaðýdaki adreste bulunmaktadýr:
+Bu mesaj aşağıdaki adreste bulunmaktadır:
 
 http://forum.donanimhaber.com/fb.asp?m=47561754
 
 
-Bu mesaj þu kýsýmlarýn altýndadýr:
+Bu mesaj şu kısımların altındadır:
 
 Bölüm: Apple - iPhone - iPad - http://forum.donanimhaber.com/tt.asp?forumid=462
-Forum: Diðer - http://forum.donanimhaber.com/tt.asp?forumid=514
+Forum: Diğer - http://forum.donanimhaber.com/tt.asp?forumid=514
 
 
-Eðer bu mesajlardan daha fazla almak istemiyorsanýz, sitemize giriþ yapýp, email yoluyla üyeliklerinizi gözden geçirip istemediklerinizi çýkartýnýz.
+Eğer bu mesajlardan daha fazla almak istemiyorsanız, sitemize giriş yapıp, email yoluyla üyeliklerinizi gözden geçirip istemediklerinizi çıkartınız.
 http://forum.donanimhaber.com/subscribe.asp
 
 
 =========================================
-Donaným Haber Forum
+Donanım Haber Forum
 http://forum.donanimhaber.com/
 =========================================
 
diff --git a/unittest/data/summary/output/2518-encoding.txt b/unittest/data/summary/output/2518-encoding.txt
index 923cad9c..62bd4fb7 100644
--- a/unittest/data/summary/output/2518-encoding.txt
+++ b/unittest/data/summary/output/2518-encoding.txt
@@ -12,28 +12,28 @@ E-Mail
 Cevap: Cevap : Yurt içi iPhone 4 Alanlar - Alamayanlar -(Fiyat-Taksit-Bayi-Stok) bilgi versin lütfen
 ~~~~~~~~~~~~~~
 
-Genpa&#39;dan 11ocakta form doldurdum hala arayacaklar Türkcellden tanýdýk buldum ilk partide alýyorum inþallah
+Genpa&#39;dan 11ocakta form doldurdum hala arayacaklar Türkcellden tanıdık buldum ilk partide alıyorum inşallah
 
 
------------ Mesajýn Sonu -------------
+----------- Mesajın Sonu -------------
 
-Bu mesaj aþaðýdaki adreste bulunmaktadýr:
+Bu mesaj aşağıdaki adreste bulunmaktadır:
 
 http://forum.donanimhaber.com/fb.asp?m=47561754
 
 
-Bu mesaj þu kýsýmlarýn altýndadýr:
+Bu mesaj şu kısımların altındadır:
 
 Bölüm: Apple - iPhone - iPad - http://forum.donanimhaber.com/tt.asp?forumid=462
-Forum: Diðer - http://forum.donanimhaber.com/tt.asp?forumid=514
+Forum: Diğer - http://forum.donanimhaber.com/tt.asp?forumid=514
 
 
-Eðer bu mesajlardan daha fazla almak istemiyorsanýz, sitemize giriþ yapýp, email yoluyla üyeliklerinizi gözden geçirip istemediklerinizi çýkartýnýz.
+Eğer bu mesajlardan daha fazla almak istemiyorsanız, sitemize giriş yapıp, email yoluyla üyeliklerinizi gözden geçirip istemediklerinizi çıkartınız.
 http://forum.donanimhaber.com/subscribe.asp
 
 
 =========================================
-Donaným Haber Forum
+Donanım Haber Forum
 http://forum.donanimhaber.com/
 =========================================