From 7c412165256f54ea5e499868d9ab48cd9f442147 Mon Sep 17 00:00:00 2001 From: Karl Ramm Date: Fri, 17 Apr 2009 14:42:18 +0000 Subject: More i18n infrastructure lib/charset.c h/zephyr/zephyr.h Refactor ZGetCharset into ZGetCharset and a function that does all the same defaulting but returns a string. ZTransliterate is a zephyr-metaphor wrapper for iconv. lib/zephyr-tests.py Wrappers for the functions in charset.c (but not tests) lib/zephyr_tests.txt Tests for the functions in charset.c --- lib/charset.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++--- lib/zephyr_tests.py | 41 ++++++++++++++++++++++++++++++ lib/zephyr_tests.txt | 59 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/charset.c b/lib/charset.c index d1f64fc..34806e3 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -15,16 +15,18 @@ static const char rcsid_charset_c[] = "$Id$"; #endif /* lint */ #include + #include #include #include #include +#include +#include -unsigned short -ZGetCharset(char *charset) +const char * +ZGetCharsetString(char *charset) { char *p; - short retval; static int once = 1; if (charset == NULL) @@ -46,6 +48,15 @@ ZGetCharset(char *charset) for (p = charset; *p; p++) *p = toupper(*p); + return charset; +} + +unsigned short +ZGetCharset(char *charset) +{ + short retval; + + charset = (char *)ZGetCharsetString(charset); if (!strcmp(charset, "NONE") || !strcmp(charset, "UNKNOWN")) retval = ZCHARSET_UNKNOWN; else if (!strcmp(charset, "ANSI_X3.4-1968")) @@ -72,4 +83,56 @@ ZCharsetToString(unsigned short charset) return "UTF-8"; return "UNKNOWN"; } - + +Code_t +ZTransliterate(char *in, int inlen, char *inset, char *outset, char **out, int *outlen) +{ + iconv_t ih; + char *outset_t, *inp, *outp; + int retval; + size_t size, inleft, outleft; + + *out = NULL; + *outlen = 0; + + outset_t = malloc(strlen(outset) + 11); + if (outset_t == NULL) + return errno; + sprintf(outset_t, "%s//TRANSLIT", outset); + 
+ ih = iconv_open(outset_t, inset); + + free(outset_t); + + if (ih != (iconv_t)-1) { + size = inlen; /* doubling this should be enough, but.. */ + do { + size = size * 2; + + *out = malloc(size); + if (*out == NULL) { + iconv_close(ih); + return errno; + } + + inleft = inlen; + outleft = size; + + inp = in; + outp = *out; + + retval = iconv(ih, &inp, &inleft, &outp, &outleft); + if (retval < 0) + free(*out); + } while (retval < 0 && errno == E2BIG); + + iconv_close(ih); + } + + if (ih == (iconv_t)-1 || retval < 0) + return errno; + + *outlen = size - outleft; + + return ZERR_NONE; +} diff --git a/lib/zephyr_tests.py b/lib/zephyr_tests.py index 791dccc..e78e2a2 100755 --- a/lib/zephyr_tests.py +++ b/lib/zephyr_tests.py @@ -323,6 +323,10 @@ class libZephyr(object): "ZFormatNotice", "ZCompareUID", "ZExpandRealm", + "ZGetCharsetString", + "ZGetCharset", + "ZCharsetToString", + "ZTransliterate", "ZOpenPort", "ZClosePort", "ZMakeAscii", @@ -402,11 +406,48 @@ class libZephyr(object): c_char_p, # realm ] + # unsigned short + # ZGetCharset(char *charset) + self.ZGetCharset.restype = c_ushort + self.ZGetCharset.argtypes = [ + c_char_p, # charset + ] + + # const char * + # ZCharsetToString(unsigned short charset) + self.ZCharsetToString.restype = c_char_p + self.ZCharsetToString.argtypes = [ + c_ushort, # charset + ] + + # Code_t + # ZTransliterate(char *in, + # int inlen, + # char *inset, + # char *outset, + # char **out, + # int *outlen) + self.ZTransliterate.argtypes = [ + c_char_p, # in + c_int, # inlnet, + c_char_p, # inset + c_char_p, # outset + POINTER(c_char_p), # out + POINTER(c_int), # outlen + ] + # Code_t ZOpenPort(u_short *port) self.ZOpenPort.argtypes = [ POINTER(c_ushort), # port ] + # const char * + # ZGetCharsetString(char *charset) + self.ZGetCharsetString.restype = c_char_p + self.ZGetCharsetString.argtypes = [ + c_char_p, # charset + ] + # Code_t # ZMakeAscii(register char *ptr, # int len, diff --git a/lib/zephyr_tests.txt b/lib/zephyr_tests.txt index 
4762c6c..af429f8 100644 --- a/lib/zephyr_tests.txt +++ b/lib/zephyr_tests.txt @@ -84,6 +84,64 @@ Trivial test of ZExpandRealm, using terribly well known hostnames: >>> if not Zauthtype: assert _z.ZExpandRealm("localhost") == "LOCALHOST" >>> if not Zauthtype: assert _z.ZExpandRealm("bitsy.mit.edu") == "BITSY.MIT.EDU" +ZGetCharsetString is a utility function for clients that need to know the +full name of the output character set, e.g. zwgc. Calling it +with NULL will get it from $ZEPHYR_CHARSET or the locale. +Trivial testing of ZGetCharsetString: + + >>> os.environ['LANG'] = 'C' + >>> assert _z.ZGetCharsetString(None) == 'ANSI_X3.4-1968' + >>> os.environ['ZEPHYR_CHARSET'] = 'ISO-8859-1' + >>> assert _z.ZGetCharsetString(None) == 'ISO-8859-1' + >>> assert _z.ZGetCharsetString('UTF-8') == 'UTF-8' + +ZGetCharset is a utility function for clients that need to know the +registry number of a character set, e.g. zwrite. It gets its defaults from +all the places that ZGetCharsetString does, because it calls it. +Trivial testing of ZGetCharset: + + >>> assert _z.ZGetCharset(None) == 4 + >>> assert _z.ZGetCharset('NONE') == 0 + >>> assert _z.ZGetCharset('UNKNOWN') == 0 + >>> assert _z.ZGetCharset('ANSI_X3.4-1968') == 4 + >>> assert _z.ZGetCharset('ISO-8859-1') == 4 + >>> assert _z.ZGetCharset('UTF-8') == 106 + >>> assert _z.ZGetCharset('GIANT RUBBER PANTS') == 0 + +ZCharsetToString converts the registry numbers of the "allowed" character +sets into strings. +Trivial testing of ZCharsetToString: + + >>> assert _z.ZCharsetToString(0) == 'UNKNOWN' + >>> assert _z.ZCharsetToString(4) == 'ISO-8859-1' + >>> assert _z.ZCharsetToString(106) == 'UTF-8' + >>> assert _z.ZCharsetToString(1701) == 'UNKNOWN' + +ZTransliterate does character set conversion for display purposes, and when +it works, it sticks a malloc'd buffer into **bufp. 
+"Trivial" testing of ZTransliterate: + + >>> from ctypes import c_char_p, c_int, byref, string_at + >>> from errno import EINVAL, EILSEQ + >>> bufp = c_char_p(None) + >>> length = c_int(0) + >>> assert _z.ZTransliterate('test', 4, 'ANSI_X3.4-1968', 'ANSI_X3.4-1968', byref(bufp), byref(length)) == 0 + >>> assert string_at(bufp, length) == 'test' + >>> assert _z.ZTransliterate('test', 4, 'ANSI_X3.4-1968', 'UTF-8', byref(bufp), byref(length)) == 0 + >>> assert string_at(bufp, length) == 'test' + >>> assert _z.ZTransliterate('test', 4, 'ISO-8859-1', 'ANSI_X3.4-1968', byref(bufp), byref(length)) == 0 + >>> assert string_at(bufp, length) == 'test' + >>> assert _z.ZTransliterate('test', 4, 'ISO-8859-1', 'ANSI_X3.4-1968', byref(bufp), byref(length)) == 0 + >>> assert string_at(bufp, length) == 'test' + >>> assert _z.ZTransliterate('t\xebst', 4, 'ISO-8859-1', 'ANSI_X3.4-1968', byref(bufp), byref(length)) == 0 + >>> assert string_at(bufp, length) == 't?st' + >>> assert _z.ZTransliterate('t\xebst', 4, 'ISO-8859-1', 'UTF-8', byref(bufp), byref(length)) == 0 + >>> assert string_at(bufp, length) == 't\xc3\xabst' + >>> assert _z.ZTransliterate('t\xc3\xabst', 5, 'UTF-8', 'ISO-8859-1', byref(bufp), byref(length)) == 0 + >>> assert string_at(bufp, length) == 't\xebst' + >>> assert _z.ZTransliterate('t\xc3\xabst', 5, 'UTF-8', 'Oh, my bees', byref(bufp), byref(length)) == EINVAL + >>> assert _z.ZTransliterate('t\xc3x\xabst', 5, 'UTF-8', 'ISO-8859-1', byref(bufp), byref(length)) == EILSEQ + Trivial test of ZOpenPort and ZClosePort: >>> from ctypes import c_ushort @@ -151,6 +209,7 @@ Files complete: ZClosePort.c ZExpnRlm.c ZCmpUID.c + charset.c ZGetSender.c (needs richer test) Pending: -- cgit v1.2.3