summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorGravatar Karl Ramm <kcr@1ts.org>2009-04-17 14:42:18 +0000
committerGravatar Karl Ramm <kcr@1ts.org>2009-04-17 14:42:18 +0000
commit7c412165256f54ea5e499868d9ab48cd9f442147 (patch)
tree52123669b176dbadc889e71404134ff5df769a96 /lib
parent7dffd27e0267e0585c35c5f1b59497a1431501a3 (diff)
More i18n infrastructure
lib/charset.c h/zephyr/zephyr.h Refactor ZGetCharset into ZGetCharset and a function that does all the same defaulting but returns a string. ZTransliterate is a zephyr-metaphor wrapper for iconv. lib/zephyr_tests.py Wrappers for the functions in charset.c (but not tests) lib/zephyr_tests.txt Tests for the functions in charset.c
Diffstat (limited to 'lib')
-rw-r--r--lib/charset.c71
-rwxr-xr-xlib/zephyr_tests.py41
-rw-r--r--lib/zephyr_tests.txt59
3 files changed, 167 insertions, 4 deletions
diff --git a/lib/charset.c b/lib/charset.c
index d1f64fc..34806e3 100644
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -15,16 +15,18 @@ static const char rcsid_charset_c[] = "$Id$";
#endif /* lint */
#include <internal.h>
+
#include <string.h>
#include <locale.h>
#include <langinfo.h>
#include <ctype.h>
+#include <iconv.h>
+#include <errno.h>
-unsigned short
-ZGetCharset(char *charset)
+const char *
+ZGetCharsetString(char *charset)
{
char *p;
- short retval;
static int once = 1;
if (charset == NULL)
@@ -46,6 +48,15 @@ ZGetCharset(char *charset)
for (p = charset; *p; p++)
*p = toupper(*p);
+ return charset;
+}
+
+unsigned short
+ZGetCharset(char *charset)
+{
+ short retval;
+
+ charset = (char *)ZGetCharsetString(charset);
if (!strcmp(charset, "NONE") || !strcmp(charset, "UNKNOWN"))
retval = ZCHARSET_UNKNOWN;
else if (!strcmp(charset, "ANSI_X3.4-1968"))
@@ -72,4 +83,56 @@ ZCharsetToString(unsigned short charset)
return "UTF-8";
return "UNKNOWN";
}
-
+
+Code_t
+ZTransliterate(char *in, int inlen, char *inset, char *outset, char **out, int *outlen)
+{
+ iconv_t ih;
+ char *outset_t, *inp, *outp;
+ int retval;
+ size_t size, inleft, outleft;
+
+ *out = NULL;
+ *outlen = 0;
+
+ outset_t = malloc(strlen(outset) + 11);
+ if (outset_t == NULL)
+ return errno;
+ sprintf(outset_t, "%s//TRANSLIT", outset);
+
+ ih = iconv_open(outset_t, inset);
+
+ free(outset_t);
+
+ if (ih != (iconv_t)-1) {
+ size = inlen; /* doubling this should be enough, but.. */
+ do {
+ size = size * 2;
+
+ *out = malloc(size);
+ if (*out == NULL) {
+ iconv_close(ih);
+ return errno;
+ }
+
+ inleft = inlen;
+ outleft = size;
+
+ inp = in;
+ outp = *out;
+
+ retval = iconv(ih, &inp, &inleft, &outp, &outleft);
+ if (retval < 0)
+ free(*out);
+ } while (retval < 0 && errno == E2BIG);
+
+ iconv_close(ih);
+ }
+
+ if (ih == (iconv_t)-1 || retval < 0)
+ return errno;
+
+ *outlen = size - outleft;
+
+ return ZERR_NONE;
+}
diff --git a/lib/zephyr_tests.py b/lib/zephyr_tests.py
index 791dccc..e78e2a2 100755
--- a/lib/zephyr_tests.py
+++ b/lib/zephyr_tests.py
@@ -323,6 +323,10 @@ class libZephyr(object):
"ZFormatNotice",
"ZCompareUID",
"ZExpandRealm",
+ "ZGetCharsetString",
+ "ZGetCharset",
+ "ZCharsetToString",
+ "ZTransliterate",
"ZOpenPort",
"ZClosePort",
"ZMakeAscii",
@@ -402,11 +406,48 @@ class libZephyr(object):
c_char_p, # realm
]
+ # unsigned short
+ # ZGetCharset(char *charset)
+ self.ZGetCharset.restype = c_ushort
+ self.ZGetCharset.argtypes = [
+ c_char_p, # charset
+ ]
+
+ # const char *
+ # ZCharsetToString(unsigned short charset)
+ self.ZCharsetToString.restype = c_char_p
+ self.ZCharsetToString.argtypes = [
+ c_ushort, # charset
+ ]
+
+ # Code_t
+ # ZTransliterate(char *in,
+ # int inlen,
+ # char *inset,
+ # char *outset,
+ # char **out,
+ # int *outlen)
+ self.ZTransliterate.argtypes = [
+ c_char_p, # in
+ c_int, # inlen
+ c_char_p, # inset
+ c_char_p, # outset
+ POINTER(c_char_p), # out
+ POINTER(c_int), # outlen
+ ]
+
# Code_t ZOpenPort(u_short *port)
self.ZOpenPort.argtypes = [
POINTER(c_ushort), # port
]
+ # const char *
+ # ZGetCharsetString(char *charset)
+ self.ZGetCharsetString.restype = c_char_p
+ self.ZGetCharsetString.argtypes = [
+ c_char_p, # charset
+ ]
+
# Code_t
# ZMakeAscii(register char *ptr,
# int len,
diff --git a/lib/zephyr_tests.txt b/lib/zephyr_tests.txt
index 4762c6c..af429f8 100644
--- a/lib/zephyr_tests.txt
+++ b/lib/zephyr_tests.txt
@@ -84,6 +84,64 @@ Trivial test of ZExpandRealm, using terribly well known hostnames:
>>> if not Zauthtype: assert _z.ZExpandRealm("localhost") == "LOCALHOST"
>>> if not Zauthtype: assert _z.ZExpandRealm("bitsy.mit.edu") == "BITSY.MIT.EDU"
+ZGetCharsetString is a utility function for clients that need to know the
+full name of the output character set, e.g. zwgc. Calling it
+with NULL will get it from $ZEPHYR_CHARSET or the locale.
+Trivial testing of ZGetCharsetString:
+
+ >>> os.environ['LANG'] = 'C'
+ >>> assert _z.ZGetCharsetString(None) == 'ANSI_X3.4-1968'
+ >>> os.environ['ZEPHYR_CHARSET'] = 'ISO-8859-1'
+ >>> assert _z.ZGetCharsetString(None) == 'ISO-8859-1'
+ >>> assert _z.ZGetCharsetString('UTF-8') == 'UTF-8'
+
+ZGetCharset is a utility function for clients that need to know the
+registry number of a character set, e.g. zwrite. It gets its defaults from
+all the places that ZGetCharsetString does, because it calls it.
+Trivial testing of ZGetCharset:
+
+ >>> assert _z.ZGetCharset(None) == 4
+ >>> assert _z.ZGetCharset('NONE') == 0
+ >>> assert _z.ZGetCharset('UNKNOWN') == 0
+ >>> assert _z.ZGetCharset('ANSI_X3.4-1968') == 4
+ >>> assert _z.ZGetCharset('ISO-8859-1') == 4
+ >>> assert _z.ZGetCharset('UTF-8') == 106
+ >>> assert _z.ZGetCharset('GIANT RUBBER PANTS') == 0
+
+ZCharsetToString converts the registry numbers of the "allowed" character
+sets into strings.
+Trivial testing of ZCharsetToString:
+
+ >>> assert _z.ZCharsetToString(0) == 'UNKNOWN'
+ >>> assert _z.ZCharsetToString(4) == 'ISO-8859-1'
+ >>> assert _z.ZCharsetToString(106) == 'UTF-8'
+ >>> assert _z.ZCharsetToString(1701) == 'UNKNOWN'
+
+ZTransliterate does character set conversion for display purposes, and when
+it works, it sticks a malloc'd buffer in to **bufp.
+"Trivial" testing of ZTransliterate:
+
+ >>> from ctypes import c_char_p, c_int, byref, string_at
+ >>> from errno import EINVAL, EILSEQ
+ >>> bufp = c_char_p(None)
+ >>> length = c_int(0)
+ >>> assert _z.ZTransliterate('test', 4, 'ANSI_X3.4-1968', 'ANSI_X3.4-1968', byref(bufp), byref(length)) == 0
+ >>> assert string_at(bufp, length) == 'test'
+ >>> assert _z.ZTransliterate('test', 4, 'ANSI_X3.4-1968', 'UTF-8', byref(bufp), byref(length)) == 0
+ >>> assert string_at(bufp, length) == 'test'
+ >>> assert _z.ZTransliterate('test', 4, 'ISO-8859-1', 'ANSI_X3.4-1968', byref(bufp), byref(length)) == 0
+ >>> assert string_at(bufp, length) == 'test'
+ >>> assert _z.ZTransliterate('test', 4, 'ISO-8859-1', 'ANSI_X3.4-1968', byref(bufp), byref(length)) == 0
+ >>> assert string_at(bufp, length) == 'test'
+ >>> assert _z.ZTransliterate('t\xebst', 4, 'ISO-8859-1', 'ANSI_X3.4-1968', byref(bufp), byref(length)) == 0
+ >>> assert string_at(bufp, length) == 't?st'
+ >>> assert _z.ZTransliterate('t\xebst', 4, 'ISO-8859-1', 'UTF-8', byref(bufp), byref(length)) == 0
+ >>> assert string_at(bufp, length) == 't\xc3\xabst'
+ >>> assert _z.ZTransliterate('t\xc3\xabst', 5, 'UTF-8', 'ISO-8859-1', byref(bufp), byref(length)) == 0
+ >>> assert string_at(bufp, length) == 't\xebst'
+ >>> assert _z.ZTransliterate('t\xc3\xabst', 5, 'UTF-8', 'Oh, my bees', byref(bufp), byref(length)) == EINVAL
+ >>> assert _z.ZTransliterate('t\xc3x\xabst', 5, 'UTF-8', 'ISO-8859-1', byref(bufp), byref(length)) == EILSEQ
+
Trivial test of ZOpenPort and ZClosePort:
>>> from ctypes import c_ushort
@@ -151,6 +209,7 @@ Files complete:
ZClosePort.c
ZExpnRlm.c
ZCmpUID.c
+ charset.c
ZGetSender.c (needs richer test)
Pending: