diff options
author | wm4 <wm4@nowhere> | 2014-01-15 16:13:07 +0100 |
---|---|---|
committer | wm4 <wm4@nowhere> | 2014-01-15 16:13:07 +0100 |
commit | ca8937d7d269c0ef8881d2ac7a227fdb990a5753 (patch) | |
tree | 7f659346908f3a8e5b705a553720dceb09000c65 /bstr | |
parent | 904060ad7b3d4d4e7e790bcf94d4f5230c854c43 (diff) |
bstr: add function for splitting UTF-8
Diffstat (limited to 'bstr')
-rw-r--r-- | bstr/bstr.c | 11 | ||||
-rw-r--r-- | bstr/bstr.h | 8 |
2 files changed, 18 insertions, 1 deletions
diff --git a/bstr/bstr.c b/bstr/bstr.c
index aacbdc7dbc..964934a100 100644
--- a/bstr/bstr.c
+++ b/bstr/bstr.c
@@ -296,6 +296,17 @@ int bstr_decode_utf8(struct bstr s, struct bstr *out_next)
     return codepoint;
 }
 
+struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next)
+{
+    bstr rest;
+    int code = bstr_decode_utf8(str, &rest);
+    if (code < 0)
+        return (bstr){0};
+    if (out_next)
+        *out_next = rest;
+    return bstr_splice(str, 0, str.len - rest.len);
+}
+
 int bstr_validate_utf8(struct bstr s)
 {
     while (s.len) {
diff --git a/bstr/bstr.h b/bstr/bstr.h
index 71d5d473c4..01fe2261a5 100644
--- a/bstr/bstr.h
+++ b/bstr/bstr.h
@@ -81,13 +81,19 @@ double bstrtod(struct bstr str, struct bstr *rest);
 void bstr_lower(struct bstr str);
 int bstr_sscanf(struct bstr str, const char *format, ...);
 
-// Decode the UTF-8 code point at the start of the string,, and return the
+// Decode the UTF-8 code point at the start of the string, and return the
 // character.
 // After calling this function, *out_next will point to the next character.
 // out_next can be NULL.
 // On error, -1 is returned, and *out_next is not modified.
 int bstr_decode_utf8(struct bstr str, struct bstr *out_next);
 
+// Return the UTF-8 code point at the start of the string.
+// After calling this function, *out_next will point to the next character.
+// out_next can be NULL.
+// On error, an empty string is returned, and *out_next is not modified.
+struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next);
+
 // Return the length of the UTF-8 sequence that starts with the given byte.
 // Given a string char *s, the next UTF-8 code point is to be expected at
 // s + bstr_parse_utf8_code_length(s[0])