aboutsummaryrefslogtreecommitdiffhomepage
path: root/bstr
diff options
context:
space:
mode:
author: wm4 <wm4@nowhere> 2014-01-15 16:13:07 +0100
committer: wm4 <wm4@nowhere> 2014-01-15 16:13:07 +0100
commit: ca8937d7d269c0ef8881d2ac7a227fdb990a5753 (patch)
tree: 7f659346908f3a8e5b705a553720dceb09000c65 /bstr
parent: 904060ad7b3d4d4e7e790bcf94d4f5230c854c43 (diff)
bstr: add function for splitting UTF-8
Diffstat (limited to 'bstr')
-rw-r--r-- bstr/bstr.c | 11
-rw-r--r-- bstr/bstr.h | 8
2 files changed, 18 insertions, 1 deletion
diff --git a/bstr/bstr.c b/bstr/bstr.c
index aacbdc7dbc..964934a100 100644
--- a/bstr/bstr.c
+++ b/bstr/bstr.c
@@ -296,6 +296,17 @@ int bstr_decode_utf8(struct bstr s, struct bstr *out_next)
return codepoint;
}
+struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next)
+{ // Split the leading UTF-8 code point off str and return it as its own bstr.
+ bstr rest; // only assigned by bstr_decode_utf8() when decoding succeeds
+ int code = bstr_decode_utf8(str, &rest);
+ if (code < 0) // decoding failed; rest must not be read past this point
+ return (bstr){0}; // zero-initialized bstr is the error result
+ if (out_next) // out_next is optional; callers may pass NULL
+ *out_next = rest;
+ return bstr_splice(str, 0, str.len - rest.len); // str.len - rest.len = bytes the code point used; presumably splice takes (start, end) - confirm
+}
+
int bstr_validate_utf8(struct bstr s)
{
while (s.len) {
diff --git a/bstr/bstr.h b/bstr/bstr.h
index 71d5d473c4..01fe2261a5 100644
--- a/bstr/bstr.h
+++ b/bstr/bstr.h
@@ -81,13 +81,19 @@ double bstrtod(struct bstr str, struct bstr *rest);
void bstr_lower(struct bstr str);
int bstr_sscanf(struct bstr str, const char *format, ...);
-// Decode the UTF-8 code point at the start of the string,, and return the
+// Decode the UTF-8 code point at the start of the string, and return the
// character.
// After calling this function, *out_next will point to the next character.
// out_next can be NULL.
// On error, -1 is returned, and *out_next is not modified.
int bstr_decode_utf8(struct bstr str, struct bstr *out_next);
+// Return the leading UTF-8 code point of the string as a substring of str.
+// On success, *out_next is set to the remainder of the string after it.
+// out_next can be NULL.
+// On error, an empty string is returned, and *out_next is not modified.
+struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next);
+
// Return the length of the UTF-8 sequence that starts with the given byte.
// Given a string char *s, the next UTF-8 code point is to be expected at
// s + bstr_parse_utf8_code_length(s[0])