bstr: add function for splitting UTF-8

author: wm4 <wm4@nowhere> 2014-01-15 16:13:07 +0100
committer: wm4 <wm4@nowhere> 2014-01-15 16:13:07 +0100
commit: ca8937d7d269c0ef8881d2ac7a227fdb990a5753 (patch)
tree: 7f659346908f3a8e5b705a553720dceb09000c65 /bstr
parent: 904060ad7b3d4d4e7e790bcf94d4f5230c854c43 (diff)
2 files changed, 18 insertions, 1 deletions
diff --git a/bstr/bstr.c b/bstr/bstr.c
index aacbdc7dbc..964934a100 100644
--- a/bstr/bstr.c
+++ b/bstr/bstr.c
@@ -296,6 +296,17 @@ int bstr_decode_utf8(struct bstr s, struct bstr *out_next)
     return codepoint;
 }
 
+struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next)
+{   // Split off the leading UTF-8 code point of str; the remainder goes to *out_next.
+    bstr rest;  // only read below after the decode has succeeded
+    int code = bstr_decode_utf8(str, &rest);
+    if (code < 0)
+        return (bstr){0};  // decode failed: empty result, *out_next is left untouched
+    if (out_next)  // out_next is optional and may be NULL
+        *out_next = rest;
+    return bstr_splice(str, 0, str.len - rest.len);  // length = bytes consumed, assuming rest is the tail of str
+}
+
 int bstr_validate_utf8(struct bstr s)
 {
     while (s.len) {
diff --git a/bstr/bstr.h b/bstr/bstr.h
index 71d5d473c4..01fe2261a5 100644
--- a/bstr/bstr.h
+++ b/bstr/bstr.h
@@ -81,13 +81,19 @@ double bstrtod(struct bstr str, struct bstr *rest);
 void bstr_lower(struct bstr str);
 int bstr_sscanf(struct bstr str, const char *format, ...);
 
-// Decode the UTF-8 code point at the start of the string,, and return the
+// Decode the UTF-8 code point at the start of the string, and return the
 // character.
 // After calling this function, *out_next will point to the next character.
 // out_next can be NULL.
 // On error, -1 is returned, and *out_next is not modified.
 int bstr_decode_utf8(struct bstr str, struct bstr *out_next);
 
+// Return the part of the string that holds the UTF-8 code point at its start.
+// After calling this function, *out_next will point to the next character.
+// out_next can be NULL.
+// On error, an empty string is returned, and *out_next is not modified.
+struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next);
+
 // Return the length of the UTF-8 sequence that starts with the given byte.
 // Given a string char *s, the next UTF-8 code point is to be expected at
 //      s + bstr_parse_utf8_code_length(s[0])
author	wm4 <wm4@nowhere>	2014-01-15 16:13:07 +0100
committer	wm4 <wm4@nowhere>	2014-01-15 16:13:07 +0100
commit	ca8937d7d269c0ef8881d2ac7a227fdb990a5753 (patch)
tree	7f659346908f3a8e5b705a553720dceb09000c65 /bstr
parent	904060ad7b3d4d4e7e790bcf94d4f5230c854c43 (diff)