aboutsummaryrefslogtreecommitdiffhomepage
path: root/util
diff options
context:
space:
mode:
author Austin Clements <amdragon@MIT.EDU> 2013-01-06 15:22:38 -0500
committer David Bremner <bremner@debian.org> 2013-01-06 22:39:36 -0400
commit 25cf5f5dc45cac42f15643f6df09b46d51d7b5ec (patch)
tree 20f76784471c8203620b9c74973632ff76d6be62 /util
parent baca1219af0ef1f81985759e868a68e9dd78e0d1 (diff)
util: Factor out boolean term quoting routine
This is now a generic boolean term quoting function. It performs minimal quoting to produce user-friendly queries. This could live in tag-util as well, but it is really nothing specific to tags (although the conventions are specific to Xapian). The API is changed from "caller-allocates" to "readline-like". The scan for max tag length is pushed down into the quoting routine. Furthermore, this now combines the term prefix with the quoted term; arguably this is just as easy to do in the caller, but this will nicely parallel the boolean term parsing function to be introduced shortly. This is an amalgamation of code written by David Bremner and myself.
Diffstat (limited to 'util')
-rw-r--r-- util/string-util.c 75
-rw-r--r-- util/string-util.h 15
2 files changed, 90 insertions, 0 deletions
diff --git a/util/string-util.c b/util/string-util.c
index 44f8cd3a..7a71049a 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -20,6 +20,9 @@
#include "string-util.h"
+#include "talloc.h"
+
+#include <errno.h>
char *
strtok_len (char *s, const char *delim, size_t *len)
@@ -32,3 +35,75 @@ strtok_len (char *s, const char *delim, size_t *len)
return *len ? s : NULL;
}
+
+/* Return 1 if c would end an unquoted boolean term, 0 otherwise.
+ * Terminators are every byte at or below ' ' (NUL, control
+ * characters and whitespace) and the close parenthesis. */
+static int
+is_unquoted_terminator (unsigned char c)
+{
+    if (c == ')')
+        return 1;
+
+    /* NUL is covered by this comparison as well. */
+    return c <= ' ';
+}
+
+/* Write "prefix:term" (or just "term" when prefix is NULL) into
+ * *buf as a NUL-terminated string, quoting term when necessary.
+ *
+ * term is quoted when it is empty or contains a terminator byte (see
+ * is_unquoted_terminator), a double quote, an open parenthesis or a
+ * byte above 127. Quoting encloses the term in double quotes and
+ * doubles every internal double quote.
+ *
+ * *buf may be NULL on entry. When it is NULL or *len is too small,
+ * the buffer is (re)allocated with talloc_realloc under ctx to twice
+ * the needed size and *len is updated to match.
+ *
+ * Returns 0 on success. On allocation failure returns -1 with errno
+ * set to ENOMEM; *buf and *len are left unchanged in that case so the
+ * caller keeps a consistent pointer/size pair.
+ */
+int
+make_boolean_term (void *ctx, const char *prefix, const char *term,
+                   char **buf, size_t *len)
+{
+    const char *in;
+    char *out;
+    size_t prefix_len = prefix ? strlen (prefix) : 0;
+    size_t needed;
+    int need_quoting = 0;
+
+    /* Do we need quoting? An empty term must be quoted, otherwise
+     * the output would be a bare "prefix:" with no term at all. To
+     * be paranoid, we also quote anything containing a quote or '(',
+     * even though these only matter at the beginning, and anything
+     * containing non-ASCII text. */
+    if (! term[0])
+        need_quoting = 1;
+    for (in = term; *in && ! need_quoting; in++)
+        if (is_unquoted_terminator (*in) || *in == '"' || *in == '('
+            || (unsigned char) *in > 127)
+            need_quoting = 1;
+
+    if (need_quoting) {
+        /* Two enclosing quotes plus the terminating NUL. */
+        needed = 3;
+        for (in = term; *in; in++)
+            needed += (*in == '"') ? 2 : 1;
+    } else {
+        needed = strlen (term) + 1;
+    }
+
+    /* Reserve space for the prefix and the ':' separator. */
+    if (prefix)
+        needed += prefix_len + 1;
+
+    if ((*buf == NULL) || (needed > *len)) {
+        /* Grow via temporaries so that a failed reallocation does not
+         * clobber the caller's buffer pointer or recorded size. */
+        size_t new_len = 2 * needed;
+        char *new_buf = talloc_realloc (ctx, *buf, char, new_len);
+
+        if (! new_buf) {
+            errno = ENOMEM;
+            return -1;
+        }
+        *buf = new_buf;
+        *len = new_len;
+    }
+
+    out = *buf;
+
+    /* Copy in the prefix */
+    if (prefix) {
+        memcpy (out, prefix, prefix_len);
+        out += prefix_len;
+        *out++ = ':';
+    }
+
+    if (! need_quoting) {
+        strcpy (out, term);
+        return 0;
+    }
+
+    /* Quote term by enclosing it in double quotes and doubling any
+     * internal double quotes. */
+    *out++ = '"';
+    for (in = term; *in; in++) {
+        if (*in == '"')
+            *out++ = '"';
+        *out++ = *in;
+    }
+    *out++ = '"';
+    *out = '\0';
+
+    return 0;
+}
diff --git a/util/string-util.h b/util/string-util.h
index ac7676c8..719c276a 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -19,4 +19,19 @@
char *strtok_len (char *s, const char *delim, size_t *len);
+/* Construct a boolean term query with the specified prefix (e.g.,
+ * "id") and search term, quoting term as necessary. Specifically, if
+ * term contains any whitespace or control characters, non-ASCII
+ * characters, close parenthesis or double quotes, it will be enclosed
+ * in double quotes and any internal double quotes will be doubled
+ * (e.g. a"b -> "a""b"). The result will be a valid notmuch query and
+ * can be parsed by parse_boolean_term.
+ *
+ * If prefix is NULL, only the (possibly quoted) term is written;
+ * otherwise the output has the form prefix:term.
+ *
+ * Output is into buf; it may be talloc_realloced. *buf may be NULL
+ * on entry, in which case a buffer is allocated using talloc_ctx.
+ * *len holds the allocated size of *buf and is updated whenever the
+ * buffer is (re)allocated.
+ *
+ * Return: 0 on success, -1 on error. errno will be set to ENOMEM if
+ * there is an allocation failure.
+ */
+int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
+ char **buf, size_t *len);
+
#endif