aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/message-file.c
diff options
context:
space:
mode:
author: Carl Worth <cworth@cworth.org> 2009-11-09 16:12:28 -0800
committer: Carl Worth <cworth@cworth.org> 2009-11-09 16:24:03 -0800
commit: 146549321044615d9aef2b30cedccda9c49f3f38 (patch)
tree: d12ae3b42b8bca295c8d9443f455690191db1a7a /lib/message-file.c
parent: 8ae1c3c6e11f7c8d4066ef3d8affe4d07528807b (diff)
libify: Move library sources down into lib directory.
A "make" invocation still works from the top-level, but not from down inside the lib directory yet.
Diffstat (limited to 'lib/message-file.c')
-rw-r--r-- lib/message-file.c 352
1 files changed, 352 insertions, 0 deletions
diff --git a/lib/message-file.c b/lib/message-file.c
new file mode 100644
index 00000000..75caba6d
--- /dev/null
+++ b/lib/message-file.c
@@ -0,0 +1,352 @@
+/* message-file.c - Utility functions for parsing an email message for notmuch.
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#include <stdarg.h>
+
+#include "notmuch-private.h"
+
+#include <gmime/gmime.h>
+
+#include <glib.h> /* GHashTable */
+
+/* Growable buffer in which the unfolded value of the header currently
+ * being parsed is accumulated (see copy_header_unfolding). */
+typedef struct {
+ /* Buffer holding the NUL-terminated value text. */
+ char *str;
+ /* Number of bytes allocated for 'str'; 0 when nothing is allocated. */
+ size_t size;
+ /* Length of the text stored in 'str', not counting the NUL. */
+ size_t len;
+} header_value_closure_t;
+
+/* State for incrementally parsing the headers of a single message file. */
+struct _notmuch_message_file {
+ /* File object: the stream opened on the message file. */
+ FILE *file;
+
+ /* Header storage */
+ /* Set to 1 by notmuch_message_file_restrict_headersv; when set, only
+ * header names already present in 'headers' are stored. */
+ int restrict_headers;
+ /* Map from header name (hashed and compared case-insensitively) to
+ * its decoded value. Restricted names are inserted with a NULL value
+ * until the header is actually parsed. */
+ GHashTable *headers;
+ /* Number of lines seen so far that contain no ':'. */
+ int broken_headers;
+ /* Number of lines seen so far that do contain a ':'. */
+ int good_headers;
+ size_t header_size; /* Length of full message header in bytes. */
+
+ /* Parsing state */
+ /* Buffer and capacity handed to getline(); between calls 'line' holds
+ * the next header line that has been read but not yet processed. */
+ char *line;
+ size_t line_size;
+ /* Scratch buffer for the value of the header being parsed. */
+ header_value_closure_t value;
+
+ /* Set to 1 on the first call to notmuch_message_file_get_header. */
+ int parsing_started;
+ /* Set to 1 at end of file, at the blank line that ends the headers,
+ * or when the input is judged not to be a mail message. */
+ int parsing_finished;
+};
+
+/* GHashTable equality callback: returns 1 when the two NUL-terminated
+ * strings are equal ignoring case, 0 otherwise. */
+static int
+strcase_equal (const void *a, const void *b)
+{
+    const char *lhs = a;
+    const char *rhs = b;
+
+    if (strcasecmp (lhs, rhs) != 0)
+        return 0;
+
+    return 1;
+}
+
+/* GHashTable hash callback: case-insensitive djb2 hash of the
+ * NUL-terminated string 'ptr'.
+ *
+ * A NULL or empty string hashes to the djb2 seed (5381). Strings that
+ * differ only in case hash to the same value, which keeps this function
+ * consistent with strcase_equal. */
+static unsigned int
+strcase_hash (const void *ptr)
+{
+    /* Iterate as unsigned char: passing a negative plain-char value
+     * (any byte >= 0x80 where char is signed) to tolower() is undefined
+     * behaviour. */
+    const unsigned char *s = ptr;
+
+    /* This is the djb2 hash. */
+    unsigned int hash = 5381;
+
+    if (s == NULL)
+        return hash;
+
+    for (; *s; s++)
+        hash = ((hash << 5) + hash) + (unsigned int) tolower (*s);
+
+    return hash;
+}
+
+/* talloc destructor for a notmuch_message_file_t: releases the line
+ * buffer, the header-value scratch buffer, the header table and the
+ * open stream. Always returns 0. */
+static int
+_notmuch_message_file_destructor (notmuch_message_file_t *message)
+{
+    /* free() accepts NULL, so the line buffer needs no guard. */
+    free (message->line);
+
+    if (message->value.size != 0)
+        free (message->value.str);
+
+    if (message->headers != NULL)
+        g_hash_table_destroy (message->headers);
+
+    if (message->file != NULL)
+        fclose (message->file);
+
+    return 0;
+}
+
+/* Create a new notmuch_message_file_t for 'filename' with 'ctx' as
+ * the talloc owner.
+ *
+ * Returns NULL if the object cannot be allocated or if 'filename'
+ * cannot be opened for reading; in the latter case a message is
+ * printed to stderr. */
+notmuch_message_file_t *
+_notmuch_message_file_open_ctx (void *ctx, const char *filename)
+{
+    notmuch_message_file_t *message;
+
+    /* talloc_zero leaves every field (including parsing_started and
+     * parsing_finished) zeroed, so no further initialization of the
+     * flags is needed. */
+    message = talloc_zero (ctx, notmuch_message_file_t);
+    if (unlikely (message == NULL))
+        return NULL;
+
+    /* Register the destructor first so the failure path below can
+     * simply close the partially built object. */
+    talloc_set_destructor (message, _notmuch_message_file_destructor);
+
+    message->file = fopen (filename, "r");
+    if (message->file == NULL)
+        goto FAIL;
+
+    /* Keys are allocated with xstrdup/xstrndup and released with free;
+     * values come from g_mime_utils_header_decode_text, i.e. from the
+     * GLib allocator, and so must be released with g_free. */
+    message->headers = g_hash_table_new_full (strcase_hash,
+                                              strcase_equal,
+                                              free,
+                                              g_free);
+
+    return message;
+
+  FAIL:
+    fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
+    notmuch_message_file_close (message);
+
+    return NULL;
+}
+
+/* Open 'filename' as a message file that has no talloc parent.
+ * Returns whatever _notmuch_message_file_open_ctx returns. */
+notmuch_message_file_t *
+notmuch_message_file_open (const char *filename)
+{
+    void *no_owner = NULL;
+
+    return _notmuch_message_file_open_ctx (no_owner, filename);
+}
+
+/* Release 'message' by handing it to talloc_free; the result of
+ * talloc_free is deliberately ignored. */
+void
+notmuch_message_file_close (notmuch_message_file_t *message)
+{
+    (void) talloc_free (message);
+}
+
+/* Restrict header storage to the names listed in 'va_headers', a
+ * NULL-terminated sequence of char* header names.
+ *
+ * Each name is copied into message->headers with a NULL value and
+ * restrict_headers is switched on. It is an internal error to call
+ * this after parsing has started. */
+void
+notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
+                                        va_list va_headers)
+{
+    char *name;
+
+    if (message->parsing_started)
+        INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");
+
+    for (name = va_arg (va_headers, char*);
+         name != NULL;
+         name = va_arg (va_headers, char*))
+    {
+        g_hash_table_insert (message->headers, xstrdup (name), NULL);
+    }
+
+    message->restrict_headers = 1;
+}
+
+/* Variadic front end for notmuch_message_file_restrict_headersv.
+ *
+ * The arguments after 'message' are char* header names, terminated by
+ * a NULL pointer. */
+void
+notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
+{
+    va_list va_headers;
+
+    va_start (va_headers, message);
+
+    notmuch_message_file_restrict_headersv (message, va_headers);
+
+    /* Every va_start must be paired with a va_end in the same
+     * function before it returns. */
+    va_end (va_headers);
+}
+
+/* Append 'chunk' to the header value accumulated in 'value'.
+ *
+ * Leading spaces and tabs of 'chunk' are skipped. If 'value' already
+ * holds text, a single space is inserted before the new text, which is
+ * how a folded continuation line is joined to the preceding one. A
+ * single trailing newline is removed from the result. The buffer is
+ * grown with xrealloc as needed. A NULL 'chunk' is ignored. */
+static void
+copy_header_unfolding (header_value_closure_t *value,
+                       const char *chunk)
+{
+    size_t chunk_len, needed;
+    char *last;
+
+    if (chunk == NULL)
+        return;
+
+    while (*chunk == ' ' || *chunk == '\t')
+        chunk++;
+
+    chunk_len = strlen (chunk);
+
+    /* Existing text + joining space + chunk + terminating NUL. */
+    needed = value->len + 1 + chunk_len + 1;
+
+    if (needed > value->size) {
+        /* Use size_t throughout: an unsigned int would truncate large
+         * sizes on platforms where size_t is wider. */
+        size_t new_size = value->size;
+
+        if (new_size == 0)
+            new_size = needed;
+        while (new_size < needed)
+            new_size *= 2;
+
+        value->str = xrealloc (value->str, new_size);
+        value->size = new_size;
+    }
+
+    last = value->str + value->len;
+    if (value->len) {
+        *last = ' ';
+        last++;
+        value->len++;
+    }
+
+    memcpy (last, chunk, chunk_len + 1);
+    value->len += chunk_len;
+
+    /* Only inspect the last character when there is one; with an empty
+     * result, str[len - 1] would be a read before the buffer. */
+    if (value->len > 0 && value->str[value->len - 1] == '\n') {
+        value->len--;
+        value->str[value->len] = '\0';
+    }
+}
+
+/* Return the decoded value of the header named 'header_desired', or
+ * NULL if no such header is found before the end of the headers.
+ *
+ * Headers are parsed incrementally: lines are read from the file only
+ * until the desired header has been seen. Every header parsed along
+ * the way is stored in message->headers, and each call consults that
+ * table before reading any further. The returned pointer is the value
+ * stored in message->headers.
+ *
+ * As a special-case, a value of NULL for header_desired will force
+ * the entire header to be parsed if it is not parsed already. This is
+ * used by the _notmuch_message_file_get_headers_end function. */
+const char *
+notmuch_message_file_get_header (notmuch_message_file_t *message,
+ const char *header_desired)
+{
+ int contains;
+ char *header, *decoded_value;
+ const char *s, *colon;
+ int match;
+ /* Guards the one-time g_mime_init call below. */
+ static int initialized = 0;
+
+ if (! initialized) {
+ g_mime_init (0);
+ initialized = 1;
+ }
+
+ message->parsing_started = 1;
+
+ /* First see whether an earlier call already parsed this header. */
+ if (header_desired == NULL)
+ contains = 0;
+ else
+ contains = g_hash_table_lookup_extended (message->headers,
+ header_desired, NULL,
+ (gpointer *) &decoded_value);
+
+ /* A key with a NULL value is a restricted name that has not been
+ * parsed yet, so only a non-NULL value counts as a hit. */
+ if (contains && decoded_value)
+ return decoded_value;
+
+ if (message->parsing_finished)
+ return NULL;
+
+/* Read lines into message->line until one is found that does not start
+ * with a space or tab, i.e. the first line of the next header.
+ * Continuation lines met on the way are appended to 'closure' when it
+ * is non-NULL and are always added to header_size. Sets
+ * parsing_finished when getline fails or when a line starting with a
+ * newline (the end of the headers) is read. */
+#define NEXT_HEADER_LINE(closure) \
+ while (1) { \
+ ssize_t bytes_read = getline (&message->line, \
+ &message->line_size, \
+ message->file); \
+ if (bytes_read == -1) { \
+ message->parsing_finished = 1; \
+ break; \
+ } \
+ if (*message->line == '\n') { \
+ message->parsing_finished = 1; \
+ break; \
+ } \
+ if (closure && \
+ (*message->line == ' ' || *message->line == '\t')) \
+ { \
+ copy_header_unfolding ((closure), message->line); \
+ } \
+ if (*message->line == ' ' || *message->line == '\t') \
+ message->header_size += strlen (message->line); \
+ else \
+ break; \
+ }
+
+ /* No pending line: this is the first read, so fetch the first header
+ * line. */
+ if (message->line == NULL)
+ NEXT_HEADER_LINE (NULL);
+
+ /* Each iteration handles the header whose first line is currently in
+ * message->line and leaves the following header's first line there. */
+ while (1) {
+
+ if (message->parsing_finished)
+ break;
+
+ colon = strchr (message->line, ':');
+
+ if (colon == NULL) {
+ message->broken_headers++;
+ /* A simple heuristic for giving up on things that just
+ * don't look like mail messages. */
+ if (message->broken_headers >= 10 &&
+ message->good_headers < 5)
+ {
+ /* The 'continue' reaches the parsing_finished check at the
+ * top of the loop, which then breaks out. */
+ message->parsing_finished = 1;
+ continue;
+ }
+ NEXT_HEADER_LINE (NULL);
+ continue;
+ }
+
+ message->header_size += strlen (message->line);
+
+ message->good_headers++;
+
+ /* The header name is everything before the first ':'. */
+ header = xstrndup (message->line, colon - message->line);
+
+ /* In restricted mode, skip any header whose name was not
+ * registered in the table beforehand. */
+ if (message->restrict_headers &&
+ ! g_hash_table_lookup_extended (message->headers,
+ header, NULL, NULL))
+ {
+ free (header);
+ NEXT_HEADER_LINE (NULL);
+ continue;
+ }
+
+ s = colon + 1;
+ while (*s == ' ' || *s == '\t')
+ s++;
+
+ /* Start a fresh value, then let NEXT_HEADER_LINE append any
+ * continuation lines to it. */
+ message->value.len = 0;
+ copy_header_unfolding (&message->value, s);
+
+ NEXT_HEADER_LINE (&message->value);
+
+ if (header_desired == 0)
+ match = 0;
+ else
+ match = (strcasecmp (header, header_desired) == 0);
+
+ decoded_value = g_mime_utils_header_decode_text (message->value.str);
+
+ /* The table takes over 'header' and 'decoded_value'.
+ * NOTE(review): if this name is already present with a non-NULL
+ * value (a repeated header), the insert replaces that value;
+ * confirm that no caller still holds the previously returned
+ * pointer. */
+ g_hash_table_insert (message->headers, header, decoded_value);
+
+ if (match)
+ return decoded_value;
+ }
+
+ /* Parsing is finished: the line and value buffers are no longer
+ * needed. */
+ if (message->line)
+ free (message->line);
+ message->line = NULL;
+
+ if (message->value.size) {
+ free (message->value.str);
+ message->value.str = NULL;
+ message->value.size = 0;
+ message->value.len = 0;
+ }
+
+ /* We've parsed all headers and never found the one we're looking
+ * for. It's probably just not there, but let's check that we
+ * didn't make a mistake preventing us from seeing it. */
+ if (message->restrict_headers && header_desired &&
+ ! g_hash_table_lookup_extended (message->headers,
+ header_desired, NULL, NULL))
+ {
+ INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
+ "included in call to notmuch_message_file_restrict_headers\n",
+ header_desired);
+ }
+
+ return NULL;
+}