aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/index.cc
diff options
context:
space:
mode:
authorGravatar Carl Worth <cworth@cworth.org>2009-11-09 16:12:28 -0800
committerGravatar Carl Worth <cworth@cworth.org>2009-11-09 16:24:03 -0800
commit146549321044615d9aef2b30cedccda9c49f3f38 (patch)
treed12ae3b42b8bca295c8d9443f455690191db1a7a /lib/index.cc
parent8ae1c3c6e11f7c8d4066ef3d8affe4d07528807b (diff)
libify: Move library sources down into lib directory.
A "make" invocation still works from the top-level, but not from down inside the lib directory yet.
Diffstat (limited to 'lib/index.cc')
-rw-r--r--lib/index.cc326
1 files changed, 326 insertions, 0 deletions
diff --git a/lib/index.cc b/lib/index.cc
new file mode 100644
index 00000000..747a4e63
--- /dev/null
+++ b/lib/index.cc
@@ -0,0 +1,326 @@
+/*
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#include "notmuch-private.h"
+
+#include <gmime/gmime.h>
+
+#include <xapian.h>
+
+/* We're finally down to a single (NAME + address) email "mailbox". */
+static void
+_index_address_mailbox (notmuch_message_t *message,
+ const char *prefix_name,
+ InternetAddress *address)
+{
+ InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address);
+ const char *name, *addr;
+ char *contact;
+ int own_name = 0;
+
+ name = internet_address_get_name (address);
+ addr = internet_address_mailbox_get_addr (mailbox);
+
+ if (addr) {
+ if (name) {
+ contact = talloc_asprintf (message, "\"%s\" <%s>",
+ name, addr);
+ _notmuch_message_add_term (message, "contact", contact);
+ talloc_free (contact);
+ } else {
+ _notmuch_message_add_term (message, "contact", addr);
+ }
+ }
+
+ /* In the absence of a name, we'll strip the part before the @
+ * from the address. */
+ if (! name) {
+ const char *at;
+
+ at = strchr (addr, '@');
+ if (at) {
+ name = strndup (addr, at - addr);
+ own_name = 1;
+ }
+ }
+
+ if (name)
+ _notmuch_message_gen_terms (message, prefix_name, name);
+ if (addr)
+ _notmuch_message_gen_terms (message, prefix_name, addr);
+}
+
+static void
+_index_address_list (notmuch_message_t *message,
+ const char *prefix_name,
+ InternetAddressList *addresses);
+
+/* The outer loop over the InternetAddressList wasn't quite enough.
+ * There can actually be a tree here where a single member of the list
+ * is a "group" containing another list. Recurse please.
+ */
+static void
+_index_address_group (notmuch_message_t *message,
+ const char *prefix_name,
+ InternetAddress *address)
+{
+ InternetAddressGroup *group;
+ InternetAddressList *list;
+
+ group = INTERNET_ADDRESS_GROUP (address);
+ list = internet_address_group_get_members (group);
+
+ if (! list)
+ return;
+
+ _index_address_list (message, prefix_name, list);
+}
+
+static void
+_index_address_list (notmuch_message_t *message,
+ const char *prefix_name,
+ InternetAddressList *addresses)
+{
+ int i;
+ InternetAddress *address;
+
+ if (addresses == NULL)
+ return;
+
+ for (i = 0; i < internet_address_list_length (addresses); i++) {
+ address = internet_address_list_get_address (addresses, i);
+ if (INTERNET_ADDRESS_IS_MAILBOX (address)) {
+ _index_address_mailbox (message, prefix_name, address);
+ } else if (INTERNET_ADDRESS_IS_GROUP (address)) {
+ _index_address_group (message, prefix_name, address);
+ } else {
+ INTERNAL_ERROR ("GMime InternetAddress is neither a mailbox nor a group.\n");
+ }
+ }
+}
+
+static const char *
+skip_re_in_subject (const char *subject)
+{
+ const char *s = subject;
+
+ if (subject == NULL)
+ return NULL;
+
+ while (*s) {
+ while (*s && isspace (*s))
+ s++;
+ if (strncasecmp (s, "re:", 3) == 0)
+ s += 3;
+ else
+ break;
+ }
+
+ return s;
+}
+
+/* Given a string representing the body of a message, generate terms
+ * for it, (skipping quoted portions and signatures).
+ *
+ * This function is evil in that it modifies the string passed to it,
+ * (changing some newlines into '\0').
+ */
+static void
+_index_body_text (notmuch_message_t *message, char *body)
+{
+ char *line, *line_end, *next_line;
+
+ if (body == NULL)
+ return;
+
+ next_line = body;
+
+ while (1) {
+ line = next_line;
+ if (*line == '\0')
+ break;
+
+ next_line = strchr (line, '\n');
+ if (next_line == NULL) {
+ next_line = line + strlen (line);
+ }
+ line_end = next_line - 1;
+
+ /* Get to the next non-blank line. */
+ while (*next_line == '\n')
+ next_line++;
+
+ /* Skip blank lines. */
+ if (line_end < line)
+ continue;
+
+ /* Skip lines that are quotes. */
+ if (*line == '>')
+ continue;
+
+ /* Also skip lines introducing a quote on the next line. */
+ if (*line_end == ':' && *next_line == '>')
+ continue;
+
+ /* Finally, bail as soon as we see a signature. */
+ /* XXX: Should only do this if "near" the end of the message. */
+ if (strncmp (line, "-- ", 3) == 0)
+ break;
+
+ *(line_end + 1) = '\0';
+
+ _notmuch_message_gen_terms (message, NULL, line);
+ }
+}
+
+/* Callback to generate terms for each mime part of a message. */
+static void
+_index_mime_part (notmuch_message_t *message,
+ GMimeObject *part)
+{
+ GMimeStream *stream;
+ GMimeDataWrapper *wrapper;
+ GByteArray *byte_array;
+ GMimeContentDisposition *disposition;
+ char *body;
+
+ if (GMIME_IS_MULTIPART (part)) {
+ GMimeMultipart *multipart = GMIME_MULTIPART (part);
+ int i;
+
+ for (i = 0; i < g_mime_multipart_get_count (multipart); i++) {
+ if (GMIME_IS_MULTIPART_SIGNED (multipart)) {
+ /* Don't index the signature. */
+ if (i == 1)
+ continue;
+ if (i > 1)
+ fprintf (stderr, "Warning: Unexpected extra parts of mutlipart/signed. Indexing anyway.\n");
+ }
+ _index_mime_part (message,
+ g_mime_multipart_get_part (multipart, i));
+ }
+ return;
+ }
+
+ if (GMIME_IS_MESSAGE_PART (part)) {
+ GMimeMessage *mime_message;
+
+ mime_message = g_mime_message_part_get_message (GMIME_MESSAGE_PART (part));
+
+ _index_mime_part (message, g_mime_message_get_mime_part (mime_message));
+
+ return;
+ }
+
+ if (! (GMIME_IS_PART (part))) {
+ fprintf (stderr, "Warning: Not indexing unknown mime part: %s.\n",
+ g_type_name (G_OBJECT_TYPE (part)));
+ return;
+ }
+
+ disposition = g_mime_object_get_content_disposition (part);
+ if (disposition &&
+ strcmp (disposition->disposition, GMIME_DISPOSITION_ATTACHMENT) == 0)
+ {
+ const char *filename = g_mime_part_get_filename (GMIME_PART (part));
+
+ _notmuch_message_add_term (message, "tag", "attachment");
+ _notmuch_message_gen_terms (message, "attachment", filename);
+
+ /* XXX: Would be nice to call out to something here to parse
+ * the attachment into text and then index that. */
+ return;
+ }
+
+ byte_array = g_byte_array_new ();
+
+ stream = g_mime_stream_mem_new_with_byte_array (byte_array);
+ g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE);
+ wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+ if (wrapper)
+ g_mime_data_wrapper_write_to_stream (wrapper, stream);
+
+ g_object_unref (stream);
+
+ g_byte_array_append (byte_array, (guint8 *) "\0", 1);
+ body = (char *) g_byte_array_free (byte_array, FALSE);
+
+ _index_body_text (message, body);
+
+ free (body);
+}
+
+notmuch_status_t
+_notmuch_message_index_file (notmuch_message_t *message,
+ const char *filename)
+{
+ GMimeStream *stream = NULL;
+ GMimeParser *parser = NULL;
+ GMimeMessage *mime_message = NULL;
+ InternetAddressList *addresses;
+ FILE *file = NULL;
+ const char *from, *subject;
+ notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+ static int initialized = 0;
+
+ if (! initialized) {
+ g_mime_init (0);
+ initialized = 1;
+ }
+
+ file = fopen (filename, "r");
+ if (! file) {
+ fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
+ ret = NOTMUCH_STATUS_FILE_ERROR;
+ goto DONE;
+ }
+
+ /* Evil GMime steals my FILE* here so I won't fclose it. */
+ stream = g_mime_stream_file_new (file);
+
+ parser = g_mime_parser_new_with_stream (stream);
+
+ mime_message = g_mime_parser_construct_message (parser);
+
+ from = g_mime_message_get_sender (mime_message);
+ addresses = internet_address_list_parse_string (from);
+
+ _index_address_list (message, "from", addresses);
+
+ addresses = g_mime_message_get_all_recipients (mime_message);
+ _index_address_list (message, "to", addresses);
+
+ subject = g_mime_message_get_subject (mime_message);
+ subject = skip_re_in_subject (subject);
+ _notmuch_message_gen_terms (message, "subject", subject);
+
+ _index_mime_part (message, g_mime_message_get_mime_part (mime_message));
+
+ DONE:
+ if (mime_message)
+ g_object_unref (mime_message);
+
+ if (parser)
+ g_object_unref (parser);
+
+ if (stream)
+ g_object_unref (stream);
+
+ return ret;
+}