From 146549321044615d9aef2b30cedccda9c49f3f38 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 9 Nov 2009 16:12:28 -0800 Subject: libify: Move library sources down into lib directory. A "make" invocation still works from the top-level, but not from down inside the lib directory yet. --- lib/index.cc | 326 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 326 insertions(+) create mode 100644 lib/index.cc (limited to 'lib/index.cc') diff --git a/lib/index.cc b/lib/index.cc new file mode 100644 index 00000000..747a4e63 --- /dev/null +++ b/lib/index.cc @@ -0,0 +1,326 @@ +/* + * Copyright © 2009 Carl Worth + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/ . + * + * Author: Carl Worth + */ + +#include "notmuch-private.h" + +#include <gmime/gmime.h> + +#include <xapian.h> + +/* We're finally down to a single (NAME + address) email "mailbox". 
*/ +static void +_index_address_mailbox (notmuch_message_t *message, + const char *prefix_name, + InternetAddress *address) +{ + InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address); + const char *name, *addr; + char *contact; + int own_name = 0; + + name = internet_address_get_name (address); + addr = internet_address_mailbox_get_addr (mailbox); + + if (addr) { + if (name) { + contact = talloc_asprintf (message, "\"%s\" <%s>", + name, addr); + _notmuch_message_add_term (message, "contact", contact); + talloc_free (contact); + } else { + _notmuch_message_add_term (message, "contact", addr); + } + } + + /* In the absence of a name, we'll strip the part before the @ + * from the address. */ + if (! name) { + const char *at; + + at = strchr (addr, '@'); + if (at) { + name = strndup (addr, at - addr); + own_name = 1; + } + } + + if (name) + _notmuch_message_gen_terms (message, prefix_name, name); + if (addr) + _notmuch_message_gen_terms (message, prefix_name, addr); +} + +static void +_index_address_list (notmuch_message_t *message, + const char *prefix_name, + InternetAddressList *addresses); + +/* The outer loop over the InternetAddressList wasn't quite enough. + * There can actually be a tree here where a single member of the list + * is a "group" containing another list. Recurse please. + */ +static void +_index_address_group (notmuch_message_t *message, + const char *prefix_name, + InternetAddress *address) +{ + InternetAddressGroup *group; + InternetAddressList *list; + + group = INTERNET_ADDRESS_GROUP (address); + list = internet_address_group_get_members (group); + + if (! 
list) + return; + + _index_address_list (message, prefix_name, list); +} + +static void +_index_address_list (notmuch_message_t *message, + const char *prefix_name, + InternetAddressList *addresses) +{ + int i; + InternetAddress *address; + + if (addresses == NULL) + return; + + for (i = 0; i < internet_address_list_length (addresses); i++) { + address = internet_address_list_get_address (addresses, i); + if (INTERNET_ADDRESS_IS_MAILBOX (address)) { + _index_address_mailbox (message, prefix_name, address); + } else if (INTERNET_ADDRESS_IS_GROUP (address)) { + _index_address_group (message, prefix_name, address); + } else { + INTERNAL_ERROR ("GMime InternetAddress is neither a mailbox nor a group.\n"); + } + } +} + +static const char * +skip_re_in_subject (const char *subject) +{ + const char *s = subject; + + if (subject == NULL) + return NULL; + + while (*s) { + while (*s && isspace (*s)) + s++; + if (strncasecmp (s, "re:", 3) == 0) + s += 3; + else + break; + } + + return s; +} + +/* Given a string representing the body of a message, generate terms + * for it, (skipping quoted portions and signatures). + * + * This function is evil in that it modifies the string passed to it, + * (changing some newlines into '\0'). + */ +static void +_index_body_text (notmuch_message_t *message, char *body) +{ + char *line, *line_end, *next_line; + + if (body == NULL) + return; + + next_line = body; + + while (1) { + line = next_line; + if (*line == '\0') + break; + + next_line = strchr (line, '\n'); + if (next_line == NULL) { + next_line = line + strlen (line); + } + line_end = next_line - 1; + + /* Get to the next non-blank line. */ + while (*next_line == '\n') + next_line++; + + /* Skip blank lines. */ + if (line_end < line) + continue; + + /* Skip lines that are quotes. */ + if (*line == '>') + continue; + + /* Also skip lines introducing a quote on the next line. */ + if (*line_end == ':' && *next_line == '>') + continue; + + /* Finally, bail as soon as we see a signature. 
*/ + /* XXX: Should only do this if "near" the end of the message. */ + if (strncmp (line, "-- ", 3) == 0) + break; + + *(line_end + 1) = '\0'; + + _notmuch_message_gen_terms (message, NULL, line); + } +} + +/* Callback to generate terms for each mime part of a message. */ +static void +_index_mime_part (notmuch_message_t *message, + GMimeObject *part) +{ + GMimeStream *stream; + GMimeDataWrapper *wrapper; + GByteArray *byte_array; + GMimeContentDisposition *disposition; + char *body; + + if (GMIME_IS_MULTIPART (part)) { + GMimeMultipart *multipart = GMIME_MULTIPART (part); + int i; + + for (i = 0; i < g_mime_multipart_get_count (multipart); i++) { + if (GMIME_IS_MULTIPART_SIGNED (multipart)) { + /* Don't index the signature. */ + if (i == 1) + continue; + if (i > 1) + fprintf (stderr, "Warning: Unexpected extra parts of mutlipart/signed. Indexing anyway.\n"); + } + _index_mime_part (message, + g_mime_multipart_get_part (multipart, i)); + } + return; + } + + if (GMIME_IS_MESSAGE_PART (part)) { + GMimeMessage *mime_message; + + mime_message = g_mime_message_part_get_message (GMIME_MESSAGE_PART (part)); + + _index_mime_part (message, g_mime_message_get_mime_part (mime_message)); + + return; + } + + if (! (GMIME_IS_PART (part))) { + fprintf (stderr, "Warning: Not indexing unknown mime part: %s.\n", + g_type_name (G_OBJECT_TYPE (part))); + return; + } + + disposition = g_mime_object_get_content_disposition (part); + if (disposition && + strcmp (disposition->disposition, GMIME_DISPOSITION_ATTACHMENT) == 0) + { + const char *filename = g_mime_part_get_filename (GMIME_PART (part)); + + _notmuch_message_add_term (message, "tag", "attachment"); + _notmuch_message_gen_terms (message, "attachment", filename); + + /* XXX: Would be nice to call out to something here to parse + * the attachment into text and then index that. 
*/ + return; + } + + byte_array = g_byte_array_new (); + + stream = g_mime_stream_mem_new_with_byte_array (byte_array); + g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE); + wrapper = g_mime_part_get_content_object (GMIME_PART (part)); + if (wrapper) + g_mime_data_wrapper_write_to_stream (wrapper, stream); + + g_object_unref (stream); + + g_byte_array_append (byte_array, (guint8 *) "\0", 1); + body = (char *) g_byte_array_free (byte_array, FALSE); + + _index_body_text (message, body); + + free (body); +} + +notmuch_status_t +_notmuch_message_index_file (notmuch_message_t *message, + const char *filename) +{ + GMimeStream *stream = NULL; + GMimeParser *parser = NULL; + GMimeMessage *mime_message = NULL; + InternetAddressList *addresses; + FILE *file = NULL; + const char *from, *subject; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + static int initialized = 0; + + if (! initialized) { + g_mime_init (0); + initialized = 1; + } + + file = fopen (filename, "r"); + if (! file) { + fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno)); + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + + /* Evil GMime steals my FILE* here so I won't fclose it. 
*/ + stream = g_mime_stream_file_new (file); + + parser = g_mime_parser_new_with_stream (stream); + + mime_message = g_mime_parser_construct_message (parser); + + from = g_mime_message_get_sender (mime_message); + addresses = internet_address_list_parse_string (from); + + _index_address_list (message, "from", addresses); + + addresses = g_mime_message_get_all_recipients (mime_message); + _index_address_list (message, "to", addresses); + + subject = g_mime_message_get_subject (mime_message); + subject = skip_re_in_subject (subject); + _notmuch_message_gen_terms (message, "subject", subject); + + _index_mime_part (message, g_mime_message_get_mime_part (mime_message)); + + DONE: + if (mime_message) + g_object_unref (mime_message); + + if (parser) + g_object_unref (parser); + + if (stream) + g_object_unref (stream); + + return ret; +} -- cgit v1.2.3