aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- Makefile 17
-rw-r--r-- database-private.h 1
-rw-r--r-- database.cc 28
-rw-r--r-- index.cc 260
-rw-r--r-- message.cc 26
-rw-r--r-- notmuch-private.h 11
6 files changed, 334 insertions, 9 deletions
diff --git a/Makefile b/Makefile
index b4d77b4e..a1a7a15c 100644
--- a/Makefile
+++ b/Makefile
@@ -2,27 +2,30 @@ PROGS=notmuch
WARN_FLAGS=-Wall -Wextra -Wmissing-declarations -Wwrite-strings -Wswitch-enum
-CDEPENDS_FLAGS=`pkg-config --cflags glib-2.0 talloc`
-CXXDEPENDS_FLAGS=`pkg-config --cflags glib-2.0 talloc` `xapian-config --cxxflags`
+CDEPENDS_FLAGS=`pkg-config --cflags glib-2.0 gmime-2.4 talloc`
+CXXDEPENDS_FLAGS=$(CDEPENDS_FLAGS) `xapian-config --cxxflags`
MYCFLAGS=$(WARN_FLAGS) -O0 -g $(CDEPENDS_FLAGS)
MYCXXFLAGS=$(WARN_FLAGS) -O0 -g $(CXXDEPENDS_FLAGS)
-MYLDFLAGS=`pkg-config --libs glib-2.0 talloc` `xapian-config --libs`
+MYLDFLAGS=`pkg-config --libs glib-2.0 gmime-2.4 talloc` `xapian-config --libs`
-MODULES= \
- notmuch.o \
+LIBRARY= \
database.o \
date.o \
+ index.o \
+ libsha1.o \
message.o \
message-file.o \
query.o \
sha1.o \
tags.o \
thread.o \
- libsha1.o \
xutil.o
+MAIN= \
+ notmuch.o
+
all: $(PROGS)
%.o: %.cc
@@ -31,7 +34,7 @@ all: $(PROGS)
%.o: %.c
$(CC) -c $(CFLAGS) $(MYCFLAGS) $< -o $@
-notmuch: $(MODULES)
+notmuch: $(MAIN) $(LIBRARY)
$(CC) $(MYLDFLAGS) $^ -o $@
Makefile.dep: *.c *.cc
diff --git a/database-private.h b/database-private.h
index a5cca5a4..76e26ce0 100644
--- a/database-private.h
+++ b/database-private.h
@@ -29,6 +29,7 @@ struct _notmuch_database {
char *path;
Xapian::WritableDatabase *xapian_db;
Xapian::QueryParser *query_parser;
+ Xapian::TermGenerator *term_gen;
};
#endif
diff --git a/database.cc b/database.cc
index 71246eb4..583bee82 100644
--- a/database.cc
+++ b/database.cc
@@ -114,6 +114,13 @@ prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
{ "id", "Q" }
};
+/* Mapping from user-visible field names to the term prefixes used for
+ * free-text (probabilistic) terms. Each entry is registered with the
+ * query parser through add_prefix() and is resolved by _find_prefix(). */
+prefix_t PROBABILISTIC_PREFIX[] = {
+    { "from", "XFROM" },
+    { "to", "XTO" },
+    { "attachment", "XATTACHMENT" },
+    { "subject", "XSUBJECT" }
+};
+
int
_internal_error (const char *format, ...)
{
@@ -141,6 +148,10 @@ _find_prefix (const char *name)
if (strcmp (name, BOOLEAN_PREFIX_EXTERNAL[i].name) == 0)
return BOOLEAN_PREFIX_EXTERNAL[i].prefix;
+ for (i = 0; i < ARRAY_SIZE (PROBABILISTIC_PREFIX); i++)
+ if (strcmp (name, PROBABILISTIC_PREFIX[i].name) == 0)
+ return PROBABILISTIC_PREFIX[i].prefix;
+
INTERNAL_ERROR ("No prefix exists for '%s'\n", name);
return "";
@@ -478,14 +489,24 @@ notmuch_database_open (const char *path)
notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path,
Xapian::DB_CREATE_OR_OPEN);
notmuch->query_parser = new Xapian::QueryParser;
+ notmuch->term_gen = new Xapian::TermGenerator;
+ notmuch->term_gen->set_stemmer (Xapian::Stem ("english"));
+
notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
notmuch->query_parser->set_database (*notmuch->xapian_db);
+ notmuch->query_parser->set_stemmer (Xapian::Stem ("english"));
+ notmuch->query_parser->set_stemming_strategy (Xapian::QueryParser::STEM_SOME);
for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) {
prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i];
notmuch->query_parser->add_boolean_prefix (prefix->name,
prefix->prefix);
}
+
+ for (i = 0; i < ARRAY_SIZE (PROBABILISTIC_PREFIX); i++) {
+ prefix_t *prefix = &PROBABILISTIC_PREFIX[i];
+ notmuch->query_parser->add_prefix (prefix->name, prefix->prefix);
+ }
} catch (const Xapian::Error &error) {
fprintf (stderr, "A Xapian exception occurred: %s\n",
error.get_msg().c_str());
@@ -508,6 +529,7 @@ notmuch_database_close (notmuch_database_t *notmuch)
{
notmuch->xapian_db->flush ();
+ delete notmuch->term_gen;
delete notmuch->query_parser;
delete notmuch->xapian_db;
talloc_free (notmuch);
@@ -924,9 +946,11 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
{
ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
goto DONE;
- } else {
- _notmuch_message_sync (message);
}
+
+ _notmuch_message_index_file (message, filename);
+
+ _notmuch_message_sync (message);
} catch (const Xapian::Error &error) {
fprintf (stderr, "A Xapian exception occurred: %s.\n",
error.get_msg().c_str());
diff --git a/index.cc b/index.cc
new file mode 100644
index 00000000..88634fc7
--- /dev/null
+++ b/index.cc
@@ -0,0 +1,260 @@
+/*
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#include "notmuch-private.h"
+
+#include <gmime/gmime.h>
+
+#include <xapian.h>
+
+/* Index a single (NAME + address) email "mailbox".
+ *
+ * Terms are generated under 'prefix_name' for the address itself (when
+ * present) and for the display name. When the mailbox has no display
+ * name, the part of the address before the '@' is indexed in its place.
+ */
+static void
+_index_address_mailbox (notmuch_message_t *message,
+                        const char *prefix_name,
+                        InternetAddress *address)
+{
+    InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address);
+    const char *addr = internet_address_mailbox_get_addr (mailbox);
+    const char *name = internet_address_get_name (address);
+
+    if (addr)
+        _notmuch_message_gen_terms (message, prefix_name, addr);
+
+    if (name) {
+        _notmuch_message_gen_terms (message, prefix_name, name);
+        return;
+    }
+
+    /* No display name: fall back to the local part of the address.
+     * Without an address there is nothing left to derive a name from. */
+    if (addr) {
+        const char *at = strchr (addr, '@');
+
+        if (at) {
+            /* strndup allocates, so the copy must be released once
+             * its terms have been generated. */
+            char *local_part = strndup (addr, at - addr);
+
+            if (local_part) {
+                _notmuch_message_gen_terms (message, prefix_name, local_part);
+                free (local_part);
+            }
+        }
+    }
+}
+
+static void
+_index_address_list (notmuch_message_t *message,
+ const char *prefix_name,
+ InternetAddressList *addresses);
+
+/* An entry of an InternetAddressList is not always a plain mailbox: it
+ * may be a "group" that carries a nested list of its own. Fetch the
+ * group's members and hand them back to _index_address_list so the
+ * whole tree gets indexed. A group without a member list is ignored.
+ */
+static void
+_index_address_group (notmuch_message_t *message,
+                      const char *prefix_name,
+                      InternetAddress *address)
+{
+    InternetAddressList *members =
+        internet_address_group_get_members (INTERNET_ADDRESS_GROUP (address));
+
+    if (members)
+        _index_address_list (message, prefix_name, members);
+}
+
+/* Index every entry of 'addresses' under 'prefix_name'. Mailboxes are
+ * indexed directly and groups are expanded recursively; any other kind
+ * of entry is reported as an internal error. A NULL list is a no-op.
+ */
+static void
+_index_address_list (notmuch_message_t *message,
+                     const char *prefix_name,
+                     InternetAddressList *addresses)
+{
+    int idx;
+
+    if (! addresses)
+        return;
+
+    for (idx = 0; idx < internet_address_list_length (addresses); idx++) {
+        InternetAddress *entry;
+
+        entry = internet_address_list_get_address (addresses, idx);
+
+        if (INTERNET_ADDRESS_IS_MAILBOX (entry)) {
+            _index_address_mailbox (message, prefix_name, entry);
+            continue;
+        }
+
+        if (INTERNET_ADDRESS_IS_GROUP (entry)) {
+            _index_address_group (message, prefix_name, entry);
+            continue;
+        }
+
+        INTERNAL_ERROR ("GMime InternetAddress is neither a mailbox nor a group.\n");
+    }
+}
+
+/* Return a pointer into 'subject' just past any leading whitespace and
+ * any number of leading "re:" markers (matched case-insensitively).
+ *
+ * Returns NULL when 'subject' is NULL. The result aliases the input
+ * string; nothing is allocated.
+ */
+static const char *
+skip_re_in_subject (const char *subject)
+{
+    const char *s = subject;
+
+    if (subject == NULL)
+        return NULL;
+
+    while (*s) {
+        /* Subjects may hold non-ASCII bytes; <ctype.h> functions are
+         * only defined for values representable as unsigned char. */
+        while (*s && isspace ((unsigned char) *s))
+            s++;
+        if (strncasecmp (s, "re:", 3) == 0)
+            s += 3;
+        else
+            break;
+    }
+
+    return s;
+}
+
+/* Generate terms for one MIME part of a message.
+ *
+ * Multipart containers and embedded message parts are walked
+ * recursively. For multipart/signed, the second child (the signature)
+ * is skipped. A part whose disposition is "attachment" only gets an
+ * "attachment" tag plus terms for its filename; its content is not
+ * indexed. Any other leaf part has its content written to memory and
+ * indexed as unprefixed text.
+ */
+static void
+_index_mime_part (notmuch_message_t *message,
+                  GMimeObject *part)
+{
+    GMimeStream *stream;
+    GMimeDataWrapper *wrapper;
+    GByteArray *byte_array;
+    GMimeContentDisposition *disposition;
+    char *body;
+
+    /* A message may have no MIME part at all. Bail out here rather
+     * than reaching G_OBJECT_TYPE (part) below with a NULL pointer. */
+    if (! part) {
+        fprintf (stderr, "Warning: Not indexing empty mime part.\n");
+        return;
+    }
+
+    if (GMIME_IS_MULTIPART (part)) {
+        GMimeMultipart *multipart = GMIME_MULTIPART (part);
+        int i;
+
+        for (i = 0; i < g_mime_multipart_get_count (multipart); i++) {
+            if (GMIME_IS_MULTIPART_SIGNED (multipart)) {
+                /* Don't index the signature. */
+                if (i == 1)
+                    continue;
+                if (i > 1)
+                    fprintf (stderr, "Warning: Unexpected extra parts of multipart/signed. Indexing anyway.\n");
+            }
+            _index_mime_part (message,
+                              g_mime_multipart_get_part (multipart, i));
+        }
+        return;
+    }
+
+    if (GMIME_IS_MESSAGE_PART (part)) {
+        GMimeMessage *mime_message;
+
+        mime_message = g_mime_message_part_get_message (GMIME_MESSAGE_PART (part));
+
+        _index_mime_part (message, g_mime_message_get_mime_part (mime_message));
+
+        return;
+    }
+
+    if (! (GMIME_IS_PART (part))) {
+        fprintf (stderr, "Warning: Not indexing unknown mime part: %s.\n",
+                 g_type_name (G_OBJECT_TYPE (part)));
+        return;
+    }
+
+    disposition = g_mime_object_get_content_disposition (part);
+    if (disposition &&
+        strcmp (disposition->disposition, GMIME_DISPOSITION_ATTACHMENT) == 0)
+    {
+        const char *filename = g_mime_part_get_filename (GMIME_PART (part));
+
+        _notmuch_message_add_term (message, "tag", "attachment");
+        /* A NULL filename is tolerated by _notmuch_message_gen_terms. */
+        _notmuch_message_gen_terms (message, "attachment", filename);
+
+        /* XXX: Would be nice to call out to something here to parse
+         * the attachment into text and then index that. */
+        return;
+    }
+
+    byte_array = g_byte_array_new ();
+
+    /* The stream must not own the byte array: the array outlives the
+     * stream so that its data can be NUL-terminated and indexed. */
+    stream = g_mime_stream_mem_new_with_byte_array (byte_array);
+    g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE);
+    wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+    if (wrapper)
+        g_mime_data_wrapper_write_to_stream (wrapper, stream);
+
+    g_object_unref (stream);
+
+    g_byte_array_append (byte_array, (guint8 *) "\0", 1);
+    body = (char *) g_byte_array_free (byte_array, FALSE);
+
+    _notmuch_message_gen_terms (message, NULL, body);
+
+    /* The data handed back by g_byte_array_free belongs to GLib's
+     * allocator and must be released with g_free, not free. */
+    g_free (body);
+}
+
+/* Parse the mail file 'filename' with GMime and generate terms on
+ * 'message' for its sender ("from"), all of its recipients ("to"), its
+ * subject with leading "re:" markers removed ("subject"), and its MIME
+ * body parts.
+ *
+ * Returns NOTMUCH_STATUS_SUCCESS on success,
+ * NOTMUCH_STATUS_FILE_ERROR if the file cannot be opened, and
+ * NOTMUCH_STATUS_FILE_NOT_EMAIL if GMime cannot construct a message
+ * from it.
+ */
+notmuch_status_t
+_notmuch_message_index_file (notmuch_message_t *message,
+                             const char *filename)
+{
+    GMimeStream *stream = NULL;
+    GMimeParser *parser = NULL;
+    GMimeMessage *mime_message = NULL;
+    InternetAddressList *addresses;
+    FILE *file = NULL;
+    const char *from, *subject;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+    static int initialized = 0;
+
+    /* GMime needs one-time global initialization before first use. */
+    if (! initialized) {
+        g_mime_init (0);
+        initialized = 1;
+    }
+
+    file = fopen (filename, "r");
+    if (! file) {
+        fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
+        ret = NOTMUCH_STATUS_FILE_ERROR;
+        goto DONE;
+    }
+
+    /* Evil GMime steals my FILE* here so I won't fclose it. */
+    stream = g_mime_stream_file_new (file);
+
+    parser = g_mime_parser_new_with_stream (stream);
+
+    /* The parser hands back NULL when it cannot build a message. */
+    mime_message = g_mime_parser_construct_message (parser);
+    if (! mime_message) {
+        fprintf (stderr, "Error parsing %s as a mail message\n", filename);
+        ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
+        goto DONE;
+    }
+
+    /* Both address lists below are newly created objects owned by this
+     * function, so each is released once it has been indexed. */
+    from = g_mime_message_get_sender (mime_message);
+    addresses = from ? internet_address_list_parse_string (from) : NULL;
+    if (addresses) {
+        _index_address_list (message, "from", addresses);
+        g_object_unref (addresses);
+    }
+
+    addresses = g_mime_message_get_all_recipients (mime_message);
+    if (addresses) {
+        _index_address_list (message, "to", addresses);
+        g_object_unref (addresses);
+    }
+
+    /* A missing subject yields NULL, which gen_terms tolerates. */
+    subject = g_mime_message_get_subject (mime_message);
+    subject = skip_re_in_subject (subject);
+    _notmuch_message_gen_terms (message, "subject", subject);
+
+    _index_mime_part (message, g_mime_message_get_mime_part (mime_message));
+
+  DONE:
+    if (mime_message)
+        g_object_unref (mime_message);
+
+    if (parser)
+        g_object_unref (parser);
+
+    if (stream)
+        g_object_unref (stream);
+
+    return ret;
+}
diff --git a/message.cc b/message.cc
index 66747b5c..60ddf8a8 100644
--- a/message.cc
+++ b/message.cc
@@ -442,6 +442,32 @@ _notmuch_message_add_term (notmuch_message_t *message,
return NOTMUCH_PRIVATE_STATUS_SUCCESS;
}
+/* Run 'text' through the database's term generator and attach the
+ * resulting terms to the document of 'message'. The text is always
+ * indexed without a prefix; when 'prefix_name' is not NULL it is
+ * additionally indexed under the term prefix that _find_prefix
+ * returns for that name.
+ *
+ * Returns NOTMUCH_PRIVATE_STATUS_NULL_POINTER if 'text' is NULL, and
+ * NOTMUCH_PRIVATE_STATUS_SUCCESS otherwise. */
+notmuch_private_status_t
+_notmuch_message_gen_terms (notmuch_message_t *message,
+                            const char *prefix_name,
+                            const char *text)
+{
+    Xapian::TermGenerator *generator = message->notmuch->term_gen;
+
+    if (! text)
+        return NOTMUCH_PRIVATE_STATUS_NULL_POINTER;
+
+    generator->set_document (message->doc);
+
+    /* Prefixed pass first, then the unprefixed pass. */
+    if (prefix_name != NULL)
+        generator->index_text (text, 1, _find_prefix (prefix_name));
+
+    generator->index_text (text);
+
+    return NOTMUCH_PRIVATE_STATUS_SUCCESS;
+}
+
/* Remove a name:value term from 'message', (the actual term will be
* encoded by prefixing the value with a short prefix). See
* NORMAL_PREFIX and BOOLEAN_PREFIX arrays for the mapping of term
diff --git a/notmuch-private.h b/notmuch-private.h
index c80f219a..440860ba 100644
--- a/notmuch-private.h
+++ b/notmuch-private.h
@@ -187,6 +187,11 @@ _notmuch_message_remove_term (notmuch_message_t *message,
const char *prefix_name,
const char *value);
+notmuch_private_status_t
+_notmuch_message_gen_terms (notmuch_message_t *message,
+ const char *prefix_name,
+ const char *text);
+
void
_notmuch_message_set_filename (notmuch_message_t *message,
const char *filename);
@@ -205,6 +210,12 @@ _notmuch_message_set_date (notmuch_message_t *message,
void
_notmuch_message_sync (notmuch_message_t *message);
+/* index.cc */
+
+notmuch_status_t
+_notmuch_message_index_file (notmuch_message_t *message,
+ const char *filename);
+
/* message-file.c */
/* XXX: I haven't decided yet whether these will actually get exported