aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/database.cc
diff options
context:
space:
mode:
authorGravatar Carl Worth <cworth@cworth.org>2010-06-04 12:39:36 -0700
committerGravatar Carl Worth <cworth@cworth.org>2010-06-04 13:35:07 -0700
commit7b78eb4af6e87532795d09bd82152002ab4a74b1 (patch)
treeeb2ccea430a8162698787489dce2c4703f9b0f55 /lib/database.cc
parent77ab73834325f2b2a8870204af60e116d4209f12 (diff)
Add support (and tests) for messages with really long message IDs.
Scott Henson reported an internal error that occurred when he tried to add a message that referenced another message with a message ID well over 300 characters in length. The bug here was running into a Xapian limit for the length of metadata key names, (which is even more restrictive than the Xapian limit for the length of terms). We fix this by noticing long message ID values and instead using a message ID of the form "notmuch-sha1-<sha1_sum_of_message_id>". That is, we use SHA1 to generate a compressed, (but still unique), version of the message ID. We add support to the test suite to exercise this fix. The tests add a message referencing the long message ID, then add the message with the long message ID, then finally add another message referencing the long ID. Each of these tests exercises a different code path where the special handling is implemented. A final test ensures that all three messages are stitched together into a single thread---guaranteeing that the three code paths all act consistently.
Diffstat (limited to 'lib/database.cc')
-rw-r--r--lib/database.cc50
1 files changed, 41 insertions, 9 deletions
diff --git a/lib/database.cc b/lib/database.cc
index dd1fc637..6affc205 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -57,8 +57,12 @@ typedef struct {
*
* type: mail
*
- * id: Unique ID of mail, (from Message-ID header or generated
- * as "notmuch-sha1-<sha1_sum_of_entire_file>.
+ * id: Unique ID of mail. This is from the Message-ID header
+ * if present and not too long (see NOTMUCH_MESSAGE_ID_MAX).
+ * If it's present and too long, then we use
+ * "notmuch-sha1-<sha1_sum_of_message_id>".
+ * If this header is not present, we use
+ * "notmuch-sha1-<sha1_sum_of_entire_file>".
*
* thread: The ID of the thread to which the mail belongs
*
@@ -145,9 +149,11 @@ typedef struct {
*
* thread_id_* A pre-allocated thread ID for a particular
* message. This is actually an arbitarily large
- * family of metadata name. Any particular name
- * is formed by concatenating "thread_id_" with a
- * message ID. The value stored is a thread ID.
+ * family of metadata name. Any particular name is
+ * formed by concatenating "thread_id_" with a message
+ * ID (or the SHA1 sum of a message ID if it is very
+ * long---see description of 'id' in the mail
+ * document). The value stored is a thread ID.
*
* These thread ID metadata values are stored
* whenever a message references a parent message
@@ -334,6 +340,23 @@ find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id)
return notmuch->xapian_db->get_document (doc_id);
}
+/* Return a compressed version of 'message_id', talloc'ed on 'ctx':
+ *
+ * notmuch-sha1-<sha1_sum_of_message_id>
+ */
+static char *
+_message_id_compressed (void *ctx, const char *message_id)
+{
+ char *sha1, *compressed;
+
+ sha1 = notmuch_sha1_of_string (message_id);
+
+ compressed = talloc_asprintf (ctx, "notmuch-sha1-%s", sha1);
+ free (sha1); /* temporary SHA1 string; released with free(), not talloc */
+
+ return compressed; /* NOTE(review): talloc_asprintf result is not NULL-checked here */
+}
+
notmuch_message_t *
notmuch_database_find_message (notmuch_database_t *notmuch,
const char *message_id)
@@ -341,6 +364,9 @@ notmuch_database_find_message (notmuch_database_t *notmuch,
notmuch_private_status_t status;
unsigned int doc_id;
+ if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
+ message_id = _message_id_compressed (notmuch, message_id);
+
try {
status = _notmuch_database_find_unique_doc_id (notmuch, "id",
message_id, &doc_id);
@@ -1217,7 +1243,11 @@ _notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
static char *
_get_metadata_thread_id_key (void *ctx, const char *message_id)
{
- return talloc_asprintf (ctx, "thread_id_%s", message_id);
+ if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) /* Xapian limits metadata key length */
+ message_id = _message_id_compressed (ctx, message_id);
+
+ return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s",
+ message_id);
}
/* Find the thread ID to which the message with 'message_id' belongs.
@@ -1570,10 +1600,12 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
if (message_id == NULL)
message_id = talloc_strdup (message_file, header);
- /* Reject a Message ID that's too long. */
- if (message_id && strlen (message_id) + 1 > NOTMUCH_TERM_MAX) {
+ /* If a message ID is too long, substitute its sha1 instead. */
+ if (message_id && strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) {
+ char *compressed = _message_id_compressed (message_file,
+ message_id);
talloc_free (message_id);
- message_id = NULL;
+ message_id = compressed;
}
}