From 8a443121c63ee38354cd8f657a1a308311faae9d Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 15 Jun 2014 22:40:30 -0400 Subject: test: Fix from/to search test queries Two of the search tests for "from" and "to" queries were clearly trying to search for prefixed phrases, but forgot to shell quote the phrases. Fix this by quoting them correctly. --- test/T080-search.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/T080-search.sh b/test/T080-search.sh index a7a0b18d..4f0e16cf 100755 --- a/test/T080-search.sh +++ b/test/T080-search.sh @@ -59,7 +59,7 @@ test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] searchbyfrom@example. test_begin_subtest "Search by from: (name)" add_message '[subject]="search by from (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[from]="Search By From Name "' -output=$(notmuch search from:"Search By From Name" | notmuch_search_sanitize) +output=$(notmuch search 'from:"Search By From Name"' | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)" test_begin_subtest "Search by to: (address)" @@ -69,7 +69,7 @@ test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; s test_begin_subtest "Search by to: (name)" add_message '[subject]="search by to (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[to]="Search By To Name "' -output=$(notmuch search to:"Search By To Name" | notmuch_search_sanitize) +output=$(notmuch search 'to:"Search By To Name"' | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)" test_begin_subtest "Search by subject: (phrase)" -- cgit v1.2.3 From b547830783ee0732696d5c05a00cfc57baba065f Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 15 Jun 2014 22:40:31 -0400 Subject: test: Add search tests for combined name/address queries Two of these are currently 
known-broken. We index the name and address parts in two separate calls to _notmuch_message_gen_terms. Currently this has the effect of placing the term positions of the prefixed terms from the second call right after those of the first call, but screws up the term positions of the non-prefixed terms. --- test/T080-search.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'test') diff --git a/test/T080-search.sh b/test/T080-search.sh index 4f0e16cf..8ed57013 100755 --- a/test/T080-search.sh +++ b/test/T080-search.sh @@ -62,6 +62,15 @@ add_message '[subject]="search by from (name)"' '[date]="Sat, 01 Jan 2000 12:00: output=$(notmuch search 'from:"Search By From Name"' | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)" +test_begin_subtest "Search by from: (name and address)" +output=$(notmuch search 'from:"Search By From Name "' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)" + +test_begin_subtest "Search by from: without prefix (name and address)" +test_subtest_known_broken +output=$(notmuch search '"Search By From Name "' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)" + test_begin_subtest "Search by to: (address)" add_message '[subject]="search by to (address)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [to]=searchbyto@example.com output=$(notmuch search to:searchbyto@example.com | notmuch_search_sanitize) @@ -72,6 +81,15 @@ add_message '[subject]="search by to (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 output=$(notmuch search 'to:"Search By To Name"' | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)" +test_begin_subtest "Search by to: (name and adress)" +output=$(notmuch 
search 'to:"Search By To Name "' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)" + +test_begin_subtest "Search by to: without prefix (name and adress)" +test_subtest_known_broken +output=$(notmuch search '"Search By To Name "' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)" + test_begin_subtest "Search by subject: (phrase)" add_message '[subject]="subject search test (phrase)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' add_message '[subject]="this phrase should not match the subject search test"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' -- cgit v1.2.3 From 44327ca86d8e3563490801f57a2d1ca455d9588e Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 15 Jun 2014 22:40:32 -0400 Subject: lib: Index name and address of from/to headers as a phrase Previously, we indexed the name and address parts of from/to headers with two calls to _notmuch_message_gen_terms. In general, this indicates that these parts are separate phrases. However, because of an implementation quirk, the two calls to _notmuch_message_gen_terms generated adjacent term positions for the prefixed terms, which happens to be the right thing to do in this case, but the wrong thing to do for all other calls. Furthermore, _notmuch_message_gen_terms produced potentially overlapping term positions for the un-prefixed copies of the terms, which is simply wrong. This change indexes both the name and address in a single call to _notmuch_message_gen_terms, indicating that they should be part of a single phrase. This masks the problem with the un-prefixed terms (fixing the two known-broken tests) and puts us in a position to fix the unintentional phrases generated by other calls to _notmuch_message_gen_terms.
--- lib/index.cc | 24 ++++++++++-------------- test/T080-search.sh | 2 -- 2 files changed, 10 insertions(+), 16 deletions(-) (limited to 'test') diff --git a/lib/index.cc b/lib/index.cc index e1e2a382..1a2e63df 100644 --- a/lib/index.cc +++ b/lib/index.cc @@ -231,26 +231,22 @@ _index_address_mailbox (notmuch_message_t *message, InternetAddress *address) { InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address); - const char *name, *addr; + const char *name, *addr, *combined; void *local = talloc_new (message); name = internet_address_get_name (address); addr = internet_address_mailbox_get_addr (mailbox); - /* In the absence of a name, we'll strip the part before the @ - * from the address. */ - if (! name) { - const char *at; + /* Combine the name and address and index them as a phrase. */ + if (name && addr) + combined = talloc_asprintf (local, "%s %s", name, addr); + else if (name) + combined = name; + else + combined = addr; - at = strchr (addr, '@'); - if (at) - name = talloc_strndup (local, addr, at - addr); - } - - if (name) - _notmuch_message_gen_terms (message, prefix_name, name); - if (addr) - _notmuch_message_gen_terms (message, prefix_name, addr); + if (combined) + _notmuch_message_gen_terms (message, prefix_name, combined); talloc_free (local); } diff --git a/test/T080-search.sh b/test/T080-search.sh index 8ed57013..b63bf023 100755 --- a/test/T080-search.sh +++ b/test/T080-search.sh @@ -67,7 +67,6 @@ output=$(notmuch search 'from:"Search By From Name "' | notmuc test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)" test_begin_subtest "Search by from: without prefix (name and address)" -test_subtest_known_broken output=$(notmuch search '"Search By From Name "' | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)" @@ -86,7 +85,6 @@ output=$(notmuch search 'to:"Search By To Name "' | notmuch_se 
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)" test_begin_subtest "Search by to: without prefix (name and adress)" -test_subtest_known_broken output=$(notmuch search '"Search By To Name "' | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)" -- cgit v1.2.3 From c1805576a0f57540bdf8643f1b7989fad793b929 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 15 Jun 2014 22:40:33 -0400 Subject: test: Known-broken test for overlapping/adjacent termpos This adds two known-broken tests and one working test related to the term positions assigned to terms from different headers or MIME parts. The first test fails because we don't create a termpos gap between different headers. The second test fails because we don't adjust termpos at all when indexing multiple parts. --- test/T080-search.sh | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'test') diff --git a/test/T080-search.sh b/test/T080-search.sh index b63bf023..3f6b565f 100755 --- a/test/T080-search.sh +++ b/test/T080-search.sh @@ -145,4 +145,44 @@ add_message '[subject]="utf8-message-body-subject"' '[date]="Sat, 01 Jan 2000 12 output=$(notmuch search "bödý" | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)" + +cat < ${MAIL_DIR}/termpos +From: Source +To: Dest +Subject: part overlap test +Date: Sat, 01 January 2000 00:00:00 +0000 +Message-ID: +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="==-==" + +--==-== +Content-Type: text/plain + +a b c + +--==-== +Content-Type: text/plain + +x y z + +--==-==-- +EOF +notmuch new > /dev/null + +test_begin_subtest "headers do not have adjacent term positions" +test_subtest_known_broken +# Regression test for a bug where term positions for non-prefixed +# terms weren't updated 
+output=$(notmuch search id:termpos and '"com dest"') +test_expect_equal "$output" "" + +test_begin_subtest "parts have non-overlapping term positions" +test_subtest_known_broken +output=$(notmuch search id:termpos and '"a y c"') +test_expect_equal "$output" "" + +test_begin_subtest "parts do not have adjacent term positions" +output=$(notmuch search id:termpos and '"c x"') +test_expect_equal "$output" "" + test_done -- cgit v1.2.3 From dc64ab67207fef897bca88741fc42330793d7bd8 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 15 Jun 2014 22:40:34 -0400 Subject: lib: Separate all phrases indexed by _notmuch_message_gen_terms This adds a 100 termpos gap between all phrases indexed by _notmuch_message_gen_terms. This fixes a bug where terms from the end of one header and the beginning of another header could match together in a single phrase and a separate bug where term positions of un-prefixed terms overlapped. This fix only affects newly indexed messages. Messages that are already indexed won't benefit from this fix without re-indexing, but the fix won't make things any worse for existing messages. --- lib/message.cc | 9 +++++++-- test/T080-search.sh | 2 -- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/lib/message.cc b/lib/message.cc index 9243b769..d0b7351e 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -1023,16 +1023,21 @@ _notmuch_message_gen_terms (notmuch_message_t *message, return NOTMUCH_PRIVATE_STATUS_NULL_POINTER; term_gen->set_document (message->doc); - term_gen->set_termpos (message->termpos); if (prefix_name) { const char *prefix = _find_prefix (prefix_name); + term_gen->set_termpos (message->termpos); term_gen->index_text (text, 1, prefix); - message->termpos = term_gen->get_termpos (); + /* Create a gap between this and the next terms so they don't + * appear to be a phrase.
*/ + message->termpos = term_gen->get_termpos () + 100; } + term_gen->set_termpos (message->termpos); term_gen->index_text (text); + /* Create a term gap, as above. */ + message->termpos = term_gen->get_termpos () + 100; return NOTMUCH_PRIVATE_STATUS_SUCCESS; } diff --git a/test/T080-search.sh b/test/T080-search.sh index 3f6b565f..05027fb0 100755 --- a/test/T080-search.sh +++ b/test/T080-search.sh @@ -170,14 +170,12 @@ EOF notmuch new > /dev/null test_begin_subtest "headers do not have adjacent term positions" -test_subtest_known_broken # Regression test for a bug where term positions for non-prefixed # terms weren't updated output=$(notmuch search id:termpos and '"com dest"') test_expect_equal "$output" "" test_begin_subtest "parts have non-overlapping term positions" -test_subtest_known_broken output=$(notmuch search id:termpos and '"a y c"') test_expect_equal "$output" "" -- cgit v1.2.3