aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar ridiculousfish <corydoras@ridiculousfish.com>2015-08-01 16:02:46 -0700
committerGravatar ridiculousfish <corydoras@ridiculousfish.com>2015-08-08 13:55:43 -0700
commit70548f7cc75ea9dcf325d8f2a87126a845e9dc46 (patch)
tree90f425cad8598fc5c9f8bf163af5ef45d7377177
parentf6ab0b4d0775bd48ca10ca7f75522dfbd4fde175 (diff)
Initial implementation of wildcard match refactoring
Not yet turned on.
-rw-r--r--src/expand.cpp2
-rw-r--r--src/fish.cpp2
-rw-r--r--src/fish_tests.cpp1
-rw-r--r--src/wildcard.cpp327
-rw-r--r--src/wutil.cpp8
-rw-r--r--src/wutil.h13
6 files changed, 324 insertions, 29 deletions
diff --git a/src/expand.cpp b/src/expand.cpp
index eeeb7bac..aca6d97a 100644
--- a/src/expand.cpp
+++ b/src/expand.cpp
@@ -1793,7 +1793,7 @@ int expand_string(const wcstring &input, std::vector<completion_t> *output, expa
int wc_res;
remove_internal_separator(next, (EXPAND_SKIP_WILDCARDS & flags) ? true : false);
- const bool has_wildcard = wildcard_has(next, 1);
+ const bool has_wildcard = wildcard_has(next, true /* internal, i.e. ANY_CHAR */);
if (has_wildcard && (flags & EXECUTABLES_ONLY))
{
diff --git a/src/fish.cpp b/src/fish.cpp
index a50e0886..2d44b01f 100644
--- a/src/fish.cpp
+++ b/src/fish.cpp
@@ -523,7 +523,6 @@ int main(int argc, char **argv)
proc_init();
event_init();
- wutil_init();
builtin_init();
function_init();
env_init(&paths);
@@ -636,7 +635,6 @@ int main(int argc, char **argv)
proc_destroy();
builtin_destroy();
reader_destroy();
- wutil_destroy();
event_destroy();
if (g_log_forks)
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index b245dc60..99928290 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -4009,7 +4009,6 @@ int main(int argc, char **argv)
reader_destroy();
builtin_destroy();
- wutil_destroy();
event_destroy();
proc_destroy();
diff --git a/src/wildcard.cpp b/src/wildcard.cpp
index a1097829..96fba5a7 100644
--- a/src/wildcard.cpp
+++ b/src/wildcard.cpp
@@ -145,7 +145,10 @@ bool wildcard_has(const wcstring &str, bool internal)
static bool wildcard_match_internal(const wchar_t *str, const wchar_t *wc, bool leading_dots_fail_to_match, bool is_first)
{
if (*str == 0 && *wc==0)
+ {
+ /* We're done */
return true;
+ }
/* Hackish fix for https://github.com/fish-shell/fish-shell/issues/270 . Prevent wildcards from matching . or .., but we must still allow literal matches. */
if (leading_dots_fail_to_match && is_first && contains(str, L".", L".."))
@@ -161,6 +164,12 @@ static bool wildcard_match_internal(const wchar_t *str, const wchar_t *wc, bool
{
return false;
}
+
+ /* Common case of * at the end. In that case we can early out since we know it will match. */
+ if (wc[1] == L'\0')
+ {
+ return true;
+ }
/* Try all submatches */
do
@@ -179,8 +188,7 @@ static bool wildcard_match_internal(const wchar_t *str, const wchar_t *wc, bool
*/
return false;
}
-
- if (*wc == ANY_CHAR)
+ else if (*wc == ANY_CHAR)
{
if (is_first && *str == L'.')
{
@@ -189,9 +197,10 @@ static bool wildcard_match_internal(const wchar_t *str, const wchar_t *wc, bool
return wildcard_match_internal(str+1, wc+1, leading_dots_fail_to_match, false);
}
-
- if (*wc == *str)
+ else if (*wc == *str)
+ {
return wildcard_match_internal(str+1, wc+1, leading_dots_fail_to_match, false);
+ }
return false;
}
@@ -603,7 +612,7 @@ static void wildcard_completion_allocate(std::vector<completion_t> *list,
expansion flags specified. flags can be a combination of
EXECUTABLES_ONLY and DIRECTORIES_ONLY.
*/
-static bool test_flags(const wchar_t *filename, expand_flags_t flags)
+static bool test_flags(const wcstring &filename, expand_flags_t flags)
{
if (flags & DIRECTORIES_ONLY)
{
@@ -646,6 +655,301 @@ static void insert_completion_if_missing(const wcstring &str, std::vector<comple
append_completion(out, str);
}
+class wildcard_expander_t
+{
+ /* the set of items we have resolved, used to avoid duplication */
+ std::set<wcstring> completion_set;
+
+ /* the set of file IDs we have visited, used to avoid symlink loops */
+ std::set<file_id_t> visited_files;
+
+ /* flags controlling expansion */
+ const expand_flags_t flags;
+
+ /* resolved items get inserted into here. This is transient of course. */
+ std::vector<completion_t> *resolved;
+
+ /* whether we have been interrupted */
+ bool did_interrupt;
+
+ /* whether we have successfully added any completions */
+ bool did_add;
+
+ /* We are a trailing slash - expand at the end */
+ void expand_trailing_slash(const wcstring &base_dir);
+
+ /* Given a directory base_dir, which is opened as base_dir_fp, expand an intermediate segment of the wildcard.
+ Treat ANY_STRING_RECURSIVE as ANY_STRING.
+ wc_segment is the wildcard segment for this directory
+ wc_remainder is the wildcard for subdirectories
+ */
+ void expand_intermediate_segment(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &wc_segment, const wchar_t *wc_remainder);
+
+ /* Given a directory base_dir, which is opened as base_dir_fp, expand the last segment of the wildcard.
+ Treat ANY_STRING_RECURSIVE as ANY_STRING.
+ wc is the wildcard segment to use for matching
+ wc_remainder is the wildcard for subdirectories
+ */
+ void expand_last_segment(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &wc);
+
+ /* Given a directory base_dir, which is openend as base_dir_fp, call expand() recursively
+ on matching subdirectories.
+ head_wc is the portion before the recursive match
+ wc_remainder is the portion after it, and starts with ANY_STRING_RECURSIVE
+ */
+ void recurse_to_subdirectories(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &head_wc, const wchar_t *wc_remainder);
+
+ /* Helper to resolve an empty base directory */
+ static DIR *open_dir(const wcstring &base_dir)
+ {
+ return wopendir(base_dir.empty() ? L"." : base_dir);
+ }
+
+public:
+
+ wildcard_expander_t(expand_flags_t f, std::vector<completion_t> *r) : flags(f), resolved(r), did_interrupt(false), did_add(false)
+ {
+ assert(resolved != NULL);
+
+ /* Insert initial completions into our set to avoid duplicates */
+ for (std::vector<completion_t>::const_iterator iter = resolved->begin(); iter != resolved->end(); ++iter)
+ {
+ this->completion_set.insert(iter->completion);
+ }
+ }
+
+ /* Do wildcard expansion. This is recursive. */
+ void expand(const wcstring &base_dir, const wchar_t *wc);
+
+
+ /* Indicate whether we should cancel wildcard expansion. This latches 'interrupt' */
+ bool interrupted()
+ {
+ if (! did_interrupt)
+ {
+ did_interrupt = (is_main_thread() ? reader_interrupted() : reader_thread_job_is_stale());
+ }
+ return did_interrupt;
+ }
+
+ /* Indicates whether something was added */
+ bool added() const
+ {
+ return this->did_add;
+ }
+};
+
+void wildcard_expander_t::expand_trailing_slash(const wcstring &base_dir)
+{
+ if (interrupted())
+ {
+ return;
+ }
+
+ if (! (flags & ACCEPT_INCOMPLETE))
+ {
+ /* Trailing slash and not accepting incomplete, e.g. `echo /tmp/`. Insert this file if it exists. */
+ if (waccess(base_dir, F_OK))
+ {
+ append_completion(this->resolved, base_dir);
+ this->did_add = true;
+ }
+ }
+ else
+ {
+ /* Trailing slashes and accepting incomplete, e.g. `echo /tmp/<tab>`. Everything is added. */
+ DIR *dir = open_dir(base_dir);
+ if (dir)
+ {
+ wcstring next;
+ while (wreaddir(dir, next) && ! interrupted())
+ {
+ if (! next.empty() && next.at(0) != L'.')
+ {
+ const wcstring abs_path = base_dir + next;
+ if (test_flags(abs_path, this->flags))
+ {
+ wildcard_completion_allocate(this->resolved, abs_path, next, L"", flags);
+ this->did_add = true;
+ }
+ }
+ }
+ closedir(dir);
+ }
+ }
+}
+
+void wildcard_expander_t::expand_intermediate_segment(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &wc_segment, const wchar_t *wc_remainder)
+{
+ wcstring name_str;
+ while (!interrupted() && wreaddir(base_dir_fp, name_str))
+ {
+ /* Note that it's critical we ignore leading dots here, else we may descend into . and .. */
+ if (! wildcard_match(name_str, wc_segment, true))
+ {
+ /* Doesn't match the wildcard for this segment, skip it */
+ continue;
+ }
+
+ wcstring full_path = base_dir + name_str;
+ struct stat buf;
+ if (0 != wstat(full_path, &buf) || !S_ISDIR(buf.st_mode))
+ {
+ /* We either can't stat it, or we did but it's not a directory */
+ continue;
+ }
+
+ const file_id_t file_id = file_id_t::file_id_from_stat(&buf);
+ if (!this->visited_files.insert(file_id).second)
+ {
+ /* Symlink loop! This directory was already visited, so skip it */
+ continue;
+ }
+
+ /* We made it through. Perform normal wildcard expansion on this new directory, starting at our tail_wc, which includes the ANY_STRING_RECURSIVE guy. */
+ full_path.push_back(L'/');
+ this->expand(full_path, wc_remainder);
+ }
+}
+
+void wildcard_expander_t::expand_last_segment(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &wc)
+{
+ wcstring name_str;
+ while (wreaddir(base_dir_fp, name_str))
+ {
+ if (flags & ACCEPT_INCOMPLETE)
+ {
+ /* Test for matches before stating file, so as to minimize the number of calls to the much slower stat function. The only expand flag we care about is EXPAND_FUZZY_MATCH; we have no complete flags. */
+ std::vector<completion_t> local_matches;
+ if (wildcard_complete(name_str, wc.c_str(), L"", NULL, &local_matches, flags & EXPAND_FUZZY_MATCH, 0))
+ {
+ const wcstring abs_path = base_dir + name_str;
+ if (test_flags(abs_path, flags))
+ {
+ wildcard_completion_allocate(this->resolved, abs_path, name_str, wc.c_str(), flags);
+ this->did_add = true;
+ }
+ }
+ }
+ else
+ {
+ if (wildcard_match(name_str, wc, true /* skip files with leading dots */))
+ {
+ const wcstring abs_path = base_dir + name_str;
+ if (this->completion_set.insert(abs_path).second)
+ {
+ append_completion(this->resolved, abs_path);
+ this->did_add = true;
+ }
+ }
+ }
+ }
+}
+
+void wildcard_expander_t::recurse_to_subdirectories(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &head_wc, const wchar_t *wc_remainder)
+{
+ assert(! base_dir.empty());
+ assert(wc_remainder[0] == ANY_STRING_RECURSIVE);
+ // note head_wc may be empty
+
+ /* Construct a "head + any" wildcard for matching stuff in this directory. Then just match this segment with that, then future segments with the remainder of the wildcard. */
+ wcstring head_any = head_wc;
+ head_any.push_back(ANY_STRING);
+ this->expand_intermediate_segment(base_dir, base_dir_fp, head_any, wc_remainder);
+}
+
+/**
+ The real implementation of wildcard expansion is in this
+ function. Other functions are just wrappers around this one.
+
+ This function traverses the relevant directory tree looking for
+ matches, and recurses when needed to handle wildcrards spanning
+ multiple components and recursive wildcards.
+
+ Because this function calls itself recursively with substrings,
+ it's important that the parameters be raw pointers instead of wcstring,
+ which would be too expensive to construct for all substrings.
+
+ Args:
+ base_dir: the "working directory" against which the wildcard is to be resolved
+ wc: the wildcard string itself, e.g. foo*bar/baz (where * is acutally ANY_CHAR)
+*/
+void wildcard_expander_t::expand(const wcstring &base_dir, const wchar_t *wc)
+{
+ assert(wc != NULL);
+
+ if (interrupted())
+ {
+ return;
+ }
+
+ /* Get the current segment and compute interesting properties about it. */
+ const size_t wc_len = wcslen(wc);
+ const wchar_t * const next_slash = wcschr(wc, L'/');
+ const bool is_last_segment = (next_slash == NULL);
+ const size_t wc_segment_len = next_slash ? next_slash - wc : wc_len;
+ const wcstring wc_segment = wcstring(wc, wc_segment_len);
+ const bool segment_has_wildcards = wildcard_has(wc_segment, true /* internal, i.e. look for ANY_CHAR instead of ? */);
+
+ if (wc_segment.empty())
+ {
+ /* Handle empty segment */
+ assert(! segment_has_wildcards);
+ if (is_last_segment)
+ {
+ this->expand_trailing_slash(base_dir);
+ }
+ else
+ {
+ /* Multiple adjacent slashes in the wildcard. Just skip them. */
+ this->expand(base_dir, next_slash + 1);
+ }
+ }
+ else if (! segment_has_wildcards && ! is_last_segment)
+ {
+ /* Literal intermediate match. Note that we may not be able to actually read the directory (#2099) */
+ assert(next_slash != NULL);
+ /* This just trumps everything */
+ this->expand(base_dir + wc_segment + L'/', next_slash + 1);
+ }
+ else
+ {
+ assert(! wc_segment.empty() && (segment_has_wildcards || is_last_segment));
+ DIR *dir = open_dir(base_dir);
+ if (dir)
+ {
+ if (is_last_segment)
+ {
+ /* Last wildcard segment, nonempty wildcard */
+ this->expand_last_segment(base_dir, dir, wc_segment);
+ }
+ else
+ {
+ /* Not the last segment, nonempty wildcard */
+ assert(next_slash != NULL);
+ const wchar_t *wc_remainder = next_slash;
+ while (*wc_remainder == L'/')
+ {
+ wc_remainder++;
+ }
+ this->expand_intermediate_segment(base_dir, dir, wc_segment, wc_remainder);
+ }
+
+ /* Recursive wildcards require special handling */
+ size_t asr_idx = wc_segment.find(ANY_STRING_RECURSIVE);
+ if (asr_idx != wcstring::npos)
+ {
+ const wcstring head(wc_segment, 0, asr_idx);
+ const wchar_t *tail = wc + asr_idx; // starts at the ASR wildcard
+ assert(*tail == ANY_STRING_RECURSIVE);
+ rewinddir(dir);
+ this->recurse_to_subdirectories(base_dir, dir, head, tail);
+ }
+ closedir(dir);
+ }
+ }
+}
+
/**
The real implementation of wildcard expansion is in this
function. Other functions are just wrappers around this one.
@@ -657,6 +961,17 @@ static void insert_completion_if_missing(const wcstring &str, std::vector<comple
Because this function calls itself recursively with substrings,
it's important that the parameters be raw pointers instead of wcstring,
which would be too expensive to construct for all substrings.
+
+ Args:
+ wc: the wildcard string itself, e.g. foo* (where * is acutally ANY_CHAR)
+ base_dir: the "working directory" against which the wildcard is to be resolved
+ flags: flags controlling expansion
+ out: resolved items get inserted into here
+ completion_set: the set of items we have resolved, used to avoid duplication
+ visited_files: the set of file IDs we have visited, used to avoid symlink loops
+
+ Returns:
+ 1 if matches were found, 0 if not, -1 on abort (control-C)
*/
static int wildcard_expand_internal(const wchar_t *wc,
const wchar_t * const base_dir,
@@ -1001,7 +1316,7 @@ static int wildcard_expand(const wchar_t *wc,
int wildcard_expand_string(const wcstring &wc, const wcstring &base_dir, expand_flags_t flags, std::vector<completion_t> *output)
{
assert(output != NULL);
- /* Hackish fix for 1631. We are about to call c_str(), which will produce a string truncated at any embedded nulls. We could fix this by passing around the size, etc. However embedded nulls are never allowed in a filename, so we just check for them and return 0 (no matches) if there is an embedded null. This isn't quite right, e.g. it will fail for \0?, but that is an edge case. */
+ /* Hackish fix for 1631. We are about to call c_str(), which will produce a string truncated at any embedded nulls. We could fix this by passing around the size, etc. However embedded nulls are never allowed in a filename, so we just check for them and return 0 (no matches) if there is an embedded null. */
if (wc.find(L'\0') != wcstring::npos)
{
return 0;
diff --git a/src/wutil.cpp b/src/wutil.cpp
index f429b07a..b5689eec 100644
--- a/src/wutil.cpp
+++ b/src/wutil.cpp
@@ -53,14 +53,6 @@ static pthread_mutex_t wgettext_lock;
typedef std::map<wcstring, const wchar_t *> wgettext_map_t;
static wgettext_map_t wgettext_map;
-void wutil_init()
-{
-}
-
-void wutil_destroy()
-{
-}
-
bool wreaddir_resolving(DIR *dir, const std::wstring &dir_path, std::wstring &out_name, bool *out_is_dir)
{
struct dirent *d = readdir(dir);
diff --git a/src/wutil.h b/src/wutil.h
index 011f3477..351846c5 100644
--- a/src/wutil.h
+++ b/src/wutil.h
@@ -16,17 +16,6 @@
#include "common.h"
/**
- Call this function on startup to create internal wutil
- resources. This function doesn't do anything.
-*/
-void wutil_init();
-
-/**
- Call this function on exit to free internal wutil resources
-*/
-void wutil_destroy();
-
-/**
Wide character version of fopen(). This sets CLO_EXEC.
*/
FILE *wfopen(const wcstring &path, const char *mode);
@@ -105,6 +94,8 @@ wchar_t *wrealpath(const wcstring &pathname, wchar_t *resolved_path);
bool wreaddir(DIR *dir, std::wstring &out_name);
bool wreaddir_resolving(DIR *dir, const std::wstring &dir_path, std::wstring &out_name, bool *out_is_dir);
+bool wreaddir_resolving(DIR *dir, const std::wstring &dir_path, std::wstring *out_name, bool *out_is_dir);
+
/**
Wide character version of dirname()
*/