From 0ca103686f3896fcfb9e44601c35371d53140ca4 Mon Sep 17 00:00:00 2001 From: Kurtis Rader Date: Mon, 13 Jun 2016 19:00:30 -0700 Subject: remove unset vars from the environment Remove vars from the environment that are no longer set. Simplify the code by removing an unnecessary loop. Add some tests. Fixes #3124 --- src/builtin_set.cpp | 3 +-- src/env.cpp | 44 +++++++++++++++++-------------------- tests/c-locale.err | 0 tests/c-locale.in | 35 ----------------------------- tests/c-locale.out | 4 ---- tests/c-locale.status | 1 - tests/locale.err | 0 tests/locale.in | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++ tests/locale.out | 8 +++++++ tests/locale.status | 1 + 10 files changed, 91 insertions(+), 66 deletions(-) delete mode 100644 tests/c-locale.err delete mode 100644 tests/c-locale.in delete mode 100644 tests/c-locale.out delete mode 100644 tests/c-locale.status create mode 100644 tests/locale.err create mode 100644 tests/locale.in create mode 100644 tests/locale.out create mode 100644 tests/locale.status diff --git a/src/builtin_set.cpp b/src/builtin_set.cpp index 2333e0c0..cbd0a871 100644 --- a/src/builtin_set.cpp +++ b/src/builtin_set.cpp @@ -321,8 +321,7 @@ static void print_variables(int include_values, int esc, bool shorten_ok, int sc } } -/// The set builtin. Creates, updates and erases environment variables and environemnt variable -/// arrays. +/// The set builtin creates, updates, and erases (removes, deletes) variables. int builtin_set(parser_t &parser, io_streams_t &streams, wchar_t **argv) { wgetopter_t w; // Variables used for parsing the argument list. diff --git a/src/env.cpp b/src/env.cpp index c7924aef..0ec45448 100644 --- a/src/env.cpp +++ b/src/env.cpp @@ -174,16 +174,14 @@ static bool var_is_locale(const wcstring &key) { static void handle_locale(const wchar_t *env_var_name) { debug(2, L"handle_locale() called in response to '%ls' changing", env_var_name); const char *old_msg_locale = setlocale(LC_MESSAGES, NULL); - - for (size_t i = 0; locale_variable[i]; i++) { - const wchar_t *key = locale_variable[i]; - const env_var_t var = env_get_string(key); - if (!var.empty()) { - const std::string &name = wcs2string(key); - const std::string &value = wcs2string(var); - setenv(name.c_str(), value.c_str(), 1); - debug(3, L"locale var %s='%s'", name.c_str(), value.c_str()); - } + const env_var_t val = env_get_string(env_var_name, ENV_EXPORT); + const std::string &value = wcs2string(val); + const std::string &name = wcs2string(env_var_name); + debug(2, L"locale var %s='%s'", name.c_str(), value.c_str()); + if (val.empty()) { + unsetenv(name.c_str()); + } else { + setenv(name.c_str(), value.c_str(), 1); } char *locale = setlocale(LC_ALL, ""); @@ -216,15 +214,14 @@ static bool var_is_curses(const wcstring &key) { /// libraries. static void handle_curses(const wchar_t *env_var_name) { debug(2, L"handle_curses() called in response to '%ls' changing", env_var_name); - for (size_t i = 0; curses_variable[i]; i++) { - const wchar_t *key = curses_variable[i]; - const env_var_t var = env_get_string(key); - if (!var.empty()) { - const std::string &name = wcs2string(key); - const std::string &value = wcs2string(var); - setenv(name.c_str(), value.c_str(), 1); - debug(3, L"curses var %s='%s'", name.c_str(), value.c_str()); - } + const env_var_t val = env_get_string(env_var_name, ENV_EXPORT); + const std::string &name = wcs2string(env_var_name); + const std::string &value = wcs2string(val); + debug(2, L"curses var %s='%s'", name.c_str(), value.c_str()); + if (val.empty()) { + unsetenv(name.c_str()); + } else { + setenv(name.c_str(), value.c_str(), 1); } // TODO: Modify input_init() to allow calling it when the terminfo env vars are dynamically // changed. At the present time it can be called just once. Also, we should really only do this @@ -318,11 +315,10 @@ static bool variable_is_colon_delimited_array(const wcstring &str) { } void env_init(const struct config_paths_t *paths /* or NULL */) { - // env_read_only variables can not be altered directly by the user. + // These variables can not be altered directly by the user. const wchar_t *const ro_keys[] = { - L"status", L"history", L"_", L"LINES", L"COLUMNS", L"PWD", - // L"SHLVL", // will be inserted a bit lower down - L"FISH_VERSION", + L"status", L"history", L"_", L"LINES", L"COLUMNS", L"PWD", L"FISH_VERSION", + // L"SHLVL" is readonly but will be inserted below after we increment it. }; for (size_t i = 0; i < sizeof ro_keys / sizeof *ro_keys; i++) { env_read_only.insert(ro_keys[i]); @@ -339,7 +335,7 @@ void env_init(const struct config_paths_t *paths /* or NULL */) { global_env = top; global = &top->env; - // Now the environemnt variable handling is set up, the next step is to insert valid data. + // Now the environment variable handling is set up, the next step is to insert valid data. // Import environment variables. Walk backwards so that the first one out of any duplicates wins // (#2784). diff --git a/tests/c-locale.err b/tests/c-locale.err deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/c-locale.in b/tests/c-locale.in deleted file mode 100644 index d2f2bd5f..00000000 --- a/tests/c-locale.in +++ /dev/null @@ -1,35 +0,0 @@ -# Verify that fish can pass through non-ASCII characters in the C/POSIX -# locale. This is to prevent regression of -# https://github.com/fish-shell/fish-shell/issues/2802. -# -# These tests are needed because the relevant standards allow the functions -# mbrtowc() and wcrtomb() to treat bytes with the high bit set as either valid -# or invalid in the C/POSIX locales. GNU libc treats those bytes as invalid. -# Other libc implementations (e.g., BSD) treat them as valid. We want fish to -# always treat those bytes as valid. - -# The fish in the middle of the pipeline should be receiving a UTF-8 encoded -# version of the unicode from the echo. It should pass those bytes thru -# literally since it is in the C locale. We verify this by first passing the -# echo output directly to the `xxd` program then via a fish instance. The -# output should be "58c3bb58" for the first statement and "58c3bc58" for the -# second. -echo -n X\u00fbX | \ - xxd --plain -echo X\u00fcX | env LC_ALL=C ../test/root/bin/fish -c 'read foo; echo -n $foo' | \ - xxd --plain - -# This test is subtle. Despite the presence of the \u00fc unicode char (a "u" -# with an umlaut) the fact the locale is C/POSIX will cause the \xfc byte to -# be emitted rather than the usual UTF-8 sequence \xc3\xbc. That's because the -# few single-byte unicode chars (that are not ASCII) are generally in the -# ISO-8859-1 char set which is encompased by the C locale. The output should -# be "59fc59". -env LC_ALL=C ../test/root/bin/fish -c 'echo -n Y\u00fcY' | \ - xxd --plain - -# The user can specify a wide unicode character (one requiring more than a -# single byte). In the C/POSIX locales we substitute a question-mark for the -# unencodable wide char. The output should be "543f54". -env LC_ALL=C ../test/root/bin/fish -c 'echo -n T\u01fdT' | \ - xxd --plain diff --git a/tests/c-locale.out b/tests/c-locale.out deleted file mode 100644 index 10a94d3e..00000000 --- a/tests/c-locale.out +++ /dev/null @@ -1,4 +0,0 @@ -58c3bb58 -58c3bc58 -59fc59 -543f54 diff --git a/tests/c-locale.status b/tests/c-locale.status deleted file mode 100644 index 573541ac..00000000 --- a/tests/c-locale.status +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/tests/locale.err b/tests/locale.err new file mode 100644 index 00000000..e69de29b diff --git a/tests/locale.in b/tests/locale.in new file mode 100644 index 00000000..d08fd7fd --- /dev/null +++ b/tests/locale.in @@ -0,0 +1,61 @@ +# Test behavior related to the locale. + +# Verify that our UTF-8 locale produces the expected output. +echo -n A\u00FCA | xxd --plain + +# Verify that exporting a change to the C locale produces the expected output. +# The output should include the literal byte \xFC rather than the UTF-8 sequence for \u00FC. +begin + set -lx LC_ALL C + echo -n B\u00FCB | xxd --plain +end + +# Since the previous change was localized to a block it should no +# longer be in effect and we should be back to a UTF-8 locale. +echo -n C\u00FCC | xxd --plain + +# Verify that setting a non-exported locale var doesn't affect the behavior. +# The output should include the UTF-8 sequence for \u00FC rather than that literal byte. +# Just like the previous test. +begin + set -l LC_ALL C + echo -n D\u00FCD | xxd --plain +end + +# Verify that fish can pass through non-ASCII characters in the C/POSIX +# locale. This is to prevent regression of +# https://github.com/fish-shell/fish-shell/issues/2802. +# +# These tests are needed because the relevant standards allow the functions +# mbrtowc() and wcrtomb() to treat bytes with the high bit set as either valid +# or invalid in the C/POSIX locales. GNU libc treats those bytes as invalid. +# Other libc implementations (e.g., BSD) treat them as valid. We want fish to +# always treat those bytes as valid. + +# The fish in the middle of the pipeline should be receiving a UTF-8 encoded +# version of the unicode from the echo. It should pass those bytes thru +# literally since it is in the C locale. We verify this by first passing the +# echo output directly to the `xxd` program then via a fish instance. The +# output should be "58c3bb58" for the first statement and "58c3bc58" for the +# second. +echo -n X\u00FBX | \ + xxd --plain +echo X\u00FCX | env LC_ALL=C ../test/root/bin/fish -c 'read foo; echo -n $foo' | \ + xxd --plain + +# The next tests deliberately spawn another fish instance to test inheritence of env vars. + +# This test is subtle. Despite the presence of the \u00fc unicode char (a "u" +# with an umlaut) the fact the locale is C/POSIX will cause the \xfc byte to +# be emitted rather than the usual UTF-8 sequence \xc3\xbc. That's because the +# few single-byte unicode chars (that are not ASCII) are generally in the +# ISO 8859-x char sets which are encompassed by the C locale. The output should +# be "59fc59". +env LC_ALL=C ../test/root/bin/fish -c 'echo -n Y\u00FCY' | \ + xxd --plain + +# The user can specify a wide unicode character (one requiring more than a +# single byte). In the C/POSIX locales we substitute a question-mark for the +# unencodable wide char. The output should be "543f54". +env LC_ALL=C ../test/root/bin/fish -c 'echo -n T\u01FDT' | \ + xxd --plain diff --git a/tests/locale.out b/tests/locale.out new file mode 100644 index 00000000..b7c97c15 --- /dev/null +++ b/tests/locale.out @@ -0,0 +1,8 @@ +41c3bc41 +42fc42 +43c3bc43 +44c3bc44 +58c3bb58 +58c3bc58 +59fc59 +543f54 diff --git a/tests/locale.status b/tests/locale.status new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/tests/locale.status @@ -0,0 +1 @@ +0 -- cgit v1.2.3