From 0ca103686f3896fcfb9e44601c35371d53140ca4 Mon Sep 17 00:00:00 2001
From: Kurtis Rader <krader@skepticism.us>
Date: Mon, 13 Jun 2016 19:00:30 -0700
Subject: remove unset vars from the environment

Remove vars from the environment that are no longer set. Simplify the code by
removing an unnecessary loop. Add some tests.

Fixes #3124
---
 src/builtin_set.cpp   |  3 +--
 src/env.cpp           | 44 +++++++++++++++++--------------------
 tests/c-locale.err    |  0
 tests/c-locale.in     | 35 -----------------------------
 tests/c-locale.out    |  4 ----
 tests/c-locale.status |  1 -
 tests/locale.err      |  0
 tests/locale.in       | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/locale.out      |  8 +++++++
 tests/locale.status   |  1 +
 10 files changed, 91 insertions(+), 66 deletions(-)
 delete mode 100644 tests/c-locale.err
 delete mode 100644 tests/c-locale.in
 delete mode 100644 tests/c-locale.out
 delete mode 100644 tests/c-locale.status
 create mode 100644 tests/locale.err
 create mode 100644 tests/locale.in
 create mode 100644 tests/locale.out
 create mode 100644 tests/locale.status

diff --git a/src/builtin_set.cpp b/src/builtin_set.cpp
index 2333e0c0..cbd0a871 100644
--- a/src/builtin_set.cpp
+++ b/src/builtin_set.cpp
@@ -321,8 +321,7 @@ static void print_variables(int include_values, int esc, bool shorten_ok, int sc
     }
 }
 
-/// The set builtin. Creates, updates and erases environment variables and environemnt variable
-/// arrays.
+/// The set builtin creates, updates, and erases (removes, deletes) variables.
 int builtin_set(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
     wgetopter_t w;
     // Variables used for parsing the argument list.
diff --git a/src/env.cpp b/src/env.cpp
index c7924aef..0ec45448 100644
--- a/src/env.cpp
+++ b/src/env.cpp
@@ -174,16 +174,14 @@ static bool var_is_locale(const wcstring &key) {
 static void handle_locale(const wchar_t *env_var_name) {
     debug(2, L"handle_locale() called in response to '%ls' changing", env_var_name);
     const char *old_msg_locale = setlocale(LC_MESSAGES, NULL);
-
-    for (size_t i = 0; locale_variable[i]; i++) {
-        const wchar_t *key = locale_variable[i];
-        const env_var_t var = env_get_string(key);
-        if (!var.empty()) {
-            const std::string &name = wcs2string(key);
-            const std::string &value = wcs2string(var);
-            setenv(name.c_str(), value.c_str(), 1);
-            debug(3, L"locale var %s='%s'", name.c_str(), value.c_str());
-        }
+    const env_var_t val = env_get_string(env_var_name, ENV_EXPORT);
+    const std::string &value = wcs2string(val);
+    const std::string &name = wcs2string(env_var_name);
+    debug(2, L"locale var %s='%s'", name.c_str(), value.c_str());
+    if (val.empty()) {
+        unsetenv(name.c_str());
+    } else {
+        setenv(name.c_str(), value.c_str(), 1);
     }
 
     char *locale = setlocale(LC_ALL, "");
@@ -216,15 +214,14 @@ static bool var_is_curses(const wcstring &key) {
 /// libraries.
 static void handle_curses(const wchar_t *env_var_name) {
     debug(2, L"handle_curses() called in response to '%ls' changing", env_var_name);
-    for (size_t i = 0; curses_variable[i]; i++) {
-        const wchar_t *key = curses_variable[i];
-        const env_var_t var = env_get_string(key);
-        if (!var.empty()) {
-            const std::string &name = wcs2string(key);
-            const std::string &value = wcs2string(var);
-            setenv(name.c_str(), value.c_str(), 1);
-            debug(3, L"curses var %s='%s'", name.c_str(), value.c_str());
-        }
+    const env_var_t val = env_get_string(env_var_name, ENV_EXPORT);
+    const std::string &name = wcs2string(env_var_name);
+    const std::string &value = wcs2string(val);
+    debug(2, L"curses var %s='%s'", name.c_str(), value.c_str());
+    if (val.empty()) {
+        unsetenv(name.c_str());
+    } else {
+        setenv(name.c_str(), value.c_str(), 1);
     }
     // TODO: Modify input_init() to allow calling it when the terminfo env vars are dynamically
     // changed. At the present time it can be called just once. Also, we should really only do this
@@ -318,11 +315,10 @@ static bool variable_is_colon_delimited_array(const wcstring &str) {
 }
 
 void env_init(const struct config_paths_t *paths /* or NULL */) {
-    // env_read_only variables can not be altered directly by the user.
+    // These variables can not be altered directly by the user.
     const wchar_t *const ro_keys[] = {
-        L"status", L"history", L"_", L"LINES", L"COLUMNS", L"PWD",
-        // L"SHLVL", // will be inserted a bit lower down
-        L"FISH_VERSION",
+        L"status", L"history", L"_", L"LINES", L"COLUMNS", L"PWD", L"FISH_VERSION",
+        // L"SHLVL" is readonly but will be inserted below after we increment it.
     };
     for (size_t i = 0; i < sizeof ro_keys / sizeof *ro_keys; i++) {
         env_read_only.insert(ro_keys[i]);
@@ -339,7 +335,7 @@ void env_init(const struct config_paths_t *paths /* or NULL */) {
     global_env = top;
     global = &top->env;
 
-    // Now the environemnt variable handling is set up, the next step is to insert valid data.
+    // Now the environment variable handling is set up, the next step is to insert valid data.
 
     // Import environment variables. Walk backwards so that the first one out of any duplicates wins
     // (#2784).
diff --git a/tests/c-locale.err b/tests/c-locale.err
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/c-locale.in b/tests/c-locale.in
deleted file mode 100644
index d2f2bd5f..00000000
--- a/tests/c-locale.in
+++ /dev/null
@@ -1,35 +0,0 @@
-# Verify that fish can pass through non-ASCII characters in the C/POSIX
-# locale. This is to prevent regression of
-# https://github.com/fish-shell/fish-shell/issues/2802.
-#
-# These tests are needed because the relevant standards allow the functions
-# mbrtowc() and wcrtomb() to treat bytes with the high bit set as either valid
-# or invalid in the C/POSIX locales. GNU libc treats those bytes as invalid.
-# Other libc implementations (e.g., BSD) treat them as valid. We want fish to
-# always treat those bytes as valid.
-
-# The fish in the middle of the pipeline should be receiving a UTF-8 encoded
-# version of the unicode from the echo. It should pass those bytes thru
-# literally since it is in the C locale. We verify this by first passing the
-# echo output directly to the `xxd` program then via a fish instance. The
-# output should be "58c3bb58" for the first statement and "58c3bc58" for the
-# second.
-echo -n X\u00fbX | \
-  xxd --plain
-echo X\u00fcX | env LC_ALL=C ../test/root/bin/fish -c 'read foo; echo -n $foo' | \
-  xxd --plain
-
-# This test is subtle. Despite the presence of the \u00fc unicode char (a "u"
-# with an umlaut) the fact the locale is C/POSIX will cause the \xfc byte to
-# be emitted rather than the usual UTF-8 sequence \xc3\xbc. That's because the
-# few single-byte unicode chars (that are not ASCII) are generally in the
-# ISO-8859-1 char set which is encompased by the C locale. The output should
-# be "59fc59".
-env LC_ALL=C ../test/root/bin/fish -c 'echo -n Y\u00fcY' | \
-  xxd --plain
-
-# The user can specify a wide unicode character (one requiring more than a
-# single byte). In the C/POSIX locales we substitute a question-mark for the
-# unencodable wide char. The output should be "543f54".
-env LC_ALL=C ../test/root/bin/fish -c 'echo -n T\u01fdT' | \
-  xxd --plain
diff --git a/tests/c-locale.out b/tests/c-locale.out
deleted file mode 100644
index 10a94d3e..00000000
--- a/tests/c-locale.out
+++ /dev/null
@@ -1,4 +0,0 @@
-58c3bb58
-58c3bc58
-59fc59
-543f54
diff --git a/tests/c-locale.status b/tests/c-locale.status
deleted file mode 100644
index 573541ac..00000000
--- a/tests/c-locale.status
+++ /dev/null
@@ -1 +0,0 @@
-0
diff --git a/tests/locale.err b/tests/locale.err
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/locale.in b/tests/locale.in
new file mode 100644
index 00000000..d08fd7fd
--- /dev/null
+++ b/tests/locale.in
@@ -0,0 +1,61 @@
+# Test behavior related to the locale.
+
+# Verify that our UTF-8 locale produces the expected output.
+echo -n A\u00FCA | xxd --plain
+
+# Verify that exporting a change to the C locale produces the expected output.
+# The output should include the literal byte \xFC rather than the UTF-8 sequence for \u00FC.
+begin
+	set -lx LC_ALL C
+	echo -n B\u00FCB | xxd --plain
+end
+
+# Since the previous change was localized to a block it should no
+# longer be in effect and we should be back to a UTF-8 locale.
+echo -n C\u00FCC | xxd --plain
+
+# Verify that setting a non-exported locale var doesn't affect the behavior.
+# The output should include the UTF-8 sequence for \u00FC rather than that literal byte.
+# Just like the previous test.
+begin
+	set -l LC_ALL C
+	echo -n D\u00FCD | xxd --plain
+end
+
+# Verify that fish can pass through non-ASCII characters in the C/POSIX
+# locale. This is to prevent regression of
+# https://github.com/fish-shell/fish-shell/issues/2802.
+#
+# These tests are needed because the relevant standards allow the functions
+# mbrtowc() and wcrtomb() to treat bytes with the high bit set as either valid
+# or invalid in the C/POSIX locales. GNU libc treats those bytes as invalid.
+# Other libc implementations (e.g., BSD) treat them as valid. We want fish to
+# always treat those bytes as valid.
+
+# The fish in the middle of the pipeline should be receiving a UTF-8 encoded
+# version of the unicode from the echo. It should pass those bytes thru
+# literally since it is in the C locale. We verify this by first passing the
+# echo output directly to the `xxd` program then via a fish instance. The
+# output should be "58c3bb58" for the first statement and "58c3bc58" for the
+# second.
+echo -n X\u00FBX | \
+  xxd --plain
+echo X\u00FCX | env LC_ALL=C ../test/root/bin/fish -c 'read foo; echo -n $foo' | \
+  xxd --plain
+
+# The next tests deliberately spawn another fish instance to test inheritence of env vars.
+
+# This test is subtle. Despite the presence of the \u00fc unicode char (a "u"
+# with an umlaut) the fact the locale is C/POSIX will cause the \xfc byte to
+# be emitted rather than the usual UTF-8 sequence \xc3\xbc. That's because the
+# few single-byte unicode chars (that are not ASCII) are generally in the
+# ISO 8859-x char sets which are encompassed by the C locale. The output should
+# be "59fc59".
+env LC_ALL=C ../test/root/bin/fish -c 'echo -n Y\u00FCY' | \
+  xxd --plain
+
+# The user can specify a wide unicode character (one requiring more than a
+# single byte). In the C/POSIX locales we substitute a question-mark for the
+# unencodable wide char. The output should be "543f54".
+env LC_ALL=C ../test/root/bin/fish -c 'echo -n T\u01FDT' | \
+  xxd --plain
diff --git a/tests/locale.out b/tests/locale.out
new file mode 100644
index 00000000..b7c97c15
--- /dev/null
+++ b/tests/locale.out
@@ -0,0 +1,8 @@
+41c3bc41
+42fc42
+43c3bc43
+44c3bc44
+58c3bb58
+58c3bc58
+59fc59
+543f54
diff --git a/tests/locale.status b/tests/locale.status
new file mode 100644
index 00000000..573541ac
--- /dev/null
+++ b/tests/locale.status
@@ -0,0 +1 @@
+0
-- 
cgit v1.2.3