aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/common.h
diff options
context:
space:
mode:
authorGravatar Kurtis Rader <krader@skepticism.us>2016-01-21 19:56:39 -0800
committerGravatar Kurtis Rader <krader@skepticism.us>2016-02-28 18:36:34 -0800
commitf2246dfb343bea19beb176fb2cc534f85513b2eb (patch)
treef95f0c4f4f48d445c1d561aa0986d2b98cdecec5 /src/common.h
parentb41b96233616f26e52663c133f7a29d32b0e9142 (diff)
reduce number of Unicode private-use characters
This narrows the range of Unicode codepoints fish reserves for its own use from U+E000 through U+F8FE (6399 codepoints) to U+F600 through U+F73F (320 codepoints). This is still not ideal, since fish shouldn't be using any Unicode private-use codepoints, but it's a step in the right direction. This partially addresses issue #2684.
Diffstat (limited to 'src/common.h')
-rw-r--r--src/common.h65
1 files changed, 45 insertions, 20 deletions
diff --git a/src/common.h b/src/common.h
index d255ac62..74640988 100644
--- a/src/common.h
+++ b/src/common.h
@@ -35,34 +35,59 @@
typedef std::wstring wcstring;
typedef std::vector<wcstring> wcstring_list_t;
-/**
- Maximum number of bytes used by a single utf-8 character
-*/
+// Maximum number of bytes used by a single utf-8 character.
#define MAX_UTF8_BYTES 6
-/**
- This is in the unicode private use area.
-*/
-#define ENCODE_DIRECT_BASE 0xf100
-
-/**
- Highest legal ascii value
-*/
+// Highest legal ASCII value.
#define ASCII_MAX 127u
-/**
- Highest legal 16-bit unicode value
-*/
-#define UCS2_MAX 0xffffu
+// Highest legal 16-bit Unicode value.
+#define UCS2_MAX 0xFFFFu
-/**
- Highest legal byte value
-*/
-#define BYTE_MAX 0xffu
+// Highest legal byte value.
+#define BYTE_MAX 0xFFu
-/** BOM value */
+// Unicode BOM value.
#define UTF8_BOM_WCHAR 0xFEFFu
+// Unicode replacement character.
+#define REPLACEMENT_WCHAR 0xFFFDu
+
+// Use Unicode "noncharacters" for internal characters as much as we can. This
+// gives us 32 "characters" for internal use that we can guarantee should not
+// appear in our input stream. See http://www.unicode.org/faq/private_use.html.
+#define RESERVED_CHAR_BASE 0xFDD0u
+#define RESERVED_CHAR_END 0xFDF0u
+// Split the available noncharacter values into two ranges to ensure there are
+// no conflicts among the places we use these special characters.
+#define EXPAND_RESERVED_BASE RESERVED_CHAR_BASE
+#define EXPAND_RESERVED_END (EXPAND_RESERVED_BASE + 16)
+#define WILDCARD_RESERVED_BASE EXPAND_RESERVED_END
+#define WILDCARD_RESERVED_END (WILDCARD_RESERVED_BASE + 16)
+// Make sure the ranges defined above don't exceed the range for noncharacters.
+// This is to make sure we didn't do something stupid in subdividing the
+// Unicode range for our needs.
+#if WILDCARD_RESERVED_END > RESERVED_CHAR_END
+#error
+#endif
+
+// These are in the Unicode private-use range. We really shouldn't use this
+// range but have little choice in the matter given how our lexer/parser works.
+// We can't use non-characters for these two ranges because there are only 66 of
+// them and we need at least 256 + 64.
+//
+// If sizeof(wchar_t) == 4 we could avoid using private-use chars; however, that
+// would result in fish having different behavior on machines with 16 versus 32
+// bit wchar_t. It's better that fish behave the same on both types of systems.
+//
+// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
+// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
+// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
+#define ENCODE_DIRECT_BASE 0xF600u
+#define ENCODE_DIRECT_END (ENCODE_DIRECT_BASE + 256)
+#define INPUT_COMMON_BASE 0xF700u
+#define INPUT_COMMON_END (INPUT_COMMON_BASE + 64)
+
/* Flags for unescape_string functions */
enum
{