aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/builtin_printf.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/builtin_printf.cpp')
-rw-r--r--src/builtin_printf.cpp787
1 files changed, 787 insertions, 0 deletions
diff --git a/src/builtin_printf.cpp b/src/builtin_printf.cpp
new file mode 100644
index 00000000..916166b0
--- /dev/null
+++ b/src/builtin_printf.cpp
@@ -0,0 +1,787 @@
+/* printf - format and print data
+ Copyright (C) 1990-2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Usage: printf format [argument...]
+
+ A front end to the printf function that lets it be used from the shell.
+
+ Backslash escapes:
+
+ \" = double quote
+ \\ = backslash
+ \a = alert (bell)
+ \b = backspace
+ \c = produce no further output
+ \e = escape
+ \f = form feed
+ \n = new line
+ \r = carriage return
+ \t = horizontal tab
+ \v = vertical tab
+ \ooo = octal number (ooo is 1 to 3 digits)
+ \xhh = hexadecimal number (hh is 1 to 2 digits)
+ \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
+ \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
+
+ Additional directive:
+
+ %b = print an argument string, interpreting backslash escapes,
+ except that octal escapes are of the form \0 or \0ooo.
+
+ The `format' argument is re-used as many times as necessary
+ to convert all of the given arguments.
+
+ David MacKenzie <djm@gnu.ai.mit.edu> */
+
+/* This file has been imported from source code of printf command in GNU Coreutils version 6.9 */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <inttypes.h>
+
+#include "common.h"
+
+/* State shared by the helpers of one printf invocation: the status to report
+   and a flag that turns all further output into a no-op. */
+struct builtin_printf_state_t
+{
+    /* Status of the operation; starts at 0 and is changed by fatal_error */
+    int exit_code;
+
+    /* True once output must stop: set by fatal_error and by the \c escape */
+    bool early_exit;
+
+    builtin_printf_state_t() : exit_code(0), early_exit(false) {}
+
+    /* Error reporting and validation of numeric conversions */
+    void fatal_error(const wchar_t *format, ...);
+    void verify_numeric(const wchar_t *s, const wchar_t *end, int errcode);
+
+    /* Format-string processing */
+    int print_formatted(const wchar_t *format, int argc, wchar_t **argv);
+    void print_direc(const wchar_t *start, size_t length, wchar_t conversion,
+                     bool have_field_width, int field_width,
+                     bool have_precision, int precision,
+                     wchar_t const *argument);
+
+    /* Backslash-escape processing */
+    long print_esc(const wchar_t *escstart, bool octal_0);
+    void print_esc_string(const wchar_t *str);
+    void print_esc_char(wchar_t c);
+
+    /* Output sinks */
+    void append_output(wchar_t c);
+    void append_output(const wchar_t *c);
+    void append_format_output(const wchar_t *fmt, ...);
+};
+
+/* True if C is one of the octal digits '0' through '7'. */
+static bool is_octal_digit(wchar_t c)
+{
+    return L'0' <= c && c <= L'7';
+}
+
+/* True if C is a hexadecimal digit (0-9, A-F, a-f). */
+static bool is_hex_digit(wchar_t c)
+{
+    static const wchar_t hex_digits[] = L"0123456789ABCDEFabcdef";
+    for (const wchar_t *cursor = hex_digits; *cursor != L'\0'; ++cursor)
+    {
+        if (*cursor == c)
+            return true;
+    }
+    /* The terminating NUL is never compared, so L'\0' is rejected as well */
+    return false;
+}
+
+/* Return the numeric value (0-15) of the hexadecimal digit C, accepting both
+   letter cases, or -1 if C is not a hexadecimal digit. */
+static int hex_to_bin(const wchar_t &c)
+{
+    static const wchar_t lower_digits[] = L"0123456789abcdef";
+    static const wchar_t upper_digits[] = L"0123456789ABCDEF";
+
+    /* The index of the matching digit is its value */
+    for (int value = 0; value < 16; ++value)
+    {
+        if (c == lower_digits[value] || c == upper_digits[value])
+            return value;
+    }
+    return -1;
+}
+
+/* Return the numeric value (0-7) of the octal digit C, or -1 if C is not an
+   octal digit. */
+static int octal_to_bin(wchar_t c)
+{
+    if (c < L'0' || c > L'7')
+        return -1;
+
+    /* Decimal digit characters are consecutive, so the offset is the value */
+    return static_cast<int>(c - L'0');
+}
+
+/* Warning text for a quoted character constant argument that is followed by
+ extra characters; the %ls placeholder takes the offending argument.
+ This message appears in N_() here rather than just in _() below because
+ the sole use would have been in a #define.
+ NOTE(review): no use of cfcc_msg is visible in this part of the file; confirm
+ whether it is still referenced before relying on it. */
+static wchar_t const *const cfcc_msg =
+ N_(L"warning: %ls: character(s) following character constant have been ignored");
+
+/* Parse a floating point number from NPTR with wcstod while LC_NUMERIC is
+   temporarily switched to the "C" locale. ENDPTR is passed through to wcstod.
+   The locale is only switched (and later restored) when the current setting
+   could be queried, i.e. when wsetlocale returned a non-empty name. */
+double C_STRTOD(wchar_t const *nptr, wchar_t **endptr)
+{
+    const wcstring previous_locale = wsetlocale(LC_NUMERIC, NULL);
+    const bool switch_locale = !previous_locale.empty();
+
+    if (switch_locale)
+        wsetlocale(LC_NUMERIC, L"C");
+
+    const double result = wcstod(nptr, endptr);
+
+    if (switch_locale)
+        wsetlocale(LC_NUMERIC, previous_locale.c_str());
+
+    return result;
+}
+
+void builtin_printf_state_t::fatal_error(const wchar_t *fmt, ...)
+{
+ // Don't error twice
+ if (early_exit)
+ return;
+
+ va_list va;
+ va_start(va, fmt);
+ wcstring errstr = vformat_string(fmt, va);
+ va_end(va);
+ stderr_buffer.append(errstr);
+ if (! string_suffixes_string(L"\n", errstr))
+ stderr_buffer.push_back(L'\n');
+
+ this->exit_code = STATUS_BUILTIN_ERROR;
+ this->early_exit = true;
+}
+
+/* Append the single character C to stdout_buffer, unless output has been
+   stopped. */
+void builtin_printf_state_t::append_output(wchar_t c)
+{
+    if (! early_exit)
+    {
+        stdout_buffer.push_back(c);
+    }
+}
+
+/* Append the NUL-terminated string C to stdout_buffer, unless output has been
+   stopped. */
+void builtin_printf_state_t::append_output(const wchar_t *c)
+{
+    if (! early_exit)
+    {
+        stdout_buffer.append(c);
+    }
+}
+
+/* Format the variadic arguments according to FMT and append the result to
+   stdout_buffer, unless output has been stopped. */
+void builtin_printf_state_t::append_format_output(const wchar_t *fmt, ...)
+{
+    if (! early_exit)
+    {
+        va_list args;
+        va_start(args, fmt);
+        append_formatv(stdout_buffer, fmt, args);
+        va_end(args);
+    }
+}
+
+
+
+void builtin_printf_state_t::verify_numeric(const wchar_t *s, const wchar_t *end, int errcode)
+{
+ if (errcode != 0)
+ {
+ this->fatal_error(L"%ls: %s", s, strerror(errcode));
+ }
+ else if (*end)
+ {
+ if (s == end)
+ this->fatal_error(_(L"%ls: expected a numeric value"), s);
+ else
+ this->fatal_error(_(L"%ls: value not completely converted"), s);
+ }
+}
+
+/* Convert the leading part of S to a value of type T, storing in *END the
+   position where parsing stopped. Only the specializations below exist. */
+template<typename T>
+static T raw_string_to_scalar_type(const wchar_t *s, wchar_t ** end);
+
+// wcstoll/wcstoull are used rather than wcstoimax/wcstoumax because FreeBSD 8
+// ships broken versions of the latter two - see #626
+template<>
+intmax_t raw_string_to_scalar_type(const wchar_t *s, wchar_t ** end)
+{
+    /* Base 0 lets the usual 0 and 0x prefixes select octal and hexadecimal */
+    const long long parsed = wcstoll(s, end, 0);
+    return parsed;
+}
+
+template<>
+uintmax_t raw_string_to_scalar_type(const wchar_t *s, wchar_t ** end)
+{
+    const unsigned long long parsed = wcstoull(s, end, 0);
+    return parsed;
+}
+
+template<>
+long double raw_string_to_scalar_type(const wchar_t *s, wchar_t ** end)
+{
+    /* C_STRTOD yields a double, which is then widened to long double */
+    const double parsed = C_STRTOD(s, end);
+    return parsed;
+}
+
+/* Convert the argument S to a value of type T.
+   If S starts with a single or double quote, the result is the code of the
+   character right after the quote. Otherwise S is parsed as a number and any
+   conversion problem is reported through STATE. */
+template<typename T>
+static T string_to_scalar_type(const wchar_t *s, builtin_printf_state_t *state)
+{
+    const bool is_char_constant = (*s == L'\"' || *s == L'\'');
+    if (is_char_constant)
+    {
+        const wchar_t quoted = s[1];
+        T char_value = quoted;
+        return char_value;
+    }
+
+    wchar_t *parse_end = NULL;
+    /* Clear errno so that a stale value is not mistaken for a parse failure */
+    errno = 0;
+    T parsed = raw_string_to_scalar_type<T>(s, &parse_end);
+    state->verify_numeric(s, parse_end, errno);
+    return parsed;
+}
+
+/* Output the character denoted by the single-character escape \C.
+   \c produces nothing and stops all further output; a character without a
+   special meaning is output unchanged. */
+
+void builtin_printf_state_t::print_esc_char(wchar_t c)
+{
+    if (c == L'c')
+    {
+        /* Cancel the rest of the output. */
+        this->early_exit = true;
+        return;
+    }
+
+    wchar_t translated = c;
+    switch (c)
+    {
+        case L'a':
+            translated = L'\a';   /* Alert. */
+            break;
+        case L'b':
+            translated = L'\b';   /* Backspace. */
+            break;
+        case L'e':
+            translated = L'\x1B'; /* Escape. */
+            break;
+        case L'f':
+            translated = L'\f';   /* Form feed. */
+            break;
+        case L'n':
+            translated = L'\n';   /* New line. */
+            break;
+        case L'r':
+            translated = L'\r';   /* Carriage return. */
+            break;
+        case L't':
+            translated = L'\t';   /* Horizontal tab. */
+            break;
+        case L'v':
+            translated = L'\v';   /* Vertical tab. */
+            break;
+        default:
+            break;
+    }
+    this->append_output(translated);
+}
+
+/* Print a \ escape sequence starting at ESCSTART, which points at the
+ backslash itself.
+ Return the number of characters in the escape sequence
+ besides the backslash; callers add this to their cursor and then advance
+ once more to step over the backslash.
+ If OCTAL_0 is true, octal escapes are of the form \0ooo, where o
+ is an octal digit; otherwise they are of the form \ooo.
+ Errors (missing digits, out-of-range Unicode value) are reported through
+ fatal_error, after which the append_* calls below produce no output. */
+long builtin_printf_state_t::print_esc(const wchar_t *escstart, bool octal_0)
+{
+ const wchar_t *p = escstart + 1;
+ int esc_value = 0; /* Value of \nnn escape. */
+ int esc_length; /* Length of \nnn escape. */
+
+ if (*p == L'x')
+ {
+ /* A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits. */
+ for (esc_length = 0, ++p; esc_length < 2 && is_hex_digit(*p); ++esc_length, ++p)
+ esc_value = esc_value * 16 + hex_to_bin(*p);
+ if (esc_length == 0)
+ this->fatal_error(_(L"missing hexadecimal number in escape"));
+ /* NOTE(review): ENCODE_DIRECT_BASE is defined outside this file; presumably the offset marks the value as a raw byte rather than a character - confirm. */
+ this->append_output(ENCODE_DIRECT_BASE + esc_value % 256);
+ }
+ else if (is_octal_digit(*p))
+ {
+ /* Parse \0ooo (if octal_0 && *p == L'0') or \ooo (otherwise).
+ Allow \ooo if octal_0 && *p != L'0'; this is an undocumented
+ extension to POSIX that is compatible with Bash 2.05b. */
+ /* Wrap mod 256, which matches historic behavior */
+ /* The boolean added to p skips the leading 0 of the \0ooo form, so that up to three digits may follow it. */
+ for (esc_length = 0, p += octal_0 && *p == L'0'; esc_length < 3 && is_octal_digit(*p); ++esc_length, ++p)
+ esc_value = esc_value * 8 + octal_to_bin(*p);
+ this->append_output(ENCODE_DIRECT_BASE + esc_value % 256);
+ }
+ else if (*p && wcschr(L"\"\\abcefnrtv", *p))
+ {
+ /* Single-character escape; the *p test keeps wcschr from matching the string terminator. */
+ print_esc_char(*p++);
+ }
+ else if (*p == L'u' || *p == L'U')
+ {
+ /* \u takes up to 4 hex digits and \U up to 8; fewer digits are accepted as long as there is at least one. */
+ wchar_t esc_char = *p;
+ p++;
+ uint32_t uni_value = 0;
+ /* This esc_length is local to the loop and shadows the int declared at the top of the function. */
+ for (size_t esc_length = 0; esc_length < (esc_char == L'u' ? 4 : 8); esc_length++)
+ {
+ if (! is_hex_digit(*p))
+ {
+ /* Escape sequence must be done. Complain if we didn't get anything */
+ if (esc_length == 0)
+ {
+ this->fatal_error(_(L"Missing hexadecimal number in Unicode escape"));
+ }
+ break;
+ }
+ uni_value = uni_value * 16 + hex_to_bin(*p);
+ p++;
+ }
+
+ /* PCA GNU printf respects the limitations described in ISO N717, about which universal characters "shall not" be specified. I believe this limitation is for the benefit of compilers; I see no reason to impose it in builtin_printf.
+
+ If __STDC_ISO_10646__ is defined, then it means wchar_t can and does hold Unicode code points, so just use that. If not defined, use the %lc printf conversion; this probably won't do anything good if your wide character set is not Unicode, but such platforms are exceedingly rare.
+ */
+ /* Values above 0x10FFFF are rejected as out of range. */
+ if (uni_value > 0x10FFFF)
+ {
+ this->fatal_error(_(L"Unicode character out of range: \\%c%0*x"), esc_char, (esc_char == L'u' ? 4 : 8), uni_value);
+ }
+ else
+ {
+#if defined(__STDC_ISO_10646__)
+ this->append_output(uni_value);
+#else
+ this->append_format_output(L"%lc", uni_value);
+#endif
+ }
+ }
+ else
+ {
+ /* Unrecognized escape: output the backslash and the following character (if any) literally. */
+ this->append_output(L'\\');
+ if (*p)
+ {
+ this->append_output(*p);
+ p++;
+ }
+ }
+ /* p now points just past the escape; subtract the start and the backslash. */
+ return p - escstart - 1;
+}
+
+/* Output STR, interpreting backslash escapes. Octal escapes use the \0ooo
+   form here (print_esc is called with octal_0 set). */
+
+void builtin_printf_state_t::print_esc_string(const wchar_t *str)
+{
+    const wchar_t *cursor = str;
+    while (*cursor != L'\0')
+    {
+        if (*cursor == L'\\')
+        {
+            /* Skip the escape's payload; the increment below skips the backslash */
+            cursor += print_esc(cursor, true);
+        }
+        else
+        {
+            this->append_output(*cursor);
+        }
+        cursor++;
+    }
+}
+
+/* Append ARG to STATE's output, formatted according to FMT.
+   FMT must contain a '*' for the field width when HAVE_FIELD_WIDTH is true
+   and a '.*' for the precision when HAVE_PRECISION is true; FIELD_WIDTH and
+   PRECISION are passed ahead of ARG in that order so they line up with those
+   '*' placeholders. T must be exactly the type FMT's conversion expects. */
+template<typename T>
+static void append_formatted_value(builtin_printf_state_t *state, const wcstring &fmt,
+                                   bool have_field_width, int field_width,
+                                   bool have_precision, int precision,
+                                   T arg)
+{
+    const wchar_t *format = fmt.c_str();
+    if (have_field_width && have_precision)
+        state->append_format_output(format, field_width, precision, arg);
+    else if (have_field_width)
+        state->append_format_output(format, field_width, arg);
+    else if (have_precision)
+        state->append_format_output(format, precision, arg);
+    else
+        state->append_format_output(format, arg);
+}
+
+/* Evaluate a printf conversion specification. START is the start of
+   the directive, LENGTH is its length, and CONVERSION specifies the
+   type of conversion. LENGTH does not include any length modifier or
+   the conversion specifier itself. FIELD_WIDTH and PRECISION are the
+   field width and precision for '*' values, if HAVE_FIELD_WIDTH and
+   HAVE_PRECISION are true, respectively. ARGUMENT is the argument to
+   be formatted.
+
+   The directive is rebuilt with a length modifier chosen here, and the
+   converted argument is cast to the type that modifier names, so the format
+   and the variadic argument always agree. Conversions other than the ones
+   handled below produce no output. */
+
+void builtin_printf_state_t::print_direc(const wchar_t *start, size_t length, wchar_t conversion,
+                                         bool have_field_width, int field_width,
+                                         bool have_precision, int precision,
+                                         wchar_t const *argument)
+{
+    // Start with everything except the length modifier and conversion specifier
+    wcstring fmt(start, length);
+
+    switch (conversion)
+    {
+        case L'd':
+        case L'i':
+        {
+            /* Signed integers are printed as long long */
+            fmt.append(L"ll");
+            fmt.push_back(conversion);
+            const intmax_t arg = string_to_scalar_type<intmax_t>(argument, this);
+            append_formatted_value(this, fmt, have_field_width, field_width,
+                                   have_precision, precision,
+                                   static_cast<long long>(arg));
+            break;
+        }
+
+        case L'o':
+        case L'u':
+        case L'x':
+        case L'X':
+        {
+            /* Every unsigned conversion needs the ll modifier, not only %u:
+               without it the formatter would read an unsigned int from an
+               argument that is passed as unsigned long long. */
+            fmt.append(L"ll");
+            fmt.push_back(conversion);
+            const uintmax_t arg = string_to_scalar_type<uintmax_t>(argument, this);
+            append_formatted_value(this, fmt, have_field_width, field_width,
+                                   have_precision, precision,
+                                   static_cast<unsigned long long>(arg));
+            break;
+        }
+
+        case L'a':
+        case L'A':
+        case L'e':
+        case L'E':
+        case L'f':
+        case L'F':
+        case L'g':
+        case L'G':
+        {
+            /* Floating point values are printed as long double */
+            fmt.append(L"L");
+            fmt.push_back(conversion);
+            const long double arg = string_to_scalar_type<long double>(argument, this);
+            append_formatted_value(this, fmt, have_field_width, field_width,
+                                   have_precision, precision, arg);
+            break;
+        }
+
+        case L'c':
+        {
+            /* %lc takes a wint_t. Only the first character of the argument is
+               printed, and a precision is never forwarded for %c. */
+            fmt.append(L"l");
+            fmt.push_back(conversion);
+            append_formatted_value(this, fmt, have_field_width, field_width,
+                                   false, 0,
+                                   static_cast<wint_t>(*argument));
+            break;
+        }
+
+        case L's':
+        {
+            /* %ls takes a wide string */
+            fmt.append(L"l");
+            fmt.push_back(conversion);
+            append_formatted_value(this, fmt, have_field_width, field_width,
+                                   have_precision, precision, argument);
+            break;
+        }
+
+        default:
+            break;
+    }
+}
+
+/* Set ok[ch] to FLAG for every character ch of the NUL-terminated string STR.
+   Each character is indexed as an unsigned char so the index is never
+   negative. */
+static inline void modify_allowed_format_specifiers(bool ok[UCHAR_MAX + 1], const char *str, bool flag)
+{
+    const char *cursor = str;
+    while (*cursor != '\0')
+    {
+        ok[static_cast<unsigned char>(*cursor)] = flag;
+        ++cursor;
+    }
+}
+
+/* Print the text in FORMAT, using ARGV (with ARGC elements) for
+ arguments to any `%' directives.
+ Arguments are consumed left to right: by `*' field widths and precisions,
+ by %b, and by each conversion. A directive that finds no argument left
+ uses an empty string (or 0 for a `*').
+ Return the number of elements of ARGV used, or 0 after reporting an
+ invalid conversion specification. */
+
+int builtin_printf_state_t::print_formatted(const wchar_t *format, int argc, wchar_t **argv)
+{
+ int save_argc = argc; /* Preserve original value. */
+ const wchar_t *f; /* Pointer into `format'. */
+ const wchar_t *direc_start; /* Start of % directive. */
+ size_t direc_length; /* Length of % directive. */
+ bool have_field_width; /* True if FIELD_WIDTH is valid. */
+ int field_width = 0; /* Arg to first '*'. */
+ bool have_precision; /* True if PRECISION is valid. */
+ int precision = 0; /* Arg to second '*'. */
+ bool ok[UCHAR_MAX + 1] = { }; /* ok['x'] is true if %x is allowed. */
+
+ for (f = format; *f != L'\0'; ++f)
+ {
+ switch (*f)
+ {
+ case L'%':
+ /* direc_start stays on the '%'; f moves on to the character after it. */
+ direc_start = f++;
+ direc_length = 1;
+ have_field_width = have_precision = false;
+ if (*f == L'%')
+ {
+ /* "%%" prints a literal percent sign. */
+ this->append_output(L'%');
+ break;
+ }
+ if (*f == L'b')
+ {
+ /* FIXME: Field width and precision are not supported
+ for %b, even though POSIX requires it. */
+ /* %b prints the next argument with backslash escapes interpreted; with no argument left it prints nothing. */
+ if (argc > 0)
+ {
+ print_esc_string(*argv);
+ ++argv;
+ --argc;
+ }
+ break;
+ }
+
+ /* Start every directive with all conversions allowed; the flags and the precision parsed below disable the ones they cannot be combined with. */
+ modify_allowed_format_specifiers(ok, "aAcdeEfFgGiosuxX", true);
+
+ /* Consume flag characters, counting each one into direc_length. */
+ for (;; f++, direc_length++)
+ {
+ switch (*f)
+ {
+ case L'I':
+ case L'\'':
+ modify_allowed_format_specifiers(ok, "aAceEosxX", false);
+ break;
+ case '-':
+ case '+':
+ case ' ':
+ break;
+ case L'#':
+ modify_allowed_format_specifiers(ok, "cdisu", false);
+ break;
+ case '0':
+ modify_allowed_format_specifiers(ok, "cs", false);
+ break;
+ default:
+ goto no_more_flag_characters;
+ }
+ }
+no_more_flag_characters:
+ ;
+
+ /* Field width: either '*' (taken from the next argument) or a run of digits that stays part of the directive text. */
+ if (*f == L'*')
+ {
+ ++f;
+ ++direc_length;
+ if (argc > 0)
+ {
+ intmax_t width = string_to_scalar_type<intmax_t>(*argv, this);
+ if (INT_MIN <= width && width <= INT_MAX)
+ field_width = static_cast<int>(width);
+ else
+ this->fatal_error(_(L"invalid field width: %ls"), *argv);
+ ++argv;
+ --argc;
+ }
+ else
+ {
+ field_width = 0;
+ }
+ have_field_width = true;
+ }
+ else
+ {
+ while (iswdigit(*f))
+ {
+ ++f;
+ ++direc_length;
+ }
+ }
+ /* Precision: '.' followed by '*' or digits. Any precision disallows %c. */
+ if (*f == L'.')
+ {
+ ++f;
+ ++direc_length;
+ modify_allowed_format_specifiers(ok, "c", false);
+ if (*f == L'*')
+ {
+ ++f;
+ ++direc_length;
+ if (argc > 0)
+ {
+ intmax_t prec = string_to_scalar_type<intmax_t>(*argv, this);
+ if (prec < 0)
+ {
+ /* A negative precision is taken as if the
+ precision were omitted, so -1 is safe
+ here even if prec < INT_MIN. */
+ precision = -1;
+ }
+ else if (INT_MAX < prec)
+ this->fatal_error(_(L"invalid precision: %ls"), *argv);
+ else
+ {
+ precision = static_cast<int>(prec);
+ }
+ ++argv;
+ --argc;
+ }
+ else
+ {
+ precision = 0;
+ }
+ have_precision = true;
+ }
+ else
+ {
+ while (iswdigit(*f))
+ {
+ ++f;
+ ++direc_length;
+ }
+ }
+ }
+
+ /* Skip length modifiers without counting them in direc_length, so they are left out of the directive text handed to print_direc. */
+ while (*f == L'l' || *f == L'L' || *f == L'h' || *f == L'j' || *f == L't' || *f == L'z')
+ ++f;
+
+ {
+ wchar_t conversion = *f;
+ /* The range test keeps the index within the ok table; a format that ends inside a directive fails here too, because ok[0] is never set. */
+ if (conversion > 0xFF || ! ok[conversion])
+ {
+ this->fatal_error(_(L"%.*ls: invalid conversion specification"), (int)(f + 1 - direc_start), direc_start);
+ return 0;
+ }
+ }
+
+ /* The last argument is the next unused element of argv, or an empty string once the arguments have run out. */
+ print_direc(direc_start, direc_length, *f,
+ have_field_width, field_width,
+ have_precision, precision,
+ (argc <= 0 ? L"" : (argc--, *argv++)));
+ break;
+
+ case L'\\':
+ /* print_esc returns the escape's length without the backslash; the loop's ++f steps over the remaining character. */
+ f += print_esc(f, false);
+ break;
+
+ default:
+ this->append_output(*f);
+ }
+ }
+ return save_argc - argc;
+}
+
+/* Entry point of the printf builtin. argv[1] is the format and the remaining
+   elements are its arguments. The format is applied repeatedly until all
+   arguments are used, a pass uses no argument, or output has been stopped.
+   Returns STATUS_BUILTIN_ERROR when no format is given, otherwise the exit
+   code accumulated in the state. */
+static int builtin_printf(parser_t &parser, wchar_t **argv)
+{
+    builtin_printf_state_t state;
+
+    const int total_args = builtin_count_args(argv);
+    if (total_args <= 1)
+    {
+        state.fatal_error(_(L"printf: not enough arguments"));
+        return STATUS_BUILTIN_ERROR;
+    }
+
+    /* Skip the command name and the format string */
+    wchar_t *format = argv[1];
+    wchar_t **remaining = argv + 2;
+    int remaining_count = total_args - 2;
+
+    for (;;)
+    {
+        const int consumed = state.print_formatted(format, remaining_count, remaining);
+        remaining_count -= consumed;
+        remaining += consumed;
+
+        /* Always run the format once; repeat only while it keeps making progress */
+        if (consumed <= 0 || remaining_count <= 0 || state.early_exit)
+            break;
+    }
+    return state.exit_code;
+}