diff options
Diffstat (limited to 'src/builtin_printf.cpp')
-rw-r--r-- | src/builtin_printf.cpp | 787 |
1 files changed, 787 insertions, 0 deletions
diff --git a/src/builtin_printf.cpp b/src/builtin_printf.cpp new file mode 100644 index 00000000..916166b0 --- /dev/null +++ b/src/builtin_printf.cpp @@ -0,0 +1,787 @@ +/* printf - format and print data + Copyright (C) 1990-2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Usage: printf format [argument...] + + A front end to the printf function that lets it be used from the shell. + + Backslash escapes: + + \" = double quote + \\ = backslash + \a = alert (bell) + \b = backspace + \c = produce no further output + \e = escape + \f = form feed + \n = new line + \r = carriage return + \t = horizontal tab + \v = vertical tab + \ooo = octal number (ooo is 1 to 3 digits) + \xhh = hexadecimal number (hhh is 1 to 2 digits) + \uhhhh = 16-bit Unicode character (hhhh is 4 digits) + \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits) + + Additional directive: + + %b = print an argument string, interpreting backslash escapes, + except that octal escapes are of the form \0 or \0ooo. + + The `format' argument is re-used as many times as necessary + to convert all of the given arguments. + + David MacKenzie <djm@gnu.ai.mit.edu> */ + +/* This file has been imported from source code of printf command in GNU Coreutils version 6.9 */ + +#include <stdio.h> +#include <sys/types.h> +#include <inttypes.h> + +#include "common.h" + +struct builtin_printf_state_t +{ + /* The status of the operation */ + int exit_code; + + /* Whether we should stop outputting. This gets set in the case of an error, and also with the \c escape. */ + bool early_exit; + + builtin_printf_state_t() : exit_code(0), early_exit(false) + { + } + + void verify_numeric(const wchar_t *s, const wchar_t *end, int errcode); + + void print_direc(const wchar_t *start, size_t length, wchar_t conversion, + bool have_field_width, int field_width, + bool have_precision, int precision, + wchar_t const *argument); + + int print_formatted(const wchar_t *format, int argc, wchar_t **argv); + + void fatal_error(const wchar_t *format, ...); + + long print_esc(const wchar_t *escstart, bool octal_0); + void print_esc_string(const wchar_t *str); + void print_esc_char(wchar_t c); + + void append_output(wchar_t c); + void append_output(const wchar_t *c); + void append_format_output(const wchar_t *fmt, ...); +}; + +static bool is_octal_digit(wchar_t c) +{ + return c != L'\0' && wcschr(L"01234567", c) != NULL; +} + +static bool is_hex_digit(wchar_t c) +{ + return c != L'\0' && wcschr(L"0123456789ABCDEFabcdef", c) != NULL; +} + +static int hex_to_bin(const wchar_t &c) +{ + switch (c) + { + case L'0': + return 0; + case L'1': + return 1; + case L'2': + return 2; + case L'3': + return 3; + case L'4': + return 4; + case L'5': + return 5; + case L'6': + return 6; + case L'7': + return 7; + case L'8': + return 8; + case L'9': + return 9; + case L'a': + case L'A': + return 10; + case L'b': + case L'B': + return 11; + case L'c': + case L'C': + return 12; + case L'd': + case L'D': + return 13; + case L'e': + case L'E': + return 14; + case L'f': + case L'F': + return 15; + default: + return -1; + } +} + +static int octal_to_bin(wchar_t c) +{ + switch (c) + { + case L'0': + return 0; + case L'1': + return 1; + case L'2': + return 2; + case L'3': + return 3; + case L'4': + return 4; + case L'5': + return 5; + case L'6': + return 6; + case L'7': + return 7; + default: + return -1; + } +} + +/* This message appears in N_() here rather than just in _() below because + the sole use would have been in a #define. */ +static wchar_t const *const cfcc_msg = + N_(L"warning: %ls: character(s) following character constant have been ignored"); + +double C_STRTOD(wchar_t const *nptr, wchar_t **endptr) +{ + double r; + + const wcstring saved_locale = wsetlocale(LC_NUMERIC, NULL); + + if (!saved_locale.empty()) + { + wsetlocale(LC_NUMERIC, L"C"); + } + + r = wcstod(nptr, endptr); + + if (!saved_locale.empty()) + { + wsetlocale(LC_NUMERIC, saved_locale.c_str()); + } + + return r; +} + +void builtin_printf_state_t::fatal_error(const wchar_t *fmt, ...) +{ + // Don't error twice + if (early_exit) + return; + + va_list va; + va_start(va, fmt); + wcstring errstr = vformat_string(fmt, va); + va_end(va); + stderr_buffer.append(errstr); + if (! string_suffixes_string(L"\n", errstr)) + stderr_buffer.push_back(L'\n'); + + this->exit_code = STATUS_BUILTIN_ERROR; + this->early_exit = true; +} + +void builtin_printf_state_t::append_output(wchar_t c) +{ + // Don't output if we're done + if (early_exit) + return; + + stdout_buffer.push_back(c); +} + +void builtin_printf_state_t::append_output(const wchar_t *c) +{ + // Don't output if we're done + if (early_exit) + return; + + stdout_buffer.append(c); +} + +void builtin_printf_state_t::append_format_output(const wchar_t *fmt, ...) +{ + // Don't output if we're done + if (early_exit) + return; + + va_list va; + va_start(va, fmt); + append_formatv(stdout_buffer, fmt, va); + va_end(va); +} + + +void builtin_printf_state_t::verify_numeric(const wchar_t *s, const wchar_t *end, int errcode) +{ + if (errcode != 0) + { + this->fatal_error(L"%ls: %s", s, strerror(errcode)); + } + else if (*end) + { + if (s == end) + this->fatal_error(_(L"%ls: expected a numeric value"), s); + else + this->fatal_error(_(L"%ls: value not completely converted"), s); + } +} + +template<typename T> +static T raw_string_to_scalar_type(const wchar_t *s, wchar_t ** end); + +// we use wcstoll instead of wcstoimax because FreeBSD 8 has busted wcstoumax and wcstoimax - see #626 +template<> +intmax_t raw_string_to_scalar_type(const wchar_t *s, wchar_t ** end) +{ + return wcstoll(s, end, 0); +} + +template<> +uintmax_t raw_string_to_scalar_type(const wchar_t *s, wchar_t ** end) +{ + return wcstoull(s, end, 0); +} + +template<> +long double raw_string_to_scalar_type(const wchar_t *s, wchar_t ** end) +{ + return C_STRTOD(s, end); +} + +template<typename T> +static T string_to_scalar_type(const wchar_t *s, builtin_printf_state_t *state) +{ + T val; + if (*s == L'\"' || *s == L'\'') + { + wchar_t ch = *++s; + val = ch; + } + else + { + wchar_t *end = NULL; + errno = 0; + val = raw_string_to_scalar_type<T>(s, &end); + state->verify_numeric(s, end, errno); + } + return val; +} + +/* Output a single-character \ escape. */ + +void builtin_printf_state_t::print_esc_char(wchar_t c) +{ + switch (c) + { + case L'a': /* Alert. */ + this->append_output(L'\a'); + break; + case L'b': /* Backspace. */ + this->append_output(L'\b'); + break; + case L'c': /* Cancel the rest of the output. */ + this->early_exit = true; + break; + case L'e': /* Escape */ + this->append_output(L'\x1B'); + break; + case L'f': /* Form feed. */ + this->append_output(L'\f'); + break; + case L'n': /* New line. */ + this->append_output(L'\n'); + break; + case L'r': /* Carriage return. */ + this->append_output(L'\r'); + break; + case L't': /* Horizontal tab. */ + this->append_output(L'\t'); + break; + case L'v': /* Vertical tab. */ + this->append_output(L'\v'); + break; + default: + this->append_output(c); + break; + } +} + +/* Print a \ escape sequence starting at ESCSTART. + Return the number of characters in the escape sequence + besides the backslash. + If OCTAL_0 is nonzero, octal escapes are of the form \0ooo, where o + is an octal digit; otherwise they are of the form \ooo. */ +long builtin_printf_state_t::print_esc(const wchar_t *escstart, bool octal_0) +{ + const wchar_t *p = escstart + 1; + int esc_value = 0; /* Value of \nnn escape. */ + int esc_length; /* Length of \nnn escape. */ + + if (*p == L'x') + { + /* A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits. */ + for (esc_length = 0, ++p; esc_length < 2 && is_hex_digit(*p); ++esc_length, ++p) + esc_value = esc_value * 16 + hex_to_bin(*p); + if (esc_length == 0) + this->fatal_error(_(L"missing hexadecimal number in escape")); + this->append_output(ENCODE_DIRECT_BASE + esc_value % 256); + } + else if (is_octal_digit(*p)) + { + /* Parse \0ooo (if octal_0 && *p == L'0') or \ooo (otherwise). + Allow \ooo if octal_0 && *p != L'0'; this is an undocumented + extension to POSIX that is compatible with Bash 2.05b. */ + /* Wrap mod 256, which matches historic behavior */ + for (esc_length = 0, p += octal_0 && *p == L'0'; esc_length < 3 && is_octal_digit(*p); ++esc_length, ++p) + esc_value = esc_value * 8 + octal_to_bin(*p); + this->append_output(ENCODE_DIRECT_BASE + esc_value % 256); + } + else if (*p && wcschr(L"\"\\abcefnrtv", *p)) + { + print_esc_char(*p++); + } + else if (*p == L'u' || *p == L'U') + { + wchar_t esc_char = *p; + p++; + uint32_t uni_value = 0; + for (size_t esc_length = 0; esc_length < (esc_char == L'u' ? 4 : 8); esc_length++) + { + if (! is_hex_digit(*p)) + { + /* Escape sequence must be done. Complain if we didn't get anything */ + if (esc_length == 0) + { + this->fatal_error(_(L"Missing hexadecimal number in Unicode escape")); + } + break; + } + uni_value = uni_value * 16 + hex_to_bin(*p); + p++; + } + + /* PCA GNU printf respects the limitations described in ISO N717, about which universal characters "shall not" be specified. I believe this limitation is for the benefit of compilers; I see no reason to impose it in builtin_printf. + + If __STDC_ISO_10646__ is defined, then it means wchar_t can and does hold Unicode code points, so just use that. If not defined, use the %lc printf conversion; this probably won't do anything good if your wide character set is not Unicode, but such platforms are exceedingly rare. + */ + if (uni_value > 0x10FFFF) + { + this->fatal_error(_(L"Unicode character out of range: \\%c%0*x"), esc_char, (esc_char == L'u' ? 4 : 8), uni_value); + } + else + { +#if defined(__STDC_ISO_10646__) + this->append_output(uni_value); +#else + this->append_format_output(L"%lc", uni_value); +#endif + } + } + else + { + this->append_output(L'\\'); + if (*p) + { + this->append_output(*p); + p++; + } + } + return p - escstart - 1; +} + +/* Print string STR, evaluating \ escapes. */ + +void builtin_printf_state_t::print_esc_string(const wchar_t *str) +{ + for (; *str; str++) + if (*str == L'\\') + str += print_esc(str, true); + else + this->append_output(*str); +} + +/* Evaluate a printf conversion specification. START is the start of + the directive, LENGTH is its length, and CONVERSION specifies the + type of conversion. LENGTH does not include any length modifier or + the conversion specifier itself. FIELD_WIDTH and PRECISION are the + field width and precision for '*' values, if HAVE_FIELD_WIDTH and + HAVE_PRECISION are true, respectively. ARGUMENT is the argument to + be formatted. */ + +void builtin_printf_state_t::print_direc(const wchar_t *start, size_t length, wchar_t conversion, + bool have_field_width, int field_width, + bool have_precision, int precision, + wchar_t const *argument) +{ + // Start with everything except the conversion specifier + wcstring fmt(start, length); + + /* Create a copy of the % directive, with an intmax_t-wide width modifier substituted for any existing integer length modifier. */ + switch (conversion) + { + case L'd': + case L'i': + case L'u': + fmt.append(L"ll"); + break; + case L'a': + case L'e': + case L'f': + case L'g': + case L'A': + case L'E': + case L'F': + case L'G': + fmt.append(L"L"); + break; + case L's': + case L'c': + fmt.append(L"l"); + break; + default: + break; + } + + // Append the conversion itself + fmt.push_back(conversion); + + switch (conversion) + { + case L'd': + case L'i': + { + intmax_t arg = string_to_scalar_type<intmax_t>(argument, this); + if (! have_field_width) + { + if (! have_precision) + this->append_format_output(fmt.c_str(), arg); + else + this->append_format_output(fmt.c_str(), precision, arg); + } + else + { + if (! have_precision) + this->append_format_output(fmt.c_str(), field_width, arg); + else + this->append_format_output(fmt.c_str(), field_width, precision, arg); + } + } + break; + + case L'o': + case L'u': + case L'x': + case L'X': + { + uintmax_t arg = string_to_scalar_type<uintmax_t>(argument, this); + if (!have_field_width) + { + if (!have_precision) + this->append_format_output(fmt.c_str(), arg); + else + this->append_format_output(fmt.c_str(), precision, arg); + } + else + { + if (!have_precision) + this->append_format_output(fmt.c_str(), field_width, arg); + else + this->append_format_output(fmt.c_str(), field_width, precision, arg); + } + } + break; + + case L'a': + case L'A': + case L'e': + case L'E': + case L'f': + case L'F': + case L'g': + case L'G': + { + long double arg = string_to_scalar_type<long double>(argument, this); + if (!have_field_width) + { + if (!have_precision) + this->append_format_output(fmt.c_str(), arg); + else + this->append_format_output(fmt.c_str(), precision, arg); + } + else + { + if (!have_precision) + this->append_format_output(fmt.c_str(), field_width, arg); + else + this->append_format_output(fmt.c_str(), field_width, precision, arg); + } + } + break; + + case L'c': + if (!have_field_width) + this->append_format_output(fmt.c_str(), *argument); + else + this->append_format_output(fmt.c_str(), field_width, *argument); + break; + case L's': + if (!have_field_width) + { + if (!have_precision) + { + this->append_format_output(fmt.c_str(), argument); + } + else + this->append_format_output(fmt.c_str(), precision, argument); + } + else + { + if (!have_precision) + this->append_format_output(fmt.c_str(), field_width, argument); + else + this->append_format_output(fmt.c_str(), field_width, precision, argument); + } + break; + } +} + +/* For each character in str, set the corresponding boolean in the array to the given flag */ +static inline void modify_allowed_format_specifiers(bool ok[UCHAR_MAX + 1], const char *str, bool flag) +{ + for (const char *c = str; *c != '\0'; c++) + { + unsigned char idx = static_cast<unsigned char>(*c); + ok[idx] = flag; + } +} + +/* Print the text in FORMAT, using ARGV (with ARGC elements) for + arguments to any `%' directives. + Return the number of elements of ARGV used. */ + +int builtin_printf_state_t::print_formatted(const wchar_t *format, int argc, wchar_t **argv) +{ + int save_argc = argc; /* Preserve original value. */ + const wchar_t *f; /* Pointer into `format'. */ + const wchar_t *direc_start; /* Start of % directive. */ + size_t direc_length; /* Length of % directive. */ + bool have_field_width; /* True if FIELD_WIDTH is valid. */ + int field_width = 0; /* Arg to first '*'. */ + bool have_precision; /* True if PRECISION is valid. */ + int precision = 0; /* Arg to second '*'. */ + bool ok[UCHAR_MAX + 1] = { }; /* ok['x'] is true if %x is allowed. */ + + for (f = format; *f != L'\0'; ++f) + { + switch (*f) + { + case L'%': + direc_start = f++; + direc_length = 1; + have_field_width = have_precision = false; + if (*f == L'%') + { + this->append_output(L'%'); + break; + } + if (*f == L'b') + { + /* FIXME: Field width and precision are not supported + for %b, even though POSIX requires it. */ + if (argc > 0) + { + print_esc_string(*argv); + ++argv; + --argc; + } + break; + } + + modify_allowed_format_specifiers(ok, "aAcdeEfFgGiosuxX", true); + + for (;; f++, direc_length++) + { + switch (*f) + { + case L'I': + case L'\'': + modify_allowed_format_specifiers(ok, "aAceEosxX", false); + break; + case '-': + case '+': + case ' ': + break; + case L'#': + modify_allowed_format_specifiers(ok, "cdisu", false); + break; + case '0': + modify_allowed_format_specifiers(ok, "cs", false); + break; + default: + goto no_more_flag_characters; + } + } +no_more_flag_characters: + ; + + if (*f == L'*') + { + ++f; + ++direc_length; + if (argc > 0) + { + intmax_t width = string_to_scalar_type<intmax_t>(*argv, this); + if (INT_MIN <= width && width <= INT_MAX) + field_width = static_cast<int>(width); + else + this->fatal_error(_(L"invalid field width: %ls"), *argv); + ++argv; + --argc; + } + else + { + field_width = 0; + } + have_field_width = true; + } + else + { + while (iswdigit(*f)) + { + ++f; + ++direc_length; + } + } + if (*f == L'.') + { + ++f; + ++direc_length; + modify_allowed_format_specifiers(ok, "c", false); + if (*f == L'*') + { + ++f; + ++direc_length; + if (argc > 0) + { + intmax_t prec = string_to_scalar_type<intmax_t>(*argv, this); + if (prec < 0) + { + /* A negative precision is taken as if the + precision were omitted, so -1 is safe + here even if prec < INT_MIN. */ + precision = -1; + } + else if (INT_MAX < prec) + this->fatal_error(_(L"invalid precision: %ls"), *argv); + else + { + precision = static_cast<int>(prec); + } + ++argv; + --argc; + } + else + { + precision = 0; + } + have_precision = true; + } + else + { + while (iswdigit(*f)) + { + ++f; + ++direc_length; + } + } + } + + while (*f == L'l' || *f == L'L' || *f == L'h' || *f == L'j' || *f == L't' || *f == L'z') + ++f; + + { + wchar_t conversion = *f; + if (conversion > 0xFF || ! ok[conversion]) + { + this->fatal_error(_(L"%.*ls: invalid conversion specification"), (int)(f + 1 - direc_start), direc_start); + return 0; + } + } + + print_direc(direc_start, direc_length, *f, + have_field_width, field_width, + have_precision, precision, + (argc <= 0 ? L"" : (argc--, *argv++))); + break; + + case L'\\': + f += print_esc(f, false); + break; + + default: + this->append_output(*f); + } + } + return save_argc - argc; +} + +static int builtin_printf(parser_t &parser, wchar_t **argv) +{ + builtin_printf_state_t state; + + wchar_t *format; + int args_used; + int argc = builtin_count_args(argv); + + if (argc <= 1) + { + state.fatal_error(_(L"printf: not enough arguments")); + return STATUS_BUILTIN_ERROR; + } + + format = argv[1]; + argc -= 2; + argv += 2; + + do + { + args_used = state.print_formatted(format, argc, argv); + argc -= args_used; + argv += args_used; + } + while (args_used > 0 && argc > 0 && ! state.early_exit); + return state.exit_code; +} |