diff options
-rw-r--r-- | .gitignore | 18 | ||||
-rw-r--r-- | LICENSE | 202 | ||||
-rw-r--r-- | build.ninja | 38 | ||||
-rw-r--r-- | goldfishlocale.cc | 268 | ||||
-rw-r--r-- | goldfishlocale.h | 94 | ||||
-rw-r--r-- | skiphead.re | 193 |
6 files changed, 813 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b569374 --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +# Copyright 2022 Benjamin Barenblat +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +.ninja_* +skiphead.cc +*.o +skiphead @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/build.ninja b/build.ninja new file mode 100644 index 0000000..e63787f --- /dev/null +++ b/build.ninja @@ -0,0 +1,38 @@ +# Copyright 2022 Benjamin Barenblat +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +ninja_required_version = 1.3 + +rule cxx + command = g++ -MD -MT $out -MF $out.d -pipe -std=c++20 -Wall -Wextra $ + -Wno-sign-compare -fdiagnostics-show-template-tree -O3 -flto $ + -fstack-protector-strong -Wformat -Werror=format-security -DNDEBUG $ + -ffunction-sections -fdata-sections -c $in -o $out + description = Compiling $out + depfile = $out.d + deps = gcc + +rule link + command = g++ -fuse-ld=gold -flto -Wl,-O2 -Wl,--gc-sections -Wl,--as-needed $ + -o $out $in && strip $out + description = Linking $out + +rule re2c + command = re2c --empty-class error --no-generation-date -W $in -o $out + description = Generating DFAs in $out + +build goldfishlocale.o: cxx goldfishlocale.cc +build skiphead.cc: re2c skiphead.re +build skiphead.o: cxx skiphead.cc +build skiphead: link goldfishlocale.o skiphead.o diff --git a/goldfishlocale.cc b/goldfishlocale.cc new file mode 100644 index 0000000..f7fd20d --- /dev/null +++ b/goldfishlocale.cc @@ -0,0 +1,268 @@ +// Copyright 2022 Benjamin Barenblat +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include "goldfishlocale.h" + +#include <assert.h> +#include <errno.h> +#include <iconv.h> +#include <langinfo.h> +#include <locale.h> +#include <stddef.h> + +#include <iostream> +#include <limits> +#include <locale> +#include <new> +#include <optional> +#include <stdexcept> +#include <string> +#include <system_error> +#include <type_traits> + +namespace goldfishlocale_internal { + +namespace { + +// The codeset of the current process's locale. std::string isn't trivially +// destructible, so we just leak this. +const std::string* system_codeset; + +// The value returned by iconv_open(3) when things go wrong. This needs to be a +// function for Reasons; just call this function whenever you need the +// value, and let the compiler inline the value. +template <typename T = iconv_t> +T InvalidIconv() noexcept { + // POSIX specifies the invalid iconv_t as as (iconv_t)-1, but it doesn't + // specify the representation of iconv_t. On glibc, it's a pointer, but it + // could also legally be an index into a table or something. This function + // thus needs to return "whatever -1 looks like" for any scalar type. + static_assert(std::is_scalar_v<T>); + + // It would be very strange if iconv_t were std::nullptr_t. + static_assert(!std::is_null_pointer_v<T>); + + // This function is a template so the compiler doesn't try to validate + // branches of this if statement. (If iconv_t is a pointer, iconv_t{-1} is + // invalid.) + if constexpr (std::is_pointer_v<T> || std::is_member_pointer_v<T>) { + return reinterpret_cast<T>(std::numeric_limits<uintptr_t>::max()); + } else { + return T{-1}; + } +} + +// The value returned by iconv(3) when things go wrong. +constexpr auto kIconvError = + // POSIX specifies this as (size_t)-1. + static_cast<size_t>(-1); + +// Convenience wrapper for iconv_open(3). Returns a new iconv_t if one can be +// constructed and std::nullopt if iconv can't handle the conversion. +std::optional<iconv_t> IconvOpen(const char* from, const char* to) { + iconv_t conv = iconv_open(to, from); + if (conv == InvalidIconv()) { + if (errno == EINVAL) { + return std::nullopt; + } + throw std::system_error(errno, std::system_category(), + "goldfishlocale: iconv_open"); + } + return conv; +} + +// This class is thread-compatible. +class Iconv final { + public: + // Creates an iconv converter. + explicit Iconv(const char* from, std::string to) { + size_t to_size = to.size(); + + // glibc has a nice mode for graceful degradation (i.e., converting © to (C) + // in locales that only support ASCII). Try that first. + to.append("//TRANSLIT"); + if (std::optional<iconv_t> conv = IconvOpen(from, to.c_str()); + conv.has_value()) { + conv_ = *conv; + return; + } + + // We might still be on glibc, in which case we need to specify //IGNORE to + // get iconv to ignore characters that don't exist in the target character + // set instead of erroring out. + to.replace(to_size, to.size(), "//IGNORE"); + if (std::optional<iconv_t> conv = IconvOpen(from, to.c_str()); + conv.has_value()) { + conv_ = *conv; + return; + } + + to.resize(to_size); + if (std::optional<iconv_t> conv = IconvOpen(from, to.c_str()); + conv.has_value()) { + conv_ = *conv; + return; + } + + throw std::system_error(EINVAL, std::system_category(), + "goldfishlocale: iconv_open"); + } + + std::string Convert(char* in_buf, size_t in_bytes_left) { + // Reset the iconv state. + if (iconv(conv_, nullptr, nullptr, nullptr, nullptr) == kIconvError) { + throw std::system_error(errno, std::system_category(), + "goldfishlocale: iconv"); + } + + // Having the string expand during this translation is unusual. If we're + // going from UTF-8 to UTF-8, this is just going to be a memcpy; if we're + // going from UTF-8 to C, most multibyte characters are going to degrade to + // single-byte equivalents. Start by allocating the same number of bytes in + // the output buffer as are in the input buffer; we can always expand later. + std::string result(in_bytes_left, '\0'); + char* out_buf = result.data(); + size_t out_bytes_left = + result.size() * sizeof(decltype(result)::value_type); + + while (iconv(conv_, &in_buf, &in_bytes_left, &out_buf, &out_bytes_left) == + kIconvError) { + if (errno == E2BIG) { + // result is full, but we still need to decode more characters. We're + // going to reallocate result, which may invalidate out_buf; save our + // position as an index so we can recompute out_buf later. + ptrdiff_t result_index = out_buf - result.data(); + + // Just double the result buffer size. + int increment = result.size(); + result.append(increment, '\0'); + + out_buf = result.data() + result_index; + out_bytes_left += increment; + } else { + throw std::system_error(errno, std::system_category(), + "goldfishlocale: iconv"); + } + } + result.resize(result.size() - out_bytes_left); + return result; + } + + private: + iconv_t conv_; +}; + +#ifndef NDEBUG + +// The error code returned from locale functions. Since the error code is 0, +// this is valid whether locale_t is a pointer or an arithmetic type. If only +// iconv_open(3) could have worked this way. +constexpr locale_t kZeroLocale{0}; + +// A C locale that we own. +class Locale final { + public: + static Locale Duplicate(locale_t locale) { return Locale(locale); } + + // These are deleted for simplicity's sake in the current implementation; + // there's no requirement that they be absent. Copies could be implemented + // atop duplocale(3), and moves are easy to implement via swap. + Locale(const Locale&) = delete; + Locale& operator=(const Locale&) = delete; + + ~Locale() noexcept { freelocale(locale_); } + + const locale_t& get() noexcept { return locale_; } + + private: + explicit Locale(locale_t locale) : locale_(duplocale(locale)) { + if (locale_ == kZeroLocale) { + if (errno == ENOMEM) { + throw std::bad_alloc(); + } + throw std::system_error(errno, std::system_category(), + "goldfishlocale: duplocale"); + } + } + + locale_t locale_; +}; + +// Looks up the codeset of the current thread's locale. +std::string CurrentThreadCodeset() { + // Get an (unowned) reference to the current thread's locale. + locale_t locale_desc = uselocale(kZeroLocale); + if (locale_desc == kZeroLocale) { + throw std::system_error(errno, std::system_category(), + "goldfishlocale: uselocale"); + } + + // locale_desc might be LC_GLOBAL_LOCALE, which is an illegal argument to + // nl_langinfo_l(3). Duplicate the locale before asking for the codeset to get + // rid of any LC_GLOBAL_LOCALEs. + return nl_langinfo_l(CODESET, Locale::Duplicate(locale_desc).get()); +} + +#endif // !defined(NDEBUG) + +} // namespace + +std::string ToSystem(char* in_buf, size_t in_bytes_left) { +#ifndef NDEBUG + if (system_codeset == nullptr) { + throw std::logic_error( + "goldfishlocale: ToSystem was called before SetLocaleFromEnvironment"); + } + + if (CurrentThreadCodeset() != *system_codeset) { + throw std::logic_error( + "goldfishlocale: Process locale changed during execution"); + } +#endif + + // Iconv is thread-compatible, not thread-safe. Make it thread-safe and avoid + // contention by just having one per thread. + static thread_local Iconv conv("UTF-8", *system_codeset); + + return conv.Convert(in_buf, in_bytes_left); +} + +} // namespace goldfishlocale_internal + +namespace goldfishlocale { + +void SetLocaleFromEnvironment() { +#ifndef NDEBUG + if (goldfishlocale_internal::system_codeset != nullptr) { + throw std::logic_error( + "goldfishlocale: SetLocaleFromEnvironment has already been called"); + } +#endif + + std::locale loc(""); + std::locale::global(loc); + std::cin.imbue(loc); + std::cout.imbue(loc); + std::cerr.imbue(loc); + std::clog.imbue(loc); + std::wcin.imbue(loc); + std::wcout.imbue(loc); + std::wcerr.imbue(loc); + std::wclog.imbue(loc); + + goldfishlocale_internal::system_codeset = + new std::string(nl_langinfo(CODESET)); +} + +} // namespace goldfishlocale diff --git a/goldfishlocale.h b/goldfishlocale.h new file mode 100644 index 0000000..c9448c1 --- /dev/null +++ b/goldfishlocale.h @@ -0,0 +1,94 @@ +// Copyright 2022 Benjamin Barenblat +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +// This is goldfishlocale, a library to help you get your terminal output +// encoded correctly. Use it like this: +// +// int main(int argc, char* argv[]) { +// goldfishlocale::SetLocaleFromEnvironment(); // call this once +// std::cout << goldfishlocale::ToSystem(u8"These are “curly quotes”.\n"); +// } +// +// Running this on most terminals will produce +// +// These are “curly quotes”. +// +// On terminals that lack curly quote characters, this will gracefully degrade +// to +// +// These are "curly quotes". +// +// rather than +// +// These are 窶彡urly quotes窶� +// +// or something similarly unexpected. +// +// LIBRARY ASSUMPTION: Goldfishlocale assumes your system's locale is set once +// (by goldfishlocale) and does not change during program execution. In +// practice, this means you should not call std::locale::global or setlocale(3). +// By default, goldfishlocale detects and throws on locale changes; however, +// this detection does carry a runtime cost, so it's disabled if you define +// NDEBUG. + +#include <stddef.h> + +#include <string> +#include <string_view> + +namespace goldfishlocale_internal { + +std::string ToSystem(char*, size_t); + +} // namespace goldfishlocale_internal + +namespace goldfishlocale { + +// Initializes goldfishlocale based on your environment. After this call +// returns, the C and C++ libraries are aware of the system locale; this may +// affect some behavior, including the way numbers and dates are formatted. +// +// This function must be called exactly once during the execution of your +// program, ideally near the start of main. +// +// Your program must be single-threaded at the time this function is called. +// Having multiple threads executing triggers undefined behavior. +void SetLocaleFromEnvironment(); + +// Converts the specified string to the system locale. Assumes a UTF-8 encoding. +inline std::string ToSystem(std::string s) { + return goldfishlocale_internal::ToSystem(s.data(), s.size()); +} +inline std::string ToSystem(std::string_view s) { + return ToSystem(std::string(s)); +} +inline std::string ToSystem(const char s[]) { return ToSystem(std::string(s)); } + +#if defined(__cpp_char8_t) && defined(__cpp_lib_char8_t) + +// Converts the specified string to the system locale. +inline std::string ToSystem(std::u8string s) { + return goldfishlocale_internal::ToSystem(reinterpret_cast<char*>(s.data()), + s.size()); +} +inline std::string ToSystem(std::u8string_view s) { + return ToSystem(std::u8string(s)); +} +inline std::string ToSystem(const char8_t s[]) { + return ToSystem(std::u8string(s)); +} + +#endif // defined(__cpp_char8_t) && defined(__cpp_lib_char8_t) + +} // namespace goldfishlocale diff --git a/skiphead.re b/skiphead.re new file mode 100644 index 0000000..f7f48e5 --- /dev/null +++ b/skiphead.re @@ -0,0 +1,193 @@ +// Copyright 2022 Benjamin Barenblat +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include <assert.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <algorithm> +#include <array> +#include <ios> +#include <iostream> +#include <string_view> + +#include "goldfishlocale.h" + +namespace { + +constexpr std::string_view kShortUsage = + "Usage: skiphead [OPTION...] COMMAND [ARGS...]\n"; + +constexpr std::u8string_view kHelp = + u8R"( +With no options, copy the first line of standard input to standard output, and +then execute the specified command. This is useful for processing the output of +commands that emit headers; for example, “ps -ef | skiphead grep systemd” will +print the headers from “ps” before grepping for “systemd” in the remaining +output. + +Like head(1), skiphead accepts -NUM, -n, and --lines arguments to control the +number of lines printed before invoking COMMAND. + +If your COMMAND starts with -, you may terminate skiphead’s option processing +with --. + +Options: + -NUM, -n NUM, --lines=NUM print NUM header lines instead of 1 + -- terminate option processing + --help display this help and exit + --version display version information and exit + +Please report bugs to Benjamin Barenblat <bbarenblat@gmail.com>. +)"; + +constexpr std::u8string_view kAskForHelp = + u8"Try “skiphead --help” for more information.\n"; + +constexpr std::string_view kVersionInfo = R"(skiphead 1.0.0 +Copyright 2022 Benjamin Barenblat +Licensed under the Apache License, Version 2.0 +)"; + +int ParseOptionsAndAdvanceArgv(char**& argv) { + int lines_to_skip = 1; + bool expect_bare_number = false; + for (++argv; argv[0] != nullptr; ++argv) { + const char* YYCURSOR = argv[0]; + const char* a; + /*!stags:re2c format = "const char* @@;"; */ + + /*!re2c + re2c:define:YYCTYPE = char; + re2c:flags:tags = 1; + re2c:yyfill:enable = 0; + + ("-" | "-n" | "--lines=") @a [0-9]+ { + lines_to_skip = atoi(a); + continue; + } + + "-n" | "--lines" { + expect_bare_number = true; + continue; + } + + [0-9]+ { + if (expect_bare_number) { + lines_to_skip = atoi(argv[0]); + expect_bare_number = false; + continue; + } + break; + } + + "--help" { + std::cout << kShortUsage << goldfishlocale::ToSystem(kHelp); + exit(0); + } + + "--version" { + std::cout << kVersionInfo; + exit(0); + } + + "--" { + ++argv; + break; + } + + "-" [^\x00]+ { + std::clog << goldfishlocale::ToSystem( + u8"skiphead: Unrecognized option “") + << argv[0] << goldfishlocale::ToSystem(u8"”\n") + << goldfishlocale::ToSystem(kAskForHelp); + exit(1); + } + + * { + break; + } + */ + } + return lines_to_skip; +} + +int Read(int fd, void* buf, size_t count) { + ssize_t bytes_read = read(fd, buf, count); + if (bytes_read < 0) { + if (errno == EINTR) { + return Read(fd, buf, count); // Just try again. + } + std::clog << "skiphead: Read failed: " << strerror(errno) << '\n'; + exit(1); + } + return bytes_read; +} + +void CopyLines(int lines_to_skip) { + std::array<char, 16> buffer; + while (lines_to_skip > 0) { + int bytes_read = + Read(STDIN_FILENO, buffer.data(), + std::min(static_cast<int>(buffer.size()), lines_to_skip)); + if (bytes_read == 0) { + // Standard input got closed. Just move on. + break; + } + + for (int i = 0; i < bytes_read; ++i) { + if (buffer[i] == '\n') { + --lines_to_skip; + } + } + + std::cout.write(buffer.data(), bytes_read); + } +} + +} // namespace + +int main(int argc, char* argv[]) { + goldfishlocale::SetLocaleFromEnvironment(); + + // We're not using the C stdio functions in this program, so enable extra + // userspace buffering to reduce syscall overhead. + std::ios_base::sync_with_stdio(false); + + if (argc < 2) { + std::clog << kShortUsage << goldfishlocale::ToSystem(kAskForHelp); + return 1; + } + + int lines_to_skip = ParseOptionsAndAdvanceArgv(argv); + assert(lines_to_skip >= 0); + if (argv[0] == nullptr) { + std::clog << kShortUsage << goldfishlocale::ToSystem(kAskForHelp); + return 1; + } + + CopyLines(lines_to_skip); + std::cout.flush(); + + execvp(argv[0], argv); + + int r = errno; + std::clog << goldfishlocale::ToSystem( + u8"skiphead: Failed to execute command “") + << argv[0] << goldfishlocale::ToSystem(u8"”: ") << strerror(r) + << '\n'; + return r; +} |