summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Benjamin Barenblat <bbarenblat@gmail.com>2022-01-25 18:23:21 -0500
committerGravatar Benjamin Barenblat <bbarenblat@gmail.com>2022-01-25 18:23:21 -0500
commit07c61498bd7fa6166029c1ab093a35f82d926667 (patch)
treecfbd79dec9a767f858b89033c922fd4129b85e3c
skiphead, a program to preserve headers in a pipeline
This is a faster, more robust rewrite of a shell script I wrote a few years ago to preserve headers when grepping through program output. I can never remember what the headers are when I run things like 'ps', so being able to say something like 'ps -ef | skiphead grep systemd' is useful. As a bonus, the program detects your locale and automatically displays help and error messages using the correct encoding.
-rw-r--r--.gitignore18
-rw-r--r--LICENSE202
-rw-r--r--build.ninja38
-rw-r--r--goldfishlocale.cc268
-rw-r--r--goldfishlocale.h94
-rw-r--r--skiphead.re193
6 files changed, 813 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b569374
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,18 @@
+# Copyright 2022 Benjamin Barenblat
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+.ninja_*
+skiphead.cc
+*.o
+skiphead
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/build.ninja b/build.ninja
new file mode 100644
index 0000000..e63787f
--- /dev/null
+++ b/build.ninja
@@ -0,0 +1,38 @@
+# Copyright 2022 Benjamin Barenblat
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+ninja_required_version = 1.3
+
+# Compiles one C++20 translation unit with optimization and LTO enabled.
+# -MD/-MT/-MF make g++ write header dependencies to $out.d, which Ninja reads
+# back through the depfile/deps settings below. -DNDEBUG turns off assert()
+# and the NDEBUG-guarded checks in goldfishlocale.cc.
+rule cxx
+ command = g++ -MD -MT $out -MF $out.d -pipe -std=c++20 -Wall -Wextra $
+ -Wno-sign-compare -fdiagnostics-show-template-tree -O3 -flto $
+ -fstack-protector-strong -Wformat -Werror=format-security -DNDEBUG $
+ -ffunction-sections -fdata-sections -c $in -o $out
+ description = Compiling $out
+ depfile = $out.d
+ deps = gcc
+
+# Links the object files with gold, discarding unreferenced sections, and then
+# strips the resulting binary.
+rule link
+ command = g++ -fuse-ld=gold -flto -Wl,-O2 -Wl,--gc-sections -Wl,--as-needed $
+ -o $out $in && strip $out
+ description = Linking $out
+
+# Runs re2c to turn a .re source into the C++ file that is actually compiled.
+rule re2c
+ command = re2c --empty-class error --no-generation-date -W $in -o $out
+ description = Generating DFAs in $out
+
+# Build graph: skiphead.re -> skiphead.cc -> skiphead.o, which is linked with
+# goldfishlocale.o into the skiphead executable.
+build goldfishlocale.o: cxx goldfishlocale.cc
+build skiphead.cc: re2c skiphead.re
+build skiphead.o: cxx skiphead.cc
+build skiphead: link goldfishlocale.o skiphead.o
diff --git a/goldfishlocale.cc b/goldfishlocale.cc
new file mode 100644
index 0000000..f7fd20d
--- /dev/null
+++ b/goldfishlocale.cc
@@ -0,0 +1,268 @@
+// Copyright 2022 Benjamin Barenblat
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include "goldfishlocale.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <iconv.h>
+#include <langinfo.h>
+#include <locale.h>
+#include <stddef.h>
+
+#include <iostream>
+#include <limits>
+#include <locale>
+#include <new>
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <system_error>
+#include <type_traits>
+
+namespace goldfishlocale_internal {
+
+namespace {
+
+// Name of the codeset used by the process locale. Null until
+// SetLocaleFromEnvironment stores a heap-allocated string here. The string is
+// intentionally never freed: std::string is not trivially destructible, so
+// leaking it is the simplest option.
+const std::string* system_codeset = nullptr;
+
+// Returns the sentinel that iconv_open(3) yields on failure, which POSIX
+// specifies as (iconv_t)-1.
+//
+// POSIX does not specify the representation of iconv_t. On glibc it is a
+// pointer, but it could equally be an index into a table, so this computes
+// "whatever -1 looks like" for the scalar type T. It is a function template
+// rather than a constant so that only the `if constexpr` branch matching T is
+// instantiated; the integer-style conversion would not compile for a pointer.
+// Call it wherever the value is needed and let the compiler inline it.
+template <typename T = iconv_t>
+T InvalidIconv() noexcept {
+  static_assert(std::is_scalar_v<T>);
+
+  // It would be very strange if iconv_t were std::nullptr_t.
+  static_assert(!std::is_null_pointer_v<T>);
+
+  // reinterpret_cast cannot turn an integer into a pointer to member, so that
+  // case is rejected here with a readable message instead of failing inside
+  // the cast below.
+  static_assert(!std::is_member_pointer_v<T>,
+                "InvalidIconv does not support pointer-to-member types");
+
+  if constexpr (std::is_pointer_v<T>) {
+    // A pointer with every bit set.
+    return reinterpret_cast<T>(std::numeric_limits<uintptr_t>::max());
+  } else {
+    // static_cast rather than T{-1}: brace initialization rejects -1 as a
+    // narrowing conversion when T is an unsigned integer type.
+    return static_cast<T>(-1);
+  }
+}
+
+// Failure sentinel returned by iconv(3); POSIX specifies it as (size_t)-1.
+constexpr size_t kIconvError = static_cast<size_t>(-1);
+
+// Wrapper around iconv_open(3) that takes the encodings in (from, to) order.
+// Returns the new descriptor on success and std::nullopt when iconv_open fails
+// with EINVAL, i.e. iconv cannot handle the conversion. Any other failure is
+// thrown as std::system_error.
+std::optional<iconv_t> IconvOpen(const char* from, const char* to) {
+  const iconv_t descriptor = iconv_open(to, from);
+  if (descriptor != InvalidIconv()) {
+    return descriptor;
+  }
+  if (errno != EINVAL) {
+    throw std::system_error(errno, std::system_category(),
+                            "goldfishlocale: iconv_open");
+  }
+  return std::nullopt;
+}
+
+// Owns an iconv(3) conversion descriptor and closes it on destruction.
+//
+// This class is thread-compatible.
+class Iconv final {
+ public:
+  // Creates a converter from the `from` encoding to the `to` encoding. Throws
+  // std::system_error if iconv cannot open the conversion in any of the forms
+  // tried below.
+  explicit Iconv(const char* from, std::string to) {
+    const size_t base_size = to.size();
+
+    // Try the target encoding with progressively fewer extras:
+    //   //TRANSLIT  glibc's graceful degradation (e.g. converting © to (C) in
+    //               locales that only support ASCII);
+    //   //IGNORE    glibc's mode for dropping characters that don't exist in
+    //               the target character set instead of erroring out;
+    //   (nothing)   the plain encoding name.
+    for (const char* suffix : {"//TRANSLIT", "//IGNORE", ""}) {
+      to.resize(base_size);
+      to.append(suffix);
+      if (std::optional<iconv_t> conv = IconvOpen(from, to.c_str());
+          conv.has_value()) {
+        conv_ = *conv;
+        return;
+      }
+    }
+
+    throw std::system_error(EINVAL, std::system_category(),
+                            "goldfishlocale: iconv_open");
+  }
+
+  // The descriptor is closed exactly once, so the handle must not be copied.
+  Iconv(const Iconv&) = delete;
+  Iconv& operator=(const Iconv&) = delete;
+
+  // Every constructor path either stores a valid descriptor or throws, so
+  // conv_ is always safe to close here.
+  ~Iconv() noexcept { iconv_close(conv_); }
+
+  // Converts the `in_bytes_left` bytes starting at `in_buf` and returns the
+  // converted text. Throws std::system_error if iconv reports any error other
+  // than running out of output space.
+  std::string Convert(char* in_buf, size_t in_bytes_left) {
+    // Reset the iconv state.
+    if (iconv(conv_, nullptr, nullptr, nullptr, nullptr) == kIconvError) {
+      throw std::system_error(errno, std::system_category(),
+                              "goldfishlocale: iconv");
+    }
+
+    // Having the string expand during this translation is unusual. If we're
+    // going from UTF-8 to UTF-8, this is just going to be a memcpy; if we're
+    // going from UTF-8 to C, most multibyte characters are going to degrade to
+    // single-byte equivalents. Start with as many output bytes as there are
+    // input bytes; Grow expands the buffer if that turns out to be too few.
+    std::string result(in_bytes_left, '\0');
+    char* out_buf = result.data();
+    size_t out_bytes_left = result.size();
+
+    while (iconv(conv_, &in_buf, &in_bytes_left, &out_buf, &out_bytes_left) ==
+           kIconvError) {
+      if (errno != E2BIG) {
+        throw std::system_error(errno, std::system_category(),
+                                "goldfishlocale: iconv");
+      }
+      Grow(result, out_buf, out_bytes_left);
+    }
+
+    // For stateful target encodings, write the sequence that returns the
+    // output to its initial shift state, so text appended after the result is
+    // not interpreted in a leftover state.
+    while (iconv(conv_, nullptr, nullptr, &out_buf, &out_bytes_left) ==
+           kIconvError) {
+      if (errno != E2BIG) {
+        throw std::system_error(errno, std::system_category(),
+                                "goldfishlocale: iconv");
+      }
+      Grow(result, out_buf, out_bytes_left);
+    }
+
+    // Drop the part of the buffer that iconv never wrote to.
+    result.resize(result.size() - out_bytes_left);
+    return result;
+  }
+
+ private:
+  // Doubles the size of `result` (or gives an empty buffer a small fixed size,
+  // since doubling zero would never make progress) and re-points `out_buf` and
+  // `out_bytes_left` at the unwritten tail of the enlarged buffer.
+  static void Grow(std::string& result, char*& out_buf,
+                   size_t& out_bytes_left) {
+    // Appending may reallocate and invalidate out_buf, so remember the write
+    // position as an index and recompute the pointer afterwards.
+    const size_t written = static_cast<size_t>(out_buf - result.data());
+    const size_t increment = result.empty() ? 16 : result.size();
+    result.append(increment, '\0');
+    out_buf = result.data() + written;
+    out_bytes_left += increment;
+  }
+
+  iconv_t conv_;
+};
+
+#ifndef NDEBUG
+
+// Zero-valued locale_t, which the locale functions return to signal failure.
+// Because the sentinel is 0, this definition is valid whether locale_t is a
+// pointer or an arithmetic type, unlike the -1 sentinel that iconv_open(3)
+// uses.
+constexpr locale_t kZeroLocale = locale_t{};
+
+// Owner of a C locale object created with duplocale(3); the destructor
+// releases it with freelocale(3).
+class Locale final {
+ public:
+  // Returns an owned duplicate of `locale`. Throws std::bad_alloc when
+  // duplocale fails with ENOMEM and std::system_error for any other failure.
+  static Locale Duplicate(locale_t locale) { return Locale(locale); }
+
+  // Copying is disabled only to keep the current implementation simple; there
+  // is no requirement that it be absent. Copies could be built on
+  // duplocale(3), and moves are easy to implement via swap.
+  Locale(const Locale&) = delete;
+  Locale& operator=(const Locale&) = delete;
+
+  ~Locale() noexcept { freelocale(locale_); }
+
+  // Gives access to the owned handle; ownership stays with this object.
+  const locale_t& get() noexcept { return locale_; }
+
+ private:
+  explicit Locale(locale_t locale) : locale_(duplocale(locale)) {
+    if (locale_ != kZeroLocale) {
+      return;
+    }
+    if (errno == ENOMEM) {
+      throw std::bad_alloc();
+    }
+    throw std::system_error(errno, std::system_category(),
+                            "goldfishlocale: duplocale");
+  }
+
+  locale_t locale_;
+};
+
+// Returns the codeset name of the locale in effect for the calling thread.
+// Throws std::system_error if the locale cannot be queried, plus whatever
+// Locale::Duplicate throws.
+std::string CurrentThreadCodeset() {
+  // Passing the zero locale asks uselocale(3) for the thread's current locale;
+  // the handle that comes back is not owned by us.
+  const locale_t current = uselocale(kZeroLocale);
+  if (current == kZeroLocale) {
+    throw std::system_error(errno, std::system_category(),
+                            "goldfishlocale: uselocale");
+  }
+
+  // `current` might be LC_GLOBAL_LOCALE, which is an illegal argument to
+  // nl_langinfo_l(3). Querying a duplicate instead gets rid of any
+  // LC_GLOBAL_LOCALE. The returned std::string is built from the C string
+  // before `owned` is destroyed.
+  Locale owned = Locale::Duplicate(current);
+  return nl_langinfo_l(CODESET, owned.get());
+}
+
+#endif // !defined(NDEBUG)
+
+} // namespace
+
+// Converts the `in_bytes_left` bytes of UTF-8 text at `in_buf` to the codeset
+// recorded by SetLocaleFromEnvironment and returns the converted text.
+//
+// Throws std::logic_error if SetLocaleFromEnvironment has not been called or,
+// in builds without NDEBUG, if the thread's locale codeset no longer matches
+// the recorded one. Conversion failures surface as std::system_error.
+std::string ToSystem(char* in_buf, size_t in_bytes_left) {
+  // This check is a single pointer comparison, so it stays enabled even under
+  // NDEBUG; without it a missing initialization would dereference a null
+  // pointer below.
+  if (system_codeset == nullptr) {
+    throw std::logic_error(
+        "goldfishlocale: ToSystem was called before SetLocaleFromEnvironment");
+  }
+
+#ifndef NDEBUG
+  // Detecting a locale change requires querying and duplicating the locale on
+  // every call, so it is limited to debug builds.
+  if (CurrentThreadCodeset() != *system_codeset) {
+    throw std::logic_error(
+        "goldfishlocale: Process locale changed during execution");
+  }
+#endif
+
+  // Iconv is thread-compatible, not thread-safe. Make it thread-safe and avoid
+  // contention by just having one per thread.
+  static thread_local Iconv conv("UTF-8", *system_codeset);
+
+  return conv.Convert(in_buf, in_bytes_left);
+}
+
+} // namespace goldfishlocale_internal
+
+namespace goldfishlocale {
+
+// Adopts the locale named by the environment as the global C++ locale, imbues
+// the eight standard narrow and wide streams with it, and records its codeset
+// for ToSystem. In builds without NDEBUG, throws std::logic_error when called
+// a second time.
+void SetLocaleFromEnvironment() {
+#ifndef NDEBUG
+  if (goldfishlocale_internal::system_codeset != nullptr) {
+    throw std::logic_error(
+        "goldfishlocale: SetLocaleFromEnvironment has already been called");
+  }
+#endif
+
+  // std::locale("") throws std::runtime_error when the environment names a
+  // locale the system does not provide (for example a mistyped LANG). Falling
+  // back to the classic "C" locale keeps the program usable in that case
+  // instead of terminating it with an uncaught exception.
+  std::locale loc = std::locale::classic();
+  try {
+    loc = std::locale("");
+  } catch (const std::runtime_error&) {
+    // Keep the classic locale chosen above.
+  }
+
+  std::locale::global(loc);
+  std::cin.imbue(loc);
+  std::cout.imbue(loc);
+  std::cerr.imbue(loc);
+  std::clog.imbue(loc);
+  std::wcin.imbue(loc);
+  std::wcout.imbue(loc);
+  std::wcerr.imbue(loc);
+  std::wclog.imbue(loc);
+
+  // Never freed on purpose; the string stays available for the rest of the
+  // process's life.
+  goldfishlocale_internal::system_codeset =
+      new std::string(nl_langinfo(CODESET));
+}
+
+} // namespace goldfishlocale
diff --git a/goldfishlocale.h b/goldfishlocale.h
new file mode 100644
index 0000000..c9448c1
--- /dev/null
+++ b/goldfishlocale.h
@@ -0,0 +1,94 @@
+// Copyright 2022 Benjamin Barenblat
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+// This is goldfishlocale, a library to help you get your terminal output
+// encoded correctly. Use it like this:
+//
+// int main(int argc, char* argv[]) {
+// goldfishlocale::SetLocaleFromEnvironment(); // call this once
+// std::cout << goldfishlocale::ToSystem(u8"These are “curly quotes”.\n");
+// }
+//
+// Running this on most terminals will produce
+//
+// These are “curly quotes”.
+//
+// On terminals that lack curly quote characters, this will gracefully degrade
+// to
+//
+// These are "curly quotes".
+//
+// rather than
+//
+// These are 窶彡urly quotes窶�
+//
+// or something similarly unexpected.
+//
+// LIBRARY ASSUMPTION: Goldfishlocale assumes your system's locale is set once
+// (by goldfishlocale) and does not change during program execution. In
+// practice, this means you should not call std::locale::global or setlocale(3).
+// By default, goldfishlocale detects and throws on locale changes; however,
+// this detection does carry a runtime cost, so it's disabled if you define
+// NDEBUG.
+
+// This header defines inline functions, so including it twice in one
+// translation unit would be a redefinition error without a guard.
+#pragma once
+
+#include <stddef.h>
+
+#include <string>
+#include <string_view>
+
+namespace goldfishlocale_internal {
+
+// Implementation hook for the goldfishlocale::ToSystem overloads: takes a
+// pointer to UTF-8 bytes and their count, and returns the text converted to
+// the system locale. Not intended to be called directly.
+std::string ToSystem(char*, size_t);
+
+}  // namespace goldfishlocale_internal
+
+namespace goldfishlocale {
+
+// Initializes goldfishlocale based on your environment. After this call
+// returns, the C and C++ libraries are aware of the system locale; this may
+// affect some behavior, including the way numbers and dates are formatted.
+//
+// This function must be called exactly once during the execution of your
+// program, ideally near the start of main.
+//
+// Your program must be single-threaded at the time this function is called.
+// Having multiple threads executing triggers undefined behavior.
+void SetLocaleFromEnvironment();
+
+// Converts `s` to the system locale's encoding, assuming `s` is UTF-8. Each
+// overload works on its own mutable copy of the text, because the underlying
+// implementation takes a non-const char*.
+inline std::string ToSystem(std::string s) {
+  return goldfishlocale_internal::ToSystem(s.data(), s.size());
+}
+inline std::string ToSystem(std::string_view s) {
+  std::string copy(s);
+  return goldfishlocale_internal::ToSystem(copy.data(), copy.size());
+}
+inline std::string ToSystem(const char s[]) {
+  std::string copy(s);
+  return goldfishlocale_internal::ToSystem(copy.data(), copy.size());
+}
+
+#if defined(__cpp_char8_t) && defined(__cpp_lib_char8_t)
+
+// char8_t counterparts of the overloads above, available only when both the
+// language and the library support char8_t. Each converts `s` to the system
+// locale's encoding.
+inline std::string ToSystem(std::u8string s) {
+  // The implementation works on plain char, so view the char8_t storage as
+  // char.
+  char* const bytes = reinterpret_cast<char*>(s.data());
+  return goldfishlocale_internal::ToSystem(bytes, s.size());
+}
+inline std::string ToSystem(std::u8string_view s) {
+  std::u8string copy(s);
+  return goldfishlocale_internal::ToSystem(
+      reinterpret_cast<char*>(copy.data()), copy.size());
+}
+inline std::string ToSystem(const char8_t s[]) {
+  std::u8string copy(s);
+  return goldfishlocale_internal::ToSystem(
+      reinterpret_cast<char*>(copy.data()), copy.size());
+}
+
+#endif  // defined(__cpp_char8_t) && defined(__cpp_lib_char8_t)
+
+} // namespace goldfishlocale
diff --git a/skiphead.re b/skiphead.re
new file mode 100644
index 0000000..f7f48e5
--- /dev/null
+++ b/skiphead.re
@@ -0,0 +1,193 @@
+// Copyright 2022 Benjamin Barenblat
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <array>
+#include <ios>
+#include <iostream>
+#include <string_view>
+
+#include "goldfishlocale.h"
+
+namespace {
+
+// One-line usage synopsis. Printed ahead of kHelp for --help and ahead of
+// kAskForHelp when no command was given.
+constexpr std::string_view kShortUsage =
+ "Usage: skiphead [OPTION...] COMMAND [ARGS...]\n";
+
+// Body of the --help output, printed after kShortUsage. Declared as a UTF-8
+// literal (it contains curly quotes) and passed through
+// goldfishlocale::ToSystem before it is written.
+constexpr std::u8string_view kHelp =
+ u8R"(
+With no options, copy the first line of standard input to standard output, and
+then execute the specified command. This is useful for processing the output of
+commands that emit headers; for example, “ps -ef | skiphead grep systemd” will
+print the headers from “ps” before grepping for “systemd” in the remaining
+output.
+
+Like head(1), skiphead accepts -NUM, -n, and --lines arguments to control the
+number of lines printed before invoking COMMAND.
+
+If your COMMAND starts with -, you may terminate skiphead’s option processing
+with --.
+
+Options:
+ -NUM, -n NUM, --lines=NUM print NUM header lines instead of 1
+ -- terminate option processing
+ --help display this help and exit
+ --version display version information and exit
+
+Please report bugs to Benjamin Barenblat <bbarenblat@gmail.com>.
+)";
+
+// Hint appended to usage and unrecognized-option errors. Declared as a UTF-8
+// literal and passed through goldfishlocale::ToSystem before it is written.
+constexpr std::u8string_view kAskForHelp =
+ u8"Try “skiphead --help” for more information.\n";
+
+// Text written to standard output, without conversion, for --version.
+constexpr std::string_view kVersionInfo = R"(skiphead 1.0.0
+Copyright 2022 Benjamin Barenblat
+Licensed under the Apache License, Version 2.0
+)";
+
+// Returns true if `s` is a non-empty string consisting only of the ASCII
+// digits 0-9.
+bool IsDecimalNumber(const char* s) {
+  if (*s == '\0') {
+    return false;
+  }
+  for (; *s != '\0'; ++s) {
+    if (*s < '0' || *s > '9') {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Converts a string of decimal digits to an int, saturating at INT_MAX instead
+// of invoking the undefined behavior atoi has for out-of-range input.
+int ParseLineCount(const char* digits) {
+  errno = 0;
+  const long value = strtol(digits, nullptr, 10);
+  if (errno == ERANGE || value > INT_MAX) {
+    return INT_MAX;
+  }
+  return static_cast<int>(value);
+}
+
+// Reports that `option` (a bare "-n" or "--lines") was not followed by a
+// decimal line count, then exits with status 1.
+[[noreturn]] void DieMissingLineCount(const char* option) {
+  std::clog << goldfishlocale::ToSystem(u8"skiphead: Option “") << option
+            << goldfishlocale::ToSystem(u8"” requires a numeric argument\n")
+            << goldfishlocale::ToSystem(kAskForHelp);
+  exit(1);
+}
+
+// Consumes skiphead's own options from the front of `argv`.
+//
+// On entry `argv` points at the program name. On return it points at the first
+// word of COMMAND, or at the terminating null pointer if no command was given.
+// Returns the number of header lines to copy, which is 1 unless an option
+// overrides it.
+//
+// --help and --version print their text and exit with status 0. An
+// unrecognized option, or a bare -n/--lines that is not followed by a decimal
+// number, prints an error and exits with status 1.
+int ParseOptionsAndAdvanceArgv(char**& argv) {
+  int lines_to_skip = 1;
+  // Non-null while the previous argument was a bare "-n" or "--lines" whose
+  // value is expected in the current argument; points at that option's text.
+  const char* pending_option = nullptr;
+  for (++argv; argv[0] != nullptr; ++argv) {
+    if (pending_option != nullptr) {
+      // The whole argument must be a number. Without this check,
+      // "-n COMMAND" would silently run COMMAND with the default count.
+      if (!IsDecimalNumber(argv[0])) {
+        DieMissingLineCount(pending_option);
+      }
+      lines_to_skip = ParseLineCount(argv[0]);
+      pending_option = nullptr;
+      continue;
+    }
+
+    const char* YYCURSOR = argv[0];
+    const char* a;
+    /*!stags:re2c format = "const char* @@;"; */
+
+    /*!re2c
+      re2c:define:YYCTYPE = char;
+      re2c:flags:tags = 1;
+      re2c:yyfill:enable = 0;
+
+      ("-" | "-n" | "--lines=") @a [0-9]+ {
+        lines_to_skip = ParseLineCount(a);
+        continue;
+      }
+
+      "-n" | "--lines" {
+        pending_option = argv[0];
+        continue;
+      }
+
+      "--help" {
+        std::cout << kShortUsage << goldfishlocale::ToSystem(kHelp);
+        exit(0);
+      }
+
+      "--version" {
+        std::cout << kVersionInfo;
+        exit(0);
+      }
+
+      "--" {
+        ++argv;
+        break;
+      }
+
+      "-" [^\x00]+ {
+        std::clog << goldfishlocale::ToSystem(
+                         u8"skiphead: Unrecognized option “")
+                  << argv[0] << goldfishlocale::ToSystem(u8"”\n")
+                  << goldfishlocale::ToSystem(kAskForHelp);
+        exit(1);
+      }
+
+      * {
+        break;
+      }
+    */
+  }
+
+  // The argument list ended right after a bare -n/--lines.
+  if (pending_option != nullptr) {
+    DieMissingLineCount(pending_option);
+  }
+  return lines_to_skip;
+}
+
+// Reads up to `count` bytes from `fd` into `buf`, retrying whenever the call
+// fails with EINTR. Returns the number of bytes read; 0 means end of input.
+// Any other read error is reported on std::clog and ends the process with
+// status 1.
+int Read(int fd, void* buf, size_t count) {
+  for (;;) {
+    const ssize_t bytes_read = read(fd, buf, count);
+    if (bytes_read >= 0) {
+      return bytes_read;
+    }
+    if (errno != EINTR) {
+      std::clog << "skiphead: Read failed: " << strerror(errno) << '\n';
+      exit(1);
+    }
+    // Interrupted before any data arrived; just try again.
+  }
+}
+
+// Copies standard input to std::cout until `lines_to_skip` newline characters
+// have been copied or standard input reaches end of file. Does nothing when
+// `lines_to_skip` is not positive.
+void CopyLines(int lines_to_skip) {
+  std::array<char, 16> chunk;
+  const int chunk_capacity = static_cast<int>(chunk.size());
+  while (lines_to_skip > 0) {
+    // Never request more bytes than there are lines still to copy. Every line
+    // ends with its own '\n' byte, so a read of that size cannot contain more
+    // newlines than remain, and therefore never consumes input past the last
+    // header line (presumably so the rest is left for COMMAND to read).
+    const int wanted = std::min(chunk_capacity, lines_to_skip);
+    const int received = Read(STDIN_FILENO, chunk.data(), wanted);
+    if (received == 0) {
+      // Standard input got closed. Just move on.
+      break;
+    }
+
+    lines_to_skip -= static_cast<int>(
+        std::count(chunk.begin(), chunk.begin() + received, '\n'));
+
+    std::cout.write(chunk.data(), received);
+  }
+}
+
+} // namespace
+
+// Entry point: copies the requested number of header lines from standard
+// input to standard output, then replaces this process with COMMAND.
+//
+// Returns 1 when no command was supplied. If COMMAND cannot be executed,
+// reports the failure and returns the errno value left by execvp.
+int main(int argc, char* argv[]) {
+  goldfishlocale::SetLocaleFromEnvironment();
+
+  // We're not using the C stdio functions in this program, so enable extra
+  // userspace buffering to reduce syscall overhead.
+  std::ios_base::sync_with_stdio(false);
+
+  // Shared by the two "no command given" exits below.
+  const auto usage_error = [] {
+    std::clog << kShortUsage << goldfishlocale::ToSystem(kAskForHelp);
+    return 1;
+  };
+
+  if (argc < 2) {
+    return usage_error();
+  }
+
+  // After this call argv points at COMMAND rather than at the program name.
+  const int lines_to_skip = ParseOptionsAndAdvanceArgv(argv);
+  assert(lines_to_skip >= 0);
+  if (argv[0] == nullptr) {
+    return usage_error();
+  }
+
+  CopyLines(lines_to_skip);
+  // Push the buffered header out before this process image is replaced.
+  std::cout.flush();
+
+  execvp(argv[0], argv);
+
+  // Control only reaches this point if execvp failed. Save errno right away,
+  // before the stream operations below can overwrite it.
+  const int exec_errno = errno;
+  std::clog << goldfishlocale::ToSystem(
+                   u8"skiphead: Failed to execute command “")
+            << argv[0] << goldfishlocale::ToSystem(u8"”: ")
+            << strerror(exec_errno) << '\n';
+  return exec_errno;
+}