aboutsummaryrefslogtreecommitdiffhomepage
path: root/pcre2-10.20/src/pcre2_substitute.c
diff options
context:
space:
mode:
authorGravatar Michael Steed <msteed@saltstack.com>2015-09-12 12:59:40 -0700
committerGravatar ridiculousfish <corydoras@ridiculousfish.com>2015-09-21 16:41:25 -0700
commitd83ef07ca76c03852366e4e810053edc19796761 (patch)
tree93f671c5fe577128fd14a1f013fff764e5c5abba /pcre2-10.20/src/pcre2_substitute.c
parente70ed961eab80dab41ebfda7b91d80d9ef041be7 (diff)
Merge new string builtin
This adds the new builtin 'string' which supports various string manipulation and matching algorithms, including PCRE based regular expressions. Fixes #2296 Squashed commit of the following: commit 4c3eaeb6e57d76463e9683c327142b0aeafb92b8 Author: ridiculousfish <corydoras@ridiculousfish.com> Date: Sat Sep 12 12:51:30 2015 -0700 Remove testdata and doc dirs from pcre2 source commit b2a8b4b50f2398b204fb72cfe4b5ba77ece2e1ab Merge: 11c8a47 7974aab Author: ridiculousfish <corydoras@ridiculousfish.com> Date: Sat Sep 12 12:32:40 2015 -0700 Merge branch 'string' of git://github.com/msteed/fish-shell into string-test commit 7974aab6d367f999f1140ab34c2535cef5cf3b00 Author: Michael Steed <msteed@saltstack.com> Date: Fri Sep 11 13:00:02 2015 -0600 build pcre2 lib only, no docs commit eb20b43d2d96b7e6d24618158ce71078de83c40b Merge: 1a09e70 5f519cb Author: Michael Steed <msteed68@gmail.com> Date: Thu Sep 10 20:00:47 2015 -0600 Merge branch 'string' of github.com:msteed/fish-shell into string commit 1a09e709d028393c9e9e6dc9a84278f399a15f3d Author: Michael Steed <msteed68@gmail.com> Date: Thu Sep 10 19:58:24 2015 -0600 rebase on master & address the fallout commit a0ec9772cd1a0a548a501a7633be05dab4e5ee46 Author: Michael Steed <msteed68@gmail.com> Date: Thu Sep 10 19:26:45 2015 -0600 use fish's wildcard_match() for glob matching commit 64c25a01e3f7234f220ba13545cf658a7492b1a4 Author: Michael Steed <msteed68@gmail.com> Date: Thu Aug 27 08:19:23 2015 -0600 some fixes from review - string_get_arg_stdin(): simplify and don't discard the argument when the trailing newline is absent - fix calls to pcre2 for e.g. string match -r -a 'a*' 'b' - correct test for args coming from stdin commit ece7f35ec5f4093763627d68d671b6c0c876896d Author: Michael Steed <msteed68@gmail.com> Date: Sat Aug 22 19:35:56 2015 -0600 fixes from review - Makefile.in: restore iwyu target - regex_replacer_t::replace_matches(): correct size passed to realloc() commit 9ff7477a926c4572e26171cab3cd42f8086be678 Author: Michael Steed <msteed68@gmail.com> Date: Thu Aug 20 13:08:33 2015 -0600 Minor doc improvements commit baf4e096b22dde3063b85b833795eb570d660ba7 Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 19 18:29:02 2015 -0600 another attempt to fix the ci build commit 896a2c2b279a419747bea26102229fbe84534a6f Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 19 18:03:49 2015 -0600 Updates after review comments - make match/replace without -a operate on the first match on each argument - use different exit codes for "no operation performed" and errors, as grep does - refactor regex compile code - use human-friendly error messages from pcre2 - improve error handling & reporting elsewhere - add a few tests - make some doc fixes - some simplification & cleanup - fix ci build failure (I hope) commit efd47dcbda2ca247d58bee56a7774cd75a1062fd Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 12 00:26:07 2015 -0600 fix dependencies for parallel make commit ed0850e2db467362066a3d94e3ececd17c1756cd Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 23:37:22 2015 -0600 Add missing pcre2 files + .gitignore commit 9492e7a7e929c03554336be1ddf80ca6b37f53c5 Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 22:44:05 2015 -0600 add pcre2-10.20 and update license.hdr commit 1a60b933718feb20c0bf7c9e257b8e495014ea1b Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 22:41:19 2015 -0600 add string builtin files - string builtin source, tests, & docs - changes to configure.ac & Makefile.in commit 5f519cb2a2c05213e0a88a7add7af288bc1c1352 Author: Michael Steed <msteed68@gmail.com> Date: Thu Sep 10 19:26:45 2015 -0600 use fish's wildcard_match() for glob matching commit 2ecd24f79500879e2de5bdf1b4c19dd44fc6ac85 Author: Michael Steed <msteed68@gmail.com> Date: Thu Aug 27 08:19:23 2015 -0600 some fixes from review - string_get_arg_stdin(): simplify and don't discard the argument when the trailing newline is absent - fix calls to pcre2 for e.g. string match -r -a 'a*' 'b' - correct test for args coming from stdin commit 45b777e4dc85c05cd4a186f4bdcae543c21aaf08 Author: Michael Steed <msteed68@gmail.com> Date: Sat Aug 22 19:35:56 2015 -0600 fixes from review - Makefile.in: restore iwyu target - regex_replacer_t::replace_matches(): correct size passed to realloc() commit 981cbb6ddf742a5fe8881af916e7b870b7e6422a Author: Michael Steed <msteed68@gmail.com> Date: Thu Aug 20 13:08:33 2015 -0600 Minor doc improvements commit ddb6a2a8fdb6aa31aad41e80d5481bb32c6ed8ff Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 19 18:29:02 2015 -0600 another attempt to fix the ci build commit 1e34e3191b028162863d263e9868052f75194aa5 Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 19 18:03:49 2015 -0600 Updates after review comments - make match/replace without -a operate on the first match on each argument - use different exit codes for "no operation performed" and errors, as grep does - refactor regex compile code - use human-friendly error messages from pcre2 - improve error handling & reporting elsewhere - add a few tests - make some doc fixes - some simplification & cleanup - fix ci build failure (I hope) commit 34232e152df17a3cfbf0a094dd51d148a4f04e6f Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 12 00:26:07 2015 -0600 fix dependencies for parallel make commit 00d7e781697f53454beb91c1d0fc4b2d28d6e034 Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 23:37:22 2015 -0600 Add missing pcre2 files + .gitignore commit 4498aa5f576e09634f7f619443e74d2f33c108e4 Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 22:44:05 2015 -0600 add pcre2-10.20 and update license.hdr commit 290c58c72e22db644ccf6fa9088051644980ed0a Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 22:41:19 2015 -0600 add string builtin files - string builtin source, tests, & docs - changes to configure.ac & Makefile.in
Diffstat (limited to 'pcre2-10.20/src/pcre2_substitute.c')
-rw-r--r--pcre2-10.20/src/pcre2_substitute.c315
1 files changed, 315 insertions, 0 deletions
diff --git a/pcre2-10.20/src/pcre2_substitute.c b/pcre2-10.20/src/pcre2_substitute.c
new file mode 100644
index 00000000..ec00ebb8
--- /dev/null
+++ b/pcre2-10.20/src/pcre2_substitute.c
@@ -0,0 +1,315 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Original API code Copyright (c) 1997-2012 University of Cambridge
+ New API code Copyright (c) 2014 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/*************************************************
+* Match and substitute *
+*************************************************/
+
+/* This function applies a compiled re to a subject string and creates a new
+string with substitutions. The first 7 arguments are the same as for
+pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
+
+Arguments:
+ code points to the compiled expression
+ subject points to the subject string
+ length length of subject string (may contain binary zeros)
+ start_offset where to start in the subject string
+ options option bits
+ match_data points to a match_data block, or is NULL
+ context points a PCRE2 context
+ replacement points to the replacement string
+ rlength length of replacement string
+ buffer where to put the substituted string
+ blength points to length of buffer; updated to length of string
+
+Returns: >= 0 number of substitutions made
+ < 0 an error code
+ PCRE2_ERROR_BADREPLACEMENT means invalid use of $
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
+ PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
+ pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
+ PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
+{
+int rc;
+int subs;
+uint32_t ovector_count;
+uint32_t goptions = 0;
+BOOL match_data_created = FALSE;
+BOOL global = FALSE;
+PCRE2_SIZE buff_offset, lengthleft, fraglength;
+PCRE2_SIZE *ovector;
+
+/* Partial matching is not valid. */
+
+if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
+ return PCRE2_ERROR_BADOPTION;
+
+/* If no match data block is provided, create one. */
+
+if (match_data == NULL)
+ {
+ pcre2_general_context *gcontext = (mcontext == NULL)?
+ (pcre2_general_context *)code :
+ (pcre2_general_context *)mcontext;
+ match_data = pcre2_match_data_create_from_pattern(code, gcontext);
+ if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
+ match_data_created = TRUE;
+ }
+ovector = pcre2_get_ovector_pointer(match_data);
+ovector_count = pcre2_get_ovector_count(match_data);
+
+/* Check UTF replacement string if necessary. */
+
+#ifdef SUPPORT_UNICODE
+if ((code->overall_options & PCRE2_UTF) != 0 &&
+ (options & PCRE2_NO_UTF_CHECK) == 0)
+ {
+ rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
+ if (rc != 0)
+ {
+ match_data->leftchar = 0;
+ goto EXIT;
+ }
+ }
+#endif /* SUPPORT_UNICODE */
+
+/* Notice the global option and remove it from the options that are passed to
+pcre2_match(). */
+
+if ((options & PCRE2_SUBSTITUTE_GLOBAL) != 0)
+ {
+ options &= ~PCRE2_SUBSTITUTE_GLOBAL;
+ global = TRUE;
+ }
+
+/* Find lengths of zero-terminated strings. */
+
+if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
+if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
+
+/* Copy up to the start offset */
+
+if (start_offset > *blength) goto NOROOM;
+memcpy(buffer, subject, start_offset * (PCRE2_CODE_UNIT_WIDTH/8));
+buff_offset = start_offset;
+lengthleft = *blength - start_offset;
+
+/* Loop for global substituting. */
+
+subs = 0;
+do
+ {
+ PCRE2_SIZE i;
+
+ rc = pcre2_match(code, subject, length, start_offset, options|goptions,
+ match_data, mcontext);
+
+ /* Any error other than no match returns the error code. No match when not
+ doing the special after-empty-match global rematch, or when at the end of the
+ subject, breaks the global loop. Otherwise, advance the starting point by one
+ character, copying it to the output, and try again. */
+
+ if (rc < 0)
+ {
+ PCRE2_SIZE save_start;
+
+ if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
+ if (goptions == 0 || start_offset >= length) break;
+
+ save_start = start_offset++;
+ if ((code->overall_options & PCRE2_UTF) != 0)
+ {
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
+ start_offset++;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+ while (start_offset < length &&
+ (subject[start_offset] & 0xfc00) == 0xdc00)
+ start_offset++;
+#endif
+ }
+
+ fraglength = start_offset - save_start;
+ if (lengthleft < fraglength) goto NOROOM;
+ memcpy(buffer + buff_offset, subject + save_start,
+ fraglength*(PCRE2_CODE_UNIT_WIDTH/8));
+ buff_offset += fraglength;
+ lengthleft -= fraglength;
+
+ goptions = 0;
+ continue;
+ }
+
+ /* Handle a successful match. */
+
+ subs++;
+ if (rc == 0) rc = ovector_count;
+ fraglength = ovector[0] - start_offset;
+ if (fraglength >= lengthleft) goto NOROOM;
+ memcpy(buffer + buff_offset, subject + start_offset,
+ fraglength*(PCRE2_CODE_UNIT_WIDTH/8));
+ buff_offset += fraglength;
+ lengthleft -= fraglength;
+
+ for (i = 0; i < rlength; i++)
+ {
+ if (replacement[i] == CHAR_DOLLAR_SIGN)
+ {
+ int group, n;
+ BOOL inparens;
+ PCRE2_SIZE sublength;
+ PCRE2_UCHAR next;
+ PCRE2_UCHAR name[33];
+
+ if (++i == rlength) goto BAD;
+ if ((next = replacement[i]) == CHAR_DOLLAR_SIGN) goto LITERAL;
+
+ group = -1;
+ n = 0;
+ inparens = FALSE;
+
+ if (next == CHAR_LEFT_CURLY_BRACKET)
+ {
+ if (++i == rlength) goto BAD;
+ next = replacement[i];
+ inparens = TRUE;
+ }
+
+ if (next >= CHAR_0 && next <= CHAR_9)
+ {
+ group = next - CHAR_0;
+ while (++i < rlength)
+ {
+ next = replacement[i];
+ if (next < CHAR_0 || next > CHAR_9) break;
+ group = group * 10 + next - CHAR_0;
+ }
+ }
+ else
+ {
+ const uint8_t *ctypes = code->tables + ctypes_offset;
+ while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
+ {
+ name[n++] = next;
+ if (n > 32) goto BAD;
+ if (i == rlength) break;
+ next = replacement[++i];
+ }
+ if (n == 0) goto BAD;
+ name[n] = 0;
+ }
+
+ if (inparens)
+ {
+ if (i == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto BAD;
+ }
+ else i--; /* Last code unit of name/number */
+
+ /* Have found a syntactically correct group number or name. */
+
+ sublength = lengthleft;
+ if (group < 0)
+ rc = pcre2_substring_copy_byname(match_data, name,
+ buffer + buff_offset, &sublength);
+ else
+ rc = pcre2_substring_copy_bynumber(match_data, group,
+ buffer + buff_offset, &sublength);
+
+ if (rc < 0) goto EXIT;
+ buff_offset += sublength;
+ lengthleft -= sublength;
+ }
+
+ /* Handle a literal code unit */
+
+ else
+ {
+ LITERAL:
+ if (lengthleft-- < 1) goto NOROOM;
+ buffer[buff_offset++] = replacement[i];
+ }
+ }
+
+ /* The replacement has been copied to the output. Update the start offset to
+ point to the rest of the subject string. If we matched an empty string,
+ do the magic for global matches. */
+
+ start_offset = ovector[1];
+ goptions = (ovector[0] != ovector[1])? 0 :
+ PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
+ } while (global); /* Repeat "do" loop */
+
+/* Copy the rest of the subject and return the number of substitutions. */
+
+rc = subs;
+fraglength = length - start_offset;
+if (fraglength + 1 > lengthleft) goto NOROOM;
+memcpy(buffer + buff_offset, subject + start_offset,
+ fraglength*(PCRE2_CODE_UNIT_WIDTH/8));
+buff_offset += fraglength;
+buffer[buff_offset] = 0;
+*blength = buff_offset;
+
+EXIT:
+if (match_data_created) pcre2_match_data_free(match_data);
+ else match_data->rc = rc;
+return rc;
+
+NOROOM:
+rc = PCRE2_ERROR_NOMEMORY;
+goto EXIT;
+
+BAD:
+rc = PCRE2_ERROR_BADREPLACEMENT;
+goto EXIT;
+}
+
+/* End of pcre2_substitute.c */