aboutsummaryrefslogtreecommitdiff
path: root/Foundation/GTMRegex.h
diff options
context:
space:
mode:
authorGravatar thomasvl <thomasvl@7dc7ac4e-7543-0410-b95c-c1676fc8e2a3>2008-01-30 18:42:33 +0000
committerGravatar thomasvl <thomasvl@7dc7ac4e-7543-0410-b95c-c1676fc8e2a3>2008-01-30 18:42:33 +0000
commit62301a30a8b6e9b71d3549178f89d62c8c3c7d48 (patch)
tree81c44899cc4f52b883b6e77f870802880e44d20b /Foundation/GTMRegex.h
parent038074fa41a100c52f98536b1c4f47e5e748d8eb (diff)
fold in GTMRegex and ignore the build dir
Diffstat (limited to 'Foundation/GTMRegex.h')
-rw-r--r--Foundation/GTMRegex.h338
1 files changed, 338 insertions, 0 deletions
diff --git a/Foundation/GTMRegex.h b/Foundation/GTMRegex.h
new file mode 100644
index 0000000..8e0f492
--- /dev/null
+++ b/Foundation/GTMRegex.h
@@ -0,0 +1,338 @@
+//
+// GTMRegex.h
+//
+// Copyright 2007-2008 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+//
+
+#import <Foundation/Foundation.h>
+#import <regex.h>
+
+/// Options for controlling the behavior of the matches
+typedef enum {
+
+ kGTMRegexOptionIgnoreCase = 0x01,
+ // Ignore case when matching, e.g. 'a' matches both 'a' and 'A'.
+
+ kGTMRegexOptionSupressNewlineSupport = 0x02,
+ // By default (without this option), regular expressions are implicitly
+ // processed on a line by line basis, where "lines" are delimited by newline
+ // characters. In this mode '.' (dot) does NOT match newline characters, and
+ // '^' and '$' match at the beginning and end of the string as well as
+ // around newline characters. This behavior matches the default behavior for
+ // regular expressions in other languages including Perl and Python. For
+ // example,
+ // foo.*bar
+ // would match
+ // fooAAAbar
+ // but would NOT match
+ // fooAAA\nbar
+ // With the kGTMRegexOptionSupressNewlineSupport option, newlines are treated
+ // just like any other character, which means that '.' will match them. In
+ // this mode, '^' and '$' only match the beginning and end of the input string
+ // and do NOT match around the newline characters. For example,
+ // foo.*bar
+ // would match
+ // fooAAAbar
+ // and would also match
+ // fooAAA\nbar
+
+} GTMRegexOptions;
+
+/// Class for doing Extended Regex operations w/ libregex (see re_format(7)).
+//
+// NOTE: the docs for regcomp/regexec make *no* claims about i18n. All work
+// within this class is done w/ UTF-8 so Unicode should move through it safely,
+// however, the character classes described in re_format(7) might not really
+// be unicode "savvy", so use them and this class w/ that in mind.
+//
+// Example usage:
+//
+// NSArray *inputArrayOfStrings = ...
+// NSEnumerator *enumerator = [inputArrayOfStrings objectEnumerator];
+// NSString *curStr = nil;
+// NSMutableArray *matches = [NSMutableArray array];
+//
+// GTMRegex *regex = [GTMRegex regexWithPattern:@"foo.*bar"];
+// while ((curStr = [enumerator nextObject]) != nil) {
+// if ([regex matchesString:curStr])
+// [matches addObject:curStr];
+// }
+// ....
+//
+// -------------
+//
+// If you need to include something dynamic in a pattern:
+//
+// NSString *pattern =
+// [NSString stringWithFormat:@"^foo:%@bar",
+// [GTMRegex escapedPatternForString:inputStr]];
+// GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
+// ....
+//
+// -------------
+//
+// GTMRegex *regex = [GTMRegex regexWithPattern:@"(foo+)(bar)"];
+// NSString *highlighted =
+// [regex stringByReplacingMatchesInString:inputString
+// withReplacement:@"<i>\\1</i><b>\\2</b>"];
+// ....
+//
+@interface GTMRegex : NSObject {
+ @private
+ NSString *pattern_;
+ GTMRegexOptions options_;
+ regex_t regexData_;
+}
+
+/// Create a new, autoreleased object w/ the given regex pattern and the default options
++ (id)regexWithPattern:(NSString *)pattern;
+
+/// Create a new, autoreleased object w/ the given regex pattern and the specified matching options
++ (id)regexWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;
+
+/// Returns a new, autoreleased copy of |str| w/ any pattern chars in it escaped so they have no meaning when used w/in a pattern.
++ (NSString *)escapedPatternForString:(NSString *)str;
+
+/// Initialize a new object w/ the given regex pattern and the default options
+- (id)initWithPattern:(NSString *)pattern;
+
+/// Initialize a new object w/ the given regex pattern and the specified matching options
+- (id)initWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;
+
+/// Returns the number of sub patterns in the pattern
+//
+// The sub pattern count is the number of parenthesis blocks w/in the pattern.
+// e.g. the pattern "foo((bar)|(baz))" has 3 sub patterns.
+//
+- (int)subPatternCount;
+
+/// Returns YES if the whole string |str| matches the pattern.
+- (BOOL)matchesString:(NSString *)str;
+
+/// Returns a new, autoreleased array of strings that contain the subpattern matches for the string.
+//
+// If the whole string does not match the pattern, nil is returned.
+//
+// The api follows the conventions of most regex engines, and index 0 (zero) is
+// the full match, then the subpatterns are index 1, 2, ... going left to right.
+// If the pattern has optional subpatterns, then anything that didn't match
+// will have NSNull at that index.
+// e.g. the pattern "(fo(o+))((bar)|(baz))" has five subpatterns, and when
+// applied to the string "foooooobaz" you'd get an array of:
+// 0: "foooooobaz"
+// 1: "foooooo"
+// 2: "ooooo"
+// 3: "baz"
+// 4: NSNull
+// 5: "baz"
+//
+- (NSArray *)subPatternsOfString:(NSString *)str;
+
+/// Returns a new, autoreleased enumerator that will walk segments (GTMRegexStringSegment) of |str| based on the pattern.
+//
+// This will split the string into "segments" using the given pattern. You get
+// both the matches and the parts that are in between matches, i.e. the entire
+// string will eventually be returned.
+//
+// See GTMRegexStringSegment for more information and examples.
+//
+- (NSEnumerator *)segmentEnumeratorForString:(NSString *)str;
+
+/// Returns a new, autoreleased enumerator that will walk only the matching segments (GTMRegexStringSegment) of |str| based on the pattern.
+//
+// This extracts the "segments" of the string that matched the pattern. So it
+// can be used to collect all of the matching substrings from within a string.
+//
+// See GTMRegexStringSegment for more information and examples.
+//
+- (NSEnumerator *)matchSegmentEnumeratorForString:(NSString *)str;
+
+/// Returns a new, autoreleased string with all matches of the pattern in |str| replaced with |replacementPattern|.
+//
+// Replacement uses a sed-like substitution syntax w/in |replacementPattern|
+// to allow the use of matches in the replacement. The replacement pattern can
+// make use of any number of match references by using a backslash followed by
+// the match subexpression number (e.g. "\2", "\0", ...), see subPatternsOfString:
+// for details on the subexpression indexing.
+//
+// REMINDER: you need to double the backslash since the backslash has meaning
+// to the compiler/preprocessor. e.g. "\\0"
+//
+- (NSString *)stringByReplacingMatchesInString:(NSString *)str
+ withReplacement:(NSString *)replacementPattern;
+
+@end
+
+/// Class returned by the nextObject for the enumerators from GTMRegex
+//
+// The two enumerators from GTMRegex return objects of this type. This object
+// represents a "piece" of the string the enumerator is walking. The apis on
+// this object allow you to figure out why each segment was returned and to
+// act on it.
+//
+// The easiest way to understand how the enumerators and this class work is
+// through an example:
+// Pattern: "foo+"
+// String: "fo bar foobar foofooo baz"
+// If you walk this w/ -segmentEnumeratorForString you'll get:
+// # nextObjects Calls -isMatch -string
+// 1 NO "fo bar "
+// 2 YES "foo"
+// 3 NO "bar "
+// 4 YES "foo"
+// 5 YES "fooo"
+// 6 NO " baz"
+// And if you walk this w/ -matchSegmentEnumeratorForString you'll get:
+// # nextObjects Calls -isMatch -string
+// 1 YES "foo"
+// 2 YES "foo"
+// 3 YES "fooo"
+// (see the comments on subPatternString for how it works)
+//
+// Example usage:
+//
+// NSMutableString *processedStr = [NSMutableString string];
+// NSEnumerator *enumerator =
+// [inputStr gtm_segmentEnumeratorForPattern:@"foo+((ba+r)|(ba+z))"];
+// GTMRegexStringSegment *segment = nil;
+// while ((segment = [enumerator nextObject]) != nil) {
+// if ([segment isMatch]) {
+// if ([segment subPatternString:2] != nil) {
+// // matched: "(ba+r)"
+// [processedStr appendFormat:@"<b>%@</b>", [segment string]];
+// } else {
+// // matched: "(ba+z)"
+// [processedStr appendFormat:@"<i>%@</i>", [segment string]];
+// }
+// } else {
+// [processedStr appendString:[segment string]];
+// }
+// }
+// // processedStr now has all the versions of foobar wrapped in bold tags,
+// // and all the versions of foobaz in italics tags.
+// // ie: " fooobar foobaaz " ==> " <b>fooobar</b> <i>foobaaz</i> "
+//
+@interface GTMRegexStringSegment : NSObject {
+ @private
+ NSData *utf8StrBuf_;
+ regmatch_t *regMatches_; // STRONG: i.e. we call free
+ int numRegMatches_;
+ BOOL isMatch_;
+}
+
+/// Returns YES if this segment came from a match of the regex, NO if it was a segment between matches.
+//
+// Use -isMatch to see if the segment came from a match of the pattern or if the
+// segment is some text between matches. (NOTE: isMatch is always YES for
+// matchSegmentEnumeratorForString)
+//
+- (BOOL)isMatch;
+
+/// Returns a new, autoreleased string w/ the full text segment from the original string.
+- (NSString *)string;
+
+/// Returns a new, autoreleased string w/ the |index| sub pattern from this segment of the original string.
+//
+// This api follows the conventions of most regex engines, and index 0 (zero) is
+// the full match, then the subpatterns are index 1, 2, ... going left to right.
+// If the pattern has optional subpatterns, then anything that didn't match
+// will return nil.
+// e.g. when using the pattern "(fo(o+))((bar)|(baz))" the following indexes
+// fetch these values for a segment where -string is @"foooooobaz":
+// 0: "foooooobaz"
+// 1: "foooooo"
+// 2: "ooooo"
+// 3: "baz"
+// 4: nil
+// 5: "baz"
+//
+- (NSString *)subPatternString:(int)index;
+
+@end
+
+/// Some helpers to streamline usage of GTMRegex
+//
+// Example usage:
+//
+// if ([inputStr gtm_matchesPattern:@"foo.*bar"]) {
+// // act on match
+// ....
+// }
+//
+// -------------
+//
+// NSString *subStr = [inputStr gtm_firstSubStringMatchedByPattern:@"^foo:.*$"];
+// if (subStr != nil) {
+// // act on subStr
+// ....
+// }
+//
+// -------------
+//
+// NSArray *headingList =
+// [inputStr gtm_allSubstringsMatchedByPattern:@"^Heading:.*$"];
+// // act on the list of headings
+// ....
+//
+// -------------
+//
+// NSString *highlightedString =
+// [inputString gtm_stringByReplacingMatchesOfPattern:@"(foo+)(bar)"
+// withReplacement:@"<i>\\1</i><b>\\2</b>"];
+// ....
+//
+@interface NSString (GTMRegexAdditions)
+
+/// Returns YES if the full string matches regex |pattern| using the default match options
+- (BOOL)gtm_matchesPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased array of strings that contain the subpattern matches of |pattern| using the default match options
+//
+// See [GTMRegex subPatternsOfString:] for information about the returned array.
+//
+- (NSArray *)gtm_subPatternsOfPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased string w/ the first substring that matched the regex |pattern| using the default match options
+- (NSString *)gtm_firstSubStringMatchedByPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased array of substrings in the string that match the regex |pattern| using the default match options
+//
+// Note: if the string has no matches, you get an empty array.
+- (NSArray *)gtm_allSubstringsMatchedByPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased segment enumerator that will break the string using pattern w/ the default match options
+//
+// The enumerator returns GTMRegexStringSegment objects, see that class for more
+// details and examples.
+//
+- (NSEnumerator *)gtm_segmentEnumeratorForPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased segment enumerator that will only return matching segments from the string using pattern w/ the default match options
+//
+// The enumerator returns GTMRegexStringSegment objects, see that class for more
+// details and examples.
+//
+- (NSEnumerator *)gtm_matchSegmentEnumeratorForPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased string in which all matches of |pattern| are replaced w/ |replacementPattern|. Uses the default match options.
+//
+// |replacementPattern| has support for using any subExpression that matched,
+// see [GTMRegex stringByReplacingMatchesInString:withReplacement:] above
+// for details.
+//
+- (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
+ withReplacement:(NSString *)replacementPattern;
+
+@end