aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar thomasvl <thomasvl@7dc7ac4e-7543-0410-b95c-c1676fc8e2a3>2008-01-30 18:42:33 +0000
committerGravatar thomasvl <thomasvl@7dc7ac4e-7543-0410-b95c-c1676fc8e2a3>2008-01-30 18:42:33 +0000
commit62301a30a8b6e9b71d3549178f89d62c8c3c7d48 (patch)
tree81c44899cc4f52b883b6e77f870802880e44d20b
parent038074fa41a100c52f98536b1c4f47e5e748d8eb (diff)
fold in GTMRegex and ignore the build dir
-rw-r--r--Foundation/GTMRegex.h338
-rw-r--r--Foundation/GTMRegex.m674
-rw-r--r--Foundation/GTMRegexTest.m955
-rw-r--r--GTM.xcodeproj/project.pbxproj12
4 files changed, 1979 insertions, 0 deletions
diff --git a/Foundation/GTMRegex.h b/Foundation/GTMRegex.h
new file mode 100644
index 0000000..8e0f492
--- /dev/null
+++ b/Foundation/GTMRegex.h
@@ -0,0 +1,338 @@
+//
+// GTMRegex.h
+//
+// Copyright 2007-2008 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+//
+
+#import <Foundation/Foundation.h>
+#import <regex.h>
+
+/// Options for controlling the behavior of the matches
+typedef enum {
+
+ kGTMRegexOptionIgnoreCase = 0x01,
+ // Ignore case in matching, ie: 'a' matches 'a' or 'A'
+
+ kGTMRegexOptionSupressNewlineSupport = 0x02,
+ // By default (without this option), regular expressions are implicitly
+ // processed on a line by line basis, where "lines" are delimited by newline
+ // characters. In this mode '.' (dot) does NOT match newline characters, and
+ // '^' and '$' match at the beginning and end of the string as well as
+ // around newline characters. This behavior matches the default behavior for
+ // regular expressions in other languages including Perl and Python. For
+ // example,
+ // foo.*bar
+ // would match
+ // fooAAAbar
+ // but would NOT match
+ // fooAAA\nbar
+ // With the kGTMRegexOptionSupressNewlineSupport option, newlines are treated
+ // just like any other character which means that '.' will match them. In
+ // this mode, ^ and $ only match the beginning and end of the input string
+ // and do NOT match around the newline characters. For example,
+ // foo.*bar
+ // would match
+ // fooAAAbar
+ // and would also match
+ // fooAAA\nbar
+
+} GTMRegexOptions;
+
+/// Class for doing Extended Regex operations w/ libregex (see re_format(7)).
+//
+// NOTE: the docs for regcomp/regexec make *no* claims about i18n. All work
+// within this class is done w/ UTF-8 so Unicode should move through it safely,
+// however, the character classes described in re_format(7) might not really
+// be unicode "savvy", so use them and this class w/ that in mind.
+//
+// Example usage:
+//
+// NSArray *inputArrayOfStrings = ...
+// NSEnumerator *enumerator = [inputArrayOfStrings objectEnumerator];
+// NSString *curStr = nil;
+// NSMutableArray *matches = [NSMutableArray array];
+//
+// GTMRegex *regex = [GTMRegex regexWithPattern:@"foo.*bar"];
+// while ((curStr = [enumerator nextObject]) != nil) {
+// if ([regex matchesString:curStr])
+// [matches addObject:curStr];
+// }
+// ....
+//
+// -------------
+//
+// If you need to include something dynamic in a pattern:
+//
+// NSString *pattern =
+// [NSString stringWithFormat:@"^foo:%@bar",
+// [GTMRegex escapedPatternForString:inputStr]];
+// GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
+// ....
+//
+// -------------
+//
+// GTMRegex *regex = [GTMRegex regexWithPattern:@"(foo+)(bar)"];
+// NSString *highlighted =
+// [regex stringByReplacingMatchesInString:inputString
+// withReplacement:@"<i>\\1</i><b>\\2</b>"];
+// ....
+//
+@interface GTMRegex : NSObject {
+ @private
+ NSString *pattern_;
+ GTMRegexOptions options_;
+ regex_t regexData_;
+}
+
+/// Create a new, autoreleased object w/ the given regex pattern with the default options
++ (id)regexWithPattern:(NSString *)pattern;
+
+/// Create a new, autoreleased object w/ the given regex pattern and specify the matching options
++ (id)regexWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;
+
+/// Returns a new, autoreleased copy of |str| w/ any pattern chars in it escaped so they have no meaning when used w/in a pattern.
++ (NSString *)escapedPatternForString:(NSString *)str;
+
+/// Initialize a new object w/ the given regex pattern with the default options
+- (id)initWithPattern:(NSString *)pattern;
+
+/// Initialize a new object w/ the given regex pattern and specify the matching options
+- (id)initWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;
+
+/// Returns the number of sub patterns in the pattern
+//
+// Sub Patterns are basically the number of parenthesis blocks w/in the pattern.
+// ie: The pattern "foo((bar)|(baz))" has 3 sub patterns.
+//
+- (int)subPatternCount;
+
+/// Returns YES if the whole string |str| matches the pattern.
+- (BOOL)matchesString:(NSString *)str;
+
+/// Returns a new, autoreleased array of strings that contain the subpattern matches for the string.
+//
+// If the whole string does not match the pattern, nil is returned.
+//
+// The api follows the conventions of most regex engines, and index 0 (zero) is
+// the full match, then the subpatterns are index 1, 2, ... going left to right.
+// If the pattern has optional subpatterns, then anything that didn't match
+// will have NSNull at that index.
+// ie: The pattern "(fo(o+))((bar)|(baz))" has five subpatterns, and when
+// applied to the string "foooooobaz" you'd get an array of:
+// 0: "foooooobaz"
+// 1: "foooooo"
+// 2: "ooooo"
+// 3: "baz"
+// 4: NSNull
+// 5: "baz"
+//
+- (NSArray *)subPatternsOfString:(NSString *)str;
+
+/// Returns a new, autoreleased enumerator that will walk segments (GTMRegexStringSegment) of |str| based on the pattern.
+//
+// This will split the string into "segments" using the given pattern. You get
+// both the matches and parts that are in between matches. ie-the entire string
+// will eventually be returned.
+//
+// See GTMRegexStringSegment for more information and examples.
+//
+- (NSEnumerator *)segmentEnumeratorForString:(NSString *)str;
+
+/// Returns a new, autoreleased enumerator that will walk only the matching segments (GTMRegexStringSegment) of |str| based on the pattern.
+//
+// This extracts the "segments" of the string that matched the pattern. So it
+// can be used to collect all of the matching substrings from within a string.
+//
+// See GTMRegexStringSegment for more information and examples.
+//
+- (NSEnumerator *)matchSegmentEnumeratorForString:(NSString *)str;
+
+/// Returns a new, autoreleased string with all matches of the pattern in |str| replaced with |replacementPattern|.
+//
+// Replacement uses a sed-like substitution syntax w/in |replacementPattern|
+// to allow the use of matches in the replacement. The replacement pattern can
+// make use of any number of match references by using a backslash followed by
+// the match subexpression number (ie-"\2", "\0", ...), see subPatternsOfString:
+// for details on the subexpression indexing.
+//
+// REMINDER: you need to double-slash since the slash has meaning to the
+// compiler/preprocessor. ie: "\\0"
+//
+- (NSString *)stringByReplacingMatchesInString:(NSString *)str
+ withReplacement:(NSString *)replacementPattern;
+
+@end
+
+/// Class returned by the nextObject for the enumerators from GTMRegex
+//
+// The two enumerators from GTMRegex return objects of this type. This object
+// represents a "piece" of the string the enumerator is walking. The apis
+// on this object allow you to figure out why each segment was returned and to
+// act on it.
+//
+// The easiest way to understand how the enumerators and this class work
+// is through an example:
+// Pattern: "foo+"
+// String: "fo bar foobar foofooo baz"
+// If you walk this w/ -segmentEnumeratorForString you'll get:
+// # nextObjects Calls -isMatch -string
+// 1 NO "fo bar "
+// 2 YES "foo"
+// 3 NO "bar "
+// 4 YES "foo"
+// 5 YES "fooo"
+// 6 NO " baz"
+// And if you walk this w/ -matchSegmentEnumeratorForString you'll get:
+// # nextObjects Calls -isMatch -string
+// 1 YES "foo"
+// 2 YES "foo"
+// 3 YES "fooo"
+// (see the comments on subPatternString for how it works)
+//
+// Example usage:
+//
+// NSMutableString *processedStr = [NSMutableString string];
+// NSEnumerator *enumerator =
+// [inputStr gtm_segmentEnumeratorForPattern:@"foo+((ba+r)|(ba+z))"];
+// GTMRegexStringSegment *segment = nil;
+// while ((segment = [enumerator nextObject]) != nil) {
+// if ([segment isMatch]) {
+// if ([segment subPatternString:2] != nil) {
+// // matched: "(ba+r)"
+// [processedStr appendFormat:@"<b>%@</b>", [segment string]];
+// } else {
+// // matched: "(ba+z)"
+// [processedStr appendFormat:@"<i>%@</i>", [segment string]];
+// }
+// } else {
+// [processedStr appendString:[segment string]];
+// }
+// }
+// // processedStr now has all the versions of foobar wrapped in bold tags,
+// // and all the versions of foobaz in italics tags.
+// // ie: " fooobar foobaaz " ==> " <b>fooobar</b> <i>foobaaz</i> "
+//
+@interface GTMRegexStringSegment : NSObject {
+ @private
+ NSData *utf8StrBuf_;
+ regmatch_t *regMatches_; // STRONG: ie-we call free
+ int numRegMatches_;
+ BOOL isMatch_;
+}
+
+/// Returns YES if this segment is from a match of the regex, NO if it was a segment between matches.
+//
+// Use -isMatch to see if the segment is from a match of the pattern or if the
+// segment is some text between matches. (NOTE: isMatch is always YES for
+// matchSegmentEnumeratorForString)
+//
+- (BOOL)isMatch;
+
+/// Returns a new, autoreleased string w/ the full text segment from the original string.
+- (NSString *)string;
+
+/// Returns a new, autoreleased string w/ the |index| sub pattern from this segment of the original string.
+//
+// This api follows the conventions of most regex engines, and index 0 (zero) is
+// the full match, then the subpatterns are index 1, 2, ... going left to right.
+// If the pattern has optional subpatterns, then anything that didn't match
+// will return nil.
+// ie: When using the pattern "(fo(o+))((bar)|(baz))" the following indexes
+// fetch these values for a segment where -string is @"foooooobaz":
+// 0: "foooooobaz"
+// 1: "foooooo"
+// 2: "ooooo"
+// 3: "baz"
+// 4: nil
+// 5: "baz"
+//
+- (NSString *)subPatternString:(int)index;
+
+@end
+
+/// Some helpers to streamline usage of GTMRegex
+//
+// Example usage:
+//
+// if ([inputStr gtm_matchesPattern:@"foo.*bar"]) {
+// // act on match
+// ....
+// }
+//
+// -------------
+//
+// NSString *subStr = [inputStr gtm_firstSubStringMatchedByPattern:@"^foo:.*$"];
+// if (subStr != nil) {
+// // act on subStr
+// ....
+// }
+//
+// -------------
+//
+// NSArray *headingList =
+// [inputStr gtm_allSubstringsMatchedByPattern:@"^Heading:.*$"];
+// // act on the list of headings
+// ....
+//
+// -------------
+//
+// NSString *highlightedString =
+// [inputString gtm_stringByReplacingMatchesOfPattern:@"(foo+)(bar)"
+// withReplacement:@"<i>\\1</i><b>\\2</b>"];
+// ....
+//
+@interface NSString (GTMRegexAdditions)
+
+/// Returns YES if the full string matches regex |pattern| using the default match options
+- (BOOL)gtm_matchesPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased array of strings that contain the subpattern matches of |pattern| using the default match options
+//
+// See [GTMRegex subPatternsOfString:] for information about the returned array.
+//
+- (NSArray *)gtm_subPatternsOfPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased string w/ the first substring that matched the regex |pattern| using the default match options
+- (NSString *)gtm_firstSubStringMatchedByPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased array of substrings in the string that match the regex |pattern| using the default match options
+//
+// Note: if the string has no matches, you get an empty array.
+- (NSArray *)gtm_allSubstringsMatchedByPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased segment enumerator that will break the string using pattern w/ the default match options
+//
+// The enumerator returns GTMRegexStringSegment objects, see that class for more
+// details and examples.
+//
+- (NSEnumerator *)gtm_segmentEnumeratorForPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased segment enumerator that will only return matching segments from the string using pattern w/ the default match options
+//
+// The enumerator returns GTMRegexStringSegment objects, see that class for more
+// details and examples.
+//
+- (NSEnumerator *)gtm_matchSegmentEnumeratorForPattern:(NSString *)pattern;
+
+/// Returns a new, autoreleased string with all matches of |pattern| replaced w/ |replacementPattern|. Uses the default match options.
+//
+// |replacementPattern| has support for using any subExpression that matched,
+// see [GTMRegex stringByReplacingMatchesInString:withReplacement:] above
+// for details.
+//
+- (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
+ withReplacement:(NSString *)replacementPattern;
+
+@end
diff --git a/Foundation/GTMRegex.m b/Foundation/GTMRegex.m
new file mode 100644
index 0000000..c582b1e
--- /dev/null
+++ b/Foundation/GTMRegex.m
@@ -0,0 +1,674 @@
+//
+// GTMRegex.m
+//
+// Copyright 2007-2008 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+//
+
+#import "GTMRegex.h"
+
+// This is the pattern to use for walking replacement text when doing
+// substitutions.
+//
+// this pattern may look over escaped, but remember the compiler will consume
+// one layer of slashes, and then we have to escape the slashes for them to be
+// seen as we want in the pattern.
+//
+// Once the compiler has consumed its layer of escaping, the regex is:
+// ((^|[^\\])(\\\\)*)(\\([0-9]+))
+// ie: subpattern 1 is the text leading up to a reference (start of the text or
+// a non-backslash character, followed by any number of backslash pairs), and
+// subpattern 5 is the decimal number that follows the single backslash.
+static NSString *const kReplacementPattern =
+ @"((^|[^\\\\])(\\\\\\\\)*)(\\\\([0-9]+))";
+// index of the subpattern holding the text that precedes the "\N" reference
+#define kReplacementPatternLeadingTextIndex 1
+// index of the subpattern holding the digits (N) of the "\N" reference
+#define kReplacementPatternSubpatternNumberIndex 5
+
+// Internal helpers shared by GTMRegex and the enumerator in this file.
+@interface GTMRegex (PrivateMethods)
+// Returns the regerror() text for |errCode|, or a generic message if it can't
+// be fetched.
+- (NSString *)errorMessage:(int)errCode;
+// Runs regexec() with the compiled pattern on |utf8Str|; returns YES on a
+// match (with |pmatch| filled in) and NO for no match, error, or a NULL string.
+- (BOOL)runRegexOnUTF8:(const char*)utf8Str
+ nmatch:(size_t)nmatch
+ pmatch:(regmatch_t *)pmatch
+ flags:(int)flags;
+@end
+
+// private enumerator as impl detail
+//
+// Walks a string and vends GTMRegexStringSegment objects from -nextObject.
+@interface GTMRegexEnumerator : NSEnumerator {
+ @private
+ GTMRegex *regex_; // retained; the pattern being applied
+ NSData *utf8StrBuf_; // retained; UTF-8 bytes of the string being walked
+ BOOL allSegments_; // YES: also return the text between matches
+ regoff_t curParseIndex_; // byte offset in utf8StrBuf_ where the next search starts
+ regmatch_t *savedRegMatches_; // malloc'd match held back for the next call (freed in dealloc)
+}
+- (id)initWithRegex:(GTMRegex *)regex
+ processString:(NSString *)str
+ allSegments:(BOOL)allSegments;
+@end
+
+// Initializer used by the enumerator to build segments.
+@interface GTMRegexStringSegment (PrivateMethods)
+// |regMatches| is a malloc'd buffer; the segment stores the pointer and frees
+// it in -dealloc. |numRegMatches| is the subpattern count, so the buffer is
+// indexed from 0 through |numRegMatches|.
+- (id)initWithUTF8StrBuf:(NSData *)utf8StrBuf
+ regMatches:(regmatch_t *)regMatches
+ numRegMatches:(int)numRegMatches
+ isMatch:(BOOL)isMatch;
+@end
+
+@implementation GTMRegex
+
++ (id)regexWithPattern:(NSString *)pattern {
+  // Convenience constructor using the default options; the result is
+  // autoreleased, or nil when -initWithPattern: rejects the pattern.
+  id regex = [[self alloc] initWithPattern:pattern];
+  return [regex autorelease];
+}
+
++ (id)regexWithPattern:(NSString *)pattern options:(GTMRegexOptions)options {
+  // Convenience constructor with explicit options; the result is autoreleased,
+  // or nil when -initWithPattern:options: rejects the pattern.
+  id regex = [[self alloc] initWithPattern:pattern options:options];
+  return [regex autorelease];
+}
+
++ (NSString *)escapedPatternForString:(NSString *)str {
+  if (str == nil)
+    return nil;
+
+  // The characters that get a leading backslash so they are taken literally
+  // when the result is used inside a pattern.
+  static const unichar kSpecialChars[] = {
+    '^', '.', '[', '$', '(', ')', '|', '*', '+', '?', '{', '\\'
+  };
+  const unsigned int kSpecialCount =
+    sizeof(kSpecialChars) / sizeof(kSpecialChars[0]);
+
+  // NOTE: walking one unichar at a time is simple, not fast; pulling the
+  // string into a unichar buffer and copying runs in chunks would be quicker.
+  unsigned int length = [str length];
+  NSMutableString *escaped = [NSMutableString stringWithCapacity:length];
+
+  for (unsigned int idx = 0; idx < length; ++idx) {
+    unichar ch = [str characterAtIndex:idx];
+
+    BOOL isSpecial = NO;
+    for (unsigned int s = 0; (s < kSpecialCount) && !isSpecial; ++s) {
+      isSpecial = (kSpecialChars[s] == ch);
+    }
+
+    if (isSpecial) {
+      [escaped appendFormat:@"\\%C", ch];
+    } else {
+      [escaped appendFormat:@"%C", ch];
+    }
+  }
+
+  return escaped;
+}
+
+- (id)init {
+  // There is no usable regex without a pattern: forwarding nil lets the
+  // initializer chain reject it (an empty pattern yields nil).
+  NSString *noPattern = nil;
+  return [self initWithPattern:noPattern];
+}
+
+- (id)initWithPattern:(NSString *)pattern {
+  // 0 means no GTMRegexOptions bits are set, ie-the default matching behavior.
+  GTMRegexOptions defaultOptions = 0;
+  return [self initWithPattern:pattern options:defaultOptions];
+}
+
+// Compiles |pattern| as an extended regex using |options|.
+//
+// Returns nil (after releasing self) when the pattern is nil/empty, can't be
+// copied, or fails to compile; a compile failure is also logged.
+- (id)initWithPattern:(NSString *)pattern options:(GTMRegexOptions)options {
+ self = [super init];
+ if (!self) return nil;
+
+ // nil or empty patterns are rejected (messaging nil gives a length of 0)
+ if ([pattern length] == 0) {
+ [self release];
+ return nil;
+ }
+
+ // figure out the flags
+ options_ = options;
+ int flags = REG_EXTENDED;
+ if (options_ & kGTMRegexOptionIgnoreCase)
+ flags |= REG_ICASE;
+ // newline support is on unless the caller explicitly suppressed it
+ if ((options_ & kGTMRegexOptionSupressNewlineSupport) == 0)
+ flags |= REG_NEWLINE;
+
+ // even if regcomp fails we need a flag that says we did call regcomp so we'll
+ // call regfree (because the structure can get filled in some to allow better
+ // error info). we use pattern_ as this flag.
+ pattern_ = [pattern copy];
+ if (!pattern_) {
+ [self release];
+ return nil;
+ }
+
+ // compile it
+ int compResult = regcomp(&regexData_, [pattern_ UTF8String], flags);
+ if (compResult != 0) {
+ // we don't want to throw if we failed, so we'll return nil, but still
+ // log the error just so it's out there.
+ NSString *errorStr = [self errorMessage:compResult];
+ NSLog(@"Invalid pattern \"%@\", error: \"%@\"", pattern_, errorStr);
+
+ // pattern_ is set at this point, so -dealloc will call regfree
+ [self release];
+ return nil;
+ }
+
+ return self;
+}
+
+- (void)dealloc {
+  // pattern_ is non-nil only when the initializer went on to call regcomp(),
+  // so it doubles as the "regexData_ needs regfree()" flag.
+  if (pattern_ != nil) {
+    regfree(&regexData_);
+    [pattern_ release];
+    // clear it so the flag can't claim regexData_ is still live
+    pattern_ = nil;
+  }
+  [super dealloc];
+}
+
+- (int)subPatternCount {
+  // re_nsub comes from the compiled regex; it is narrowed to the int this
+  // API exposes.
+  int count = regexData_.re_nsub;
+  return count;
+}
+
+- (BOOL)matchesString:(NSString *)str {
+  // Only the overall match (slot 0) is needed to answer this.
+  regmatch_t wholeMatch;
+  BOOL found = [self runRegexOnUTF8:[str UTF8String]
+                             nmatch:1
+                             pmatch:&wholeMatch
+                              flags:0];
+  if (!found)
+    return NO;
+
+  // A match anywhere in the string is not enough: it has to start at byte 0
+  // and end at the last byte of the UTF-8 form of |str|.
+  if (wholeMatch.rm_so != 0)
+    return NO;
+  return (wholeMatch.rm_eo == [str lengthOfBytesUsingEncoding:NSUTF8StringEncoding]);
+}
+
+// Returns an autoreleased array holding the full match (index 0) followed by
+// each subpattern match, or nil if the pattern doesn't match all of |str|.
+// Subpatterns that took no part in the match are represented by NSNull.
+- (NSArray *)subPatternsOfString:(NSString *)str {
+  NSArray *result = nil;
+
+  // one slot per subpattern, plus slot 0 for the overall match
+  int count = regexData_.re_nsub + 1;
+  regmatch_t *regMatches = malloc(sizeof(regmatch_t) * count);
+  if (!regMatches)
+    return nil;
+
+  // wrap it all in a try so we don't leak the malloc (the @finally also runs
+  // for the early returns below)
+  @try {
+    const char *utf8Str = [str UTF8String];
+    if (![self runRegexOnUTF8:utf8Str
+                       nmatch:count
+                       pmatch:regMatches
+                        flags:0]) {
+      // no match
+      return nil;
+    }
+
+    // make sure the match is the full string
+    if ((regMatches[0].rm_so != 0) ||
+        (regMatches[0].rm_eo != [str lengthOfBytesUsingEncoding:NSUTF8StringEncoding])) {
+      // only matched a sub part of the string; this method returns an object
+      // pointer, so the "no result" value is nil (not the BOOL NO)
+      return nil;
+    }
+
+    NSMutableArray *buildResult = [NSMutableArray arrayWithCapacity:count];
+
+    for (int x = 0 ; x < count ; ++x) {
+      if ((regMatches[x].rm_so == -1) && (regMatches[x].rm_eo == -1)) {
+        // add NSNull since it wasn't used
+        [buildResult addObject:[NSNull null]];
+      } else {
+        // fetch the string: offsets are byte offsets into the UTF-8 buffer
+        const char *base = utf8Str + regMatches[x].rm_so;
+        unsigned len = regMatches[x].rm_eo - regMatches[x].rm_so;
+        NSString *sub =
+          [[[NSString alloc] initWithBytes:base
+                                    length:len
+                                  encoding:NSUTF8StringEncoding] autorelease];
+        [buildResult addObject:sub];
+      }
+    }
+
+    result = buildResult;
+  }
+  @finally {
+    free(regMatches);
+  }
+
+  return result;
+}
+
+- (NSEnumerator *)segmentEnumeratorForString:(NSString *)str {
+  // allSegments:YES => the enumerator also returns the text between matches
+  GTMRegexEnumerator *enumerator =
+    [[GTMRegexEnumerator alloc] initWithRegex:self
+                                processString:str
+                                  allSegments:YES];
+  return [enumerator autorelease];
+}
+
+- (NSEnumerator *)matchSegmentEnumeratorForString:(NSString *)str {
+  // allSegments:NO => the enumerator returns only the matching segments
+  GTMRegexEnumerator *enumerator =
+    [[GTMRegexEnumerator alloc] initWithRegex:self
+                                processString:str
+                                  allSegments:NO];
+  return [enumerator autorelease];
+}
+
+// Builds a new string from |str| where every match of this regex is replaced
+// by |replacementPattern|, expanding "\N" references to the Nth subpattern of
+// the match. A nil |str| returns nil; a nil/empty |replacementPattern| removes
+// the matches.
+- (NSString *)stringByReplacingMatchesInString:(NSString *)str
+ withReplacement:(NSString *)replacementPattern {
+ if (!str)
+ return nil;
+
+ // if we have a replacement, we go ahead and crack it now. if the replacement
+ // is just an empty string (or nil), just use the nil marker.
+ NSArray *replacements = nil;
+ if ([replacementPattern length]) {
+ // don't need newline support, just match the start of the pattern for '^'
+ GTMRegex *replacementRegex =
+ [GTMRegex regexWithPattern:kReplacementPattern
+ options:kGTMRegexOptionSupressNewlineSupport];
+ // pull them all into an array so we can walk this as many times as needed.
+ // (each element is a GTMRegexStringSegment: raw text or a "\N" reference)
+ replacements =
+ [[replacementRegex segmentEnumeratorForString:replacementPattern] allObjects];
+ if (!replacements) {
+ NSLog(@"failed to create the replacements for subtituations");
+ return nil;
+ }
+ }
+
+ NSMutableString *result = [NSMutableString stringWithCapacity:[str length]];
+
+ // walk every segment of the input: text between matches is copied over, and
+ // matches are swapped for the expanded replacement
+ NSEnumerator *enumerator = [self segmentEnumeratorForString:str];
+ GTMRegexStringSegment *segment = nil;
+ while ((segment = [enumerator nextObject]) != nil) {
+ if (![segment isMatch]) {
+ // not a match, just move this chunk over
+ [result appendString:[segment string]];
+ } else {
+ // match...
+ if (!replacements) {
+ // no replacements, they want to eat matches, nothing to do
+ } else {
+ // spin over the split up replacement
+ NSEnumerator *replacementEnumerator = [replacements objectEnumerator];
+ GTMRegexStringSegment *replacementSegment = nil;
+ while ((replacementSegment = [replacementEnumerator nextObject]) != nil) {
+ if (![replacementSegment isMatch]) {
+ // not a match, raw text to put in
+ [result appendString:[replacementSegment string]];
+ } else {
+ // match...
+
+ // first goes any leading text
+ NSString *leading =
+ [replacementSegment subPatternString:kReplacementPatternLeadingTextIndex];
+ if (leading)
+ [result appendString:leading];
+ // then use the subpattern number to find what goes in from the
+ // original string match.
+ int subPatternNum =
+ [[replacementSegment subPatternString:kReplacementPatternSubpatternNumberIndex] intValue];
+ // (an out of range number also comes back as nil from subPatternString:)
+ NSString *matchSubPatStr = [segment subPatternString:subPatternNum];
+ // handle an unused subpattern (ie-nil result)
+ if (matchSubPatStr)
+ [result appendString:matchSubPatStr];
+ }
+ }
+ }
+ }
+ }
+ return result;
+}
+
+// Debug description: class, address, pattern, subpattern count and options.
+- (NSString *)description {
+  // re_nsub is a size_t, so it needs the complete "%zu" conversion; a bare
+  // "%z" is a length modifier with no conversion specifier.
+  NSMutableString *result =
+    [NSMutableString stringWithFormat:@"%@<%p> { pattern=\"%@\", rawNumSubPatterns=%zu, options=(",
+     [self class], self, pattern_, regexData_.re_nsub];
+  if (options_) {
+    if (options_ & kGTMRegexOptionIgnoreCase)
+      [result appendString:@" IgnoreCase"];
+    if ((options_ & kGTMRegexOptionSupressNewlineSupport) == kGTMRegexOptionSupressNewlineSupport)
+      [result appendString:@" NoNewlineSupport"];
+  } else {
+    [result appendString:@" None(Default)"];
+  }
+  [result appendString:@" ) }"];
+  return result;
+}
+
+@end
+
+@implementation GTMRegex (PrivateMethods)
+
+- (NSString *)errorMessage:(int)errCode {
+  // First call, with no buffer, only reports how much room the message needs.
+  size_t needed = regerror(errCode, &regexData_, nil, 0);
+  char message[needed];
+
+  // Second call fills in the text; if the size doesn't agree with the first
+  // call, fall back to the generic message.
+  if (regerror(errCode, &regexData_, message, needed) != needed)
+    return @"internal error";
+
+  NSString *generatedError = [NSString stringWithUTF8String:message];
+  return generatedError ? generatedError : @"internal error";
+}
+
+// Private helper: runs the compiled regex on a block of UTF-8 bytes. Returns
+// YES when regexec() reports a match, NO otherwise (including a NULL input).
+- (BOOL)runRegexOnUTF8:(const char*)utf8Str
+                nmatch:(size_t)nmatch
+                pmatch:(regmatch_t *)pmatch
+                 flags:(int)flags {
+  // nothing to search
+  if (utf8Str == NULL)
+    return NO;
+
+  int status = regexec(&regexData_, utf8Str, nmatch, pmatch, flags);
+  if (status == 0)
+    return YES;
+
+#ifdef DEBUG
+  // REG_NOMATCH is the ordinary "didn't match" answer; anything else is an
+  // actual failure, which debug builds log.
+  if (status != REG_NOMATCH) {
+    NSString *errorStr = [self errorMessage:status];
+    NSLog(@"%@: matching string \"%.20s...\", had error: \"%@\"",
+          self, utf8Str, errorStr);
+  }
+#endif
+  return NO;
+}
+
+@end
+
+@implementation GTMRegexEnumerator
+
+- (id)init {
+  // An enumerator needs both a regex and a string; -initWithRegex:... returns
+  // nil when either is missing, so a plain -init always yields nil.
+  GTMRegex *noRegex = nil;
+  NSString *noString = nil;
+  return [self initWithRegex:noRegex processString:noString allSegments:NO];
+}
+
+- (id)initWithRegex:(GTMRegex *)regex
+       processString:(NSString *)str
+         allSegments:(BOOL)allSegments {
+  if ((self = [super init]) == nil)
+    return nil;
+
+  // hold on to the regex and the UTF-8 form of the string we'll be walking
+  regex_ = [regex retain];
+  utf8StrBuf_ = [[str dataUsingEncoding:NSUTF8StringEncoding] retain];
+  allSegments_ = allSegments;
+
+  // both are required; without them there is nothing to enumerate
+  if ((regex_ == nil) || (utf8StrBuf_ == nil)) {
+    [self release];
+    return nil;
+  }
+
+  // the parsing state was already zeroed for us by object creation
+  return self;
+}
+
+- (void)dealloc {
+  // a match held back for the next -nextObject call is still ours to free
+  // (free() on a NULL pointer does nothing)
+  free(savedRegMatches_);
+  savedRegMatches_ = nil;
+
+  [regex_ release];
+  [utf8StrBuf_ release];
+  [super dealloc];
+}
+
+// Returns the next GTMRegexStringSegment (autoreleased), or nil once the whole
+// buffer has been consumed.
+//
+// When allSegments_ is set and a match is found past curParseIndex_, the text
+// before the match is returned first and the match itself is parked in
+// savedRegMatches_ to be returned by the following call.
+//
+// NOTE(review): when a match is empty and sits exactly at curParseIndex_
+// (ie-rm_so == rm_eo == curParseIndex_), the "advance our marker" steps below
+// leave curParseIndex_ unchanged, so it looks like a pattern that can match
+// the empty string would keep returning the same segment forever - confirm.
+- (id)nextObject {
+
+ GTMRegexStringSegment *result = nil;
+ regmatch_t *nextMatches = nil;
+ BOOL isMatch = NO;
+
+ // we do all this w/in a try, so if something throws, the memory we malloced
+ // will still get cleaned up
+ @try {
+
+ // if we have a saved match, use that...
+ if (savedRegMatches_) {
+ nextMatches = savedRegMatches_;
+ savedRegMatches_ = nil;
+ isMatch = YES; // if we have something saved, it was a pattern match
+ }
+ // have we reached the end?
+ else if (curParseIndex_ >= [utf8StrBuf_ length]) {
+ // done, do nothing, we'll return nil
+ }
+ // do the search.
+ else {
+
+ // alloc the match structure (extra space for the zero (full) match)
+ size_t matchBufSize = ([regex_ subPatternCount] + 1) * sizeof(regmatch_t);
+ nextMatches = malloc(matchBufSize);
+ if (!nextMatches)
+ return nil;
+
+ // setup our range to work on (REG_STARTEND reads the range to search from
+ // slot zero, so the buffer doesn't need to be NUL terminated)
+ nextMatches[0].rm_so = curParseIndex_;
+ nextMatches[0].rm_eo = [utf8StrBuf_ length];
+
+ // call for the match
+ if ([regex_ runRegexOnUTF8:[utf8StrBuf_ bytes]
+ nmatch:([regex_ subPatternCount] + 1)
+ pmatch:nextMatches
+ flags:REG_STARTEND]) {
+ // match
+
+ if (allSegments_ &&
+ (nextMatches[0].rm_so != curParseIndex_)) {
+ // we should return all segments (not just matches), and there was
+ // something before this match. So save off this match for later
+ // and create a range for this.
+
+ savedRegMatches_ = nextMatches;
+ nextMatches = malloc(matchBufSize);
+ if (!nextMatches)
+ return nil;
+
+ isMatch = NO;
+ // mark everything but the zero slot w/ not used
+ for (int x = [regex_ subPatternCount]; x > 0; --x) {
+ nextMatches[x].rm_so = nextMatches[x].rm_eo = -1;
+ }
+ // the non-match segment runs from where we were up to the start of the
+ // saved match
+ nextMatches[0].rm_so = curParseIndex_;
+ nextMatches[0].rm_eo = savedRegMatches_[0].rm_so;
+
+ // advance our marker
+ curParseIndex_ = savedRegMatches_[0].rm_eo;
+
+ } else {
+ // we only return matches or are pointed at a match
+
+ // no real work to do, just fall through to return to return the
+ // current match.
+ isMatch = YES;
+
+ // advance our marker
+ curParseIndex_ = nextMatches[0].rm_eo;
+ }
+
+ } else {
+ // no match
+
+ // should we return the last non matching segment?
+ if (allSegments_) {
+ isMatch = NO;
+ // mark everything but the zero slot w/ not used
+ for (int x = [regex_ subPatternCount]; x > 0; --x) {
+ nextMatches[x].rm_so = nextMatches[x].rm_eo = -1;
+ }
+ nextMatches[0].rm_so = curParseIndex_;
+ nextMatches[0].rm_eo = [utf8StrBuf_ length];
+ } else {
+ // drop match set, we don't want it
+ free(nextMatches);
+ nextMatches = nil;
+ }
+
+ // advance our marker since we're done
+ curParseIndex_ = [utf8StrBuf_ length];
+
+ }
+ }
+
+ // create the segment to return
+ if (nextMatches) {
+ result =
+ [[[GTMRegexStringSegment alloc] initWithUTF8StrBuf:utf8StrBuf_
+ regMatches:nextMatches
+ numRegMatches:[regex_ subPatternCount]
+ isMatch:isMatch] autorelease];
+ // the segment stored the pointer (and frees it in its dealloc), so clear
+ // ours to keep the cleanup below from freeing it too
+ nextMatches = nil;
+ }
+ }
+ @catch (id e) {
+ NSLog(@"Exceptions while trying to advance enumeration (%@)", e);
+ }
+
+ // if we still have something in our temp, free it
+ if (nextMatches)
+ free(nextMatches);
+
+ return result;
+}
+
+// Debug description: class, address, regex, mode and a short preview of the
+// string being walked.
+- (NSString *)description {
+  // utf8StrBuf_ holds the encoded bytes only (no terminating NUL), so a plain
+  // "%.20s" could read past the end of a short buffer. Bound the preview by
+  // the buffer length instead.
+  unsigned int bufLen = [utf8StrBuf_ length];
+  int previewLen = (bufLen < 20) ? (int)bufLen : 20;
+  const char *preview = (bufLen > 0) ? (const char *)[utf8StrBuf_ bytes] : "";
+  return [NSString stringWithFormat:@"%@<%p> { regex=\"%@\", allSegments=%s, string=\"%.*s...\" }",
+          [self class], self,
+          regex_,
+          (allSegments_ ? "YES" : "NO"),
+          previewLen, preview];
+}
+
+@end
+
+@implementation GTMRegexStringSegment
+
+- (id)init {
+  // A segment needs a buffer and a match set; the full initializer returns
+  // nil when they are missing, so a plain -init always yields nil.
+  NSData *noBuffer = nil;
+  return [self initWithUTF8StrBuf:noBuffer
+                       regMatches:nil
+                    numRegMatches:0
+                          isMatch:NO];
+}
+
+- (void)dealloc {
+  // regMatches_ is a strong (malloc'd) reference, so we free it here
+  // (free() on a NULL pointer does nothing)
+  free(regMatches_);
+  regMatches_ = nil;
+
+  [utf8StrBuf_ release];
+  [super dealloc];
+}
+
+- (BOOL)isMatch {
+ // the flag handed to the initializer: YES for a pattern match, NO for the
+ // text between matches
+ return isMatch_;
+}
+
+- (NSString *)string {
+  // slot zero of the match set covers the whole segment
+  NSString *wholeSegment = [self subPatternString:0];
+  return wholeSegment;
+}
+
+- (NSString *)subPatternString:(int)index {
+  // valid slots run from 0 (the whole segment) through numRegMatches_
+  BOOL inRange = (index >= 0) && (index <= numRegMatches_);
+  if (!inRange)
+    return nil;
+
+  // -1/-1 marks a subpattern that wasn't found
+  regmatch_t match = regMatches_[index];
+  if ((match.rm_so == -1) && (match.rm_eo == -1))
+    return nil;
+
+  // the offsets are byte offsets into the UTF-8 buffer
+  const char *bytes = (const char*)[utf8StrBuf_ bytes];
+  unsigned byteCount = match.rm_eo - match.rm_so;
+  NSString *sub = [[NSString alloc] initWithBytes:(bytes + match.rm_so)
+                                           length:byteCount
+                                         encoding:NSUTF8StringEncoding];
+  return [sub autorelease];
+}
+
+// Debug description: class, address, match flag and the raw text of every
+// slot (slot 0 is the whole segment, the rest are the subpatterns).
+- (NSString *)description {
+  NSMutableString *result =
+    [NSMutableString stringWithFormat:@"%@<%p> { isMatch=\"%s\", subPatterns=(",
+     [self class], self, (isMatch_ ? "YES" : "NO")];
+  const char *bytes = (const char*)[utf8StrBuf_ bytes];
+  for (int x = 0; x <= numRegMatches_; ++x) {
+    NSString *format = @", \"%.*s\"";
+    if (x == 0)
+      format = @" \"%.*s\"";
+
+    // Unused slots are marked -1/-1. Print those as an empty string rather
+    // than forming a pointer one byte before the start of the buffer.
+    const char *start = "";
+    int len = 0;
+    if ((regMatches_[x].rm_so != -1) || (regMatches_[x].rm_eo != -1)) {
+      start = bytes + regMatches_[x].rm_so;
+      len = (int)(regMatches_[x].rm_eo - regMatches_[x].rm_so);
+    }
+
+    [result appendFormat:format, len, start];
+  }
+  [result appendString:@" ) }"];
+
+  return result;
+}
+
+@end
+
+@implementation GTMRegexStringSegment (PrivateMethods)
+
+- (id)initWithUTF8StrBuf:(NSData *)utf8StrBuf
+              regMatches:(regmatch_t *)regMatches
+           numRegMatches:(int)numRegMatches
+                 isMatch:(BOOL)isMatch {
+  if ((self = [super init]) == nil)
+    return nil;
+
+  // collect the args; regMatches_ is stored as-is and is freed by -dealloc
+  utf8StrBuf_ = [utf8StrBuf retain];
+  regMatches_ = regMatches;
+  numRegMatches_ = numRegMatches;
+  isMatch_ = isMatch;
+
+  // a segment needs a buffer, a match set and a sane count
+  BOOL argsOk = (utf8StrBuf_ != nil) && (regMatches_ != nil) && (numRegMatches_ >= 0);
+  if (!argsOk) {
+    [self release];
+    return nil;
+  }
+
+  return self;
+}
+
+@end
+
+// Thin NSString conveniences: each one builds a GTMRegex for |pattern| with
+// the default options and forwards to the matching GTMRegex api.
+@implementation NSString (GTMRegexAdditions)
+
+- (BOOL)gtm_matchesPattern:(NSString *)pattern {
+  return [[GTMRegex regexWithPattern:pattern] matchesString:self];
+}
+
+- (NSArray *)gtm_subPatternsOfPattern:(NSString *)pattern {
+  return [[GTMRegex regexWithPattern:pattern] subPatternsOfString:self];
+}
+
+- (NSString *)gtm_firstSubStringMatchedByPattern:(NSString *)pattern {
+  // only the first matching segment is of interest
+  NSEnumerator *matches =
+    [[GTMRegex regexWithPattern:pattern] matchSegmentEnumeratorForString:self];
+  return [(GTMRegexStringSegment *)[matches nextObject] string];
+}
+
+- (NSArray *)gtm_allSubstringsMatchedByPattern:(NSString *)pattern {
+  // collect every matching segment, then map each one to its -string
+  NSArray *segments =
+    [[self gtm_matchSegmentEnumeratorForPattern:pattern] allObjects];
+  return [segments valueForKey:@"string"];
+}
+
+- (NSEnumerator *)gtm_segmentEnumeratorForPattern:(NSString *)pattern {
+  return [[GTMRegex regexWithPattern:pattern] segmentEnumeratorForString:self];
+}
+
+- (NSEnumerator *)gtm_matchSegmentEnumeratorForPattern:(NSString *)pattern {
+  return [[GTMRegex regexWithPattern:pattern] matchSegmentEnumeratorForString:self];
+}
+
+- (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
+                                    withReplacement:(NSString *)replacementPattern {
+  return [[GTMRegex regexWithPattern:pattern]
+            stringByReplacingMatchesInString:self
+                             withReplacement:replacementPattern];
+}
+
+@end
diff --git a/Foundation/GTMRegexTest.m b/Foundation/GTMRegexTest.m
new file mode 100644
index 0000000..ef7d1e5
--- /dev/null
+++ b/Foundation/GTMRegexTest.m
@@ -0,0 +1,955 @@
+//
+// GTMRegexTest.m
+//
+// Copyright 2007-2008 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+//
+
+#import <SenTestingKit/SenTestingKit.h>
+
+#import "GTMSenTestCase.h"
+#import "GTMRegex.h"
+
+//
+// NOTE:
+//
+// We don't really test any of the pattern matching itself, since that would be
+// testing libregex; we just want to test our wrapper.
+//
+
+// Test case whose methods exercise the GTMRegex class directly.
+@interface GTMRegexTest : SenTestCase
+@end
+
+// Test case whose methods exercise the gtm_* regex convenience methods that
+// the GTMRegexAdditions category adds to NSString.
+@interface NSString_GTMRegexAdditions : SenTestCase
+@end
+
+@implementation GTMRegexTest
+
+// Checks +escapedPatternForString: against plain text, regex special
+// characters, a mix of both, and the nil/empty inputs.
+- (void)testEscapedPatternForString {
+  // nothing but plain characters: should come back untouched
+  NSString *plainText = @"abcdefghijklmnopqrstuvwxyz0123456789";
+  STAssertEqualStrings([GTMRegex escapedPatternForString:plainText],
+                       plainText,
+                       nil);
+
+  // nothing but special characters: each one gains a leading backslash
+  NSString *specialsOnly = @"^.[$()|*+?{\\";
+  NSString *specialsOnlyEscaped = @"\\^\\.\\[\\$\\(\\)\\|\\*\\+\\?\\{\\\\";
+  STAssertEqualStrings([GTMRegex escapedPatternForString:specialsOnly],
+                       specialsOnlyEscaped,
+                       nil);
+
+  // specials interleaved with letters: only the specials are escaped
+  NSString *interleaved = @"a^b.c[d$e(f)g|h*i+j?k{l\\m";
+  NSString *interleavedEscaped = @"a\\^b\\.c\\[d\\$e\\(f\\)g\\|h\\*i\\+j\\?k\\{l\\\\m";
+  STAssertEqualStrings([GTMRegex escapedPatternForString:interleaved],
+                       interleavedEscaped,
+                       nil);
+
+  // degenerate inputs: nil stays nil, the empty string stays empty
+  STAssertNil([GTMRegex escapedPatternForString:nil], nil);
+  STAssertEqualStrings([GTMRegex escapedPatternForString:@""], @"", nil);
+}
+
+
+// Checks that both the alloc/init path and the class convenience
+// constructors return nil for missing or malformed patterns and a real
+// object for a well-formed one.
+- (void)testInit {
+  // "(." has an unbalanced paren and is asserted below to be rejected;
+  // "(.*)" is the well-formed counterpart.
+  NSString *brokenPattern = @"(.";
+  NSString *goodPattern = @"(.*)";
+
+  // alloc/init: no pattern, a nil pattern, or a broken pattern => nil
+  STAssertNil([[[GTMRegex alloc] init] autorelease], nil);
+  STAssertNil([[[GTMRegex alloc] initWithPattern:nil] autorelease], nil);
+  STAssertNil([[[GTMRegex alloc] initWithPattern:nil
+                                         options:kGTMRegexOptionIgnoreCase] autorelease], nil);
+  STAssertNil([[[GTMRegex alloc] initWithPattern:brokenPattern] autorelease], nil);
+  STAssertNil([[[GTMRegex alloc] initWithPattern:brokenPattern
+                                         options:kGTMRegexOptionIgnoreCase] autorelease], nil);
+
+  // alloc/init: a good pattern works with and without explicit options
+  STAssertNotNil([[[GTMRegex alloc] initWithPattern:goodPattern] autorelease], nil);
+  STAssertNotNil([[[GTMRegex alloc] initWithPattern:goodPattern
+                                            options:0] autorelease], nil);
+  STAssertNotNil([[[GTMRegex alloc] initWithPattern:goodPattern
+                                            options:kGTMRegexOptionIgnoreCase] autorelease], nil);
+
+  // convenience constructors: nil or broken pattern => nil
+  STAssertNil([GTMRegex regexWithPattern:nil], nil);
+  STAssertNil([GTMRegex regexWithPattern:nil
+                                 options:0], nil);
+  STAssertNil([GTMRegex regexWithPattern:brokenPattern], nil);
+  STAssertNil([GTMRegex regexWithPattern:brokenPattern
+                                 options:0], nil);
+
+  // convenience constructors: a good pattern works with and without options
+  STAssertNotNil([GTMRegex regexWithPattern:goodPattern], nil);
+  STAssertNotNil([GTMRegex regexWithPattern:goodPattern
+                                    options:0], nil);
+  STAssertNotNil([GTMRegex regexWithPattern:goodPattern
+                                    options:kGTMRegexOptionIgnoreCase], nil);
+}
+
+// Walks one multi-line sample string with several pattern/option
+// combinations and checks, segment by segment, what the segment enumerator
+// reports as match and non-match text.
+- (void)testOptions {
+
+  NSString *sample = @"aaa AAA\nbbb BBB\n aaa aAa\n bbb BbB";
+  GTMRegex *matcher = nil;
+  NSEnumerator *segments = nil;
+  GTMRegexStringSegment *piece = nil;
+
+  // ---- "a+" with no options: case sensitive ----
+  matcher = [GTMRegex regexWithPattern:@"a+"];
+  STAssertNotNil(matcher, nil);
+  segments = [matcher segmentEnumeratorForString:sample];
+  STAssertNotNil(segments, nil);
+  // match: "aaa"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa", nil);
+  // gap: " AAA\nbbb BBB\n "
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @" AAA\nbbb BBB\n ", nil);
+  // match: "aaa"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa", nil);
+  // gap: " "
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @" ", nil);
+  // match: "a"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"a", nil);
+  // gap: "A" (upper case is not matched without the ignore-case option)
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"A", nil);
+  // match: "a"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"a", nil);
+  // gap: "\n bbb BbB"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"\n bbb BbB", nil);
+  // enumerator is exhausted
+  piece = [segments nextObject];
+  STAssertNil(piece, nil);
+
+  // ---- "a+" with kGTMRegexOptionIgnoreCase ----
+  matcher = [GTMRegex regexWithPattern:@"a+" options:kGTMRegexOptionIgnoreCase];
+  STAssertNotNil(matcher, nil);
+  segments = [matcher segmentEnumeratorForString:sample];
+  STAssertNotNil(segments, nil);
+  // match: "aaa"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa", nil);
+  // gap: " "
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @" ", nil);
+  // match: "AAA"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"AAA", nil);
+  // gap: "\nbbb BBB\n "
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"\nbbb BBB\n ", nil);
+  // match: "aaa"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa", nil);
+  // gap: " "
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @" ", nil);
+  // match: "aAa"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aAa", nil);
+  // gap: "\n bbb BbB"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"\n bbb BbB", nil);
+  // enumerator is exhausted
+  piece = [segments nextObject];
+  STAssertNil(piece, nil);
+
+  // ---- "^a+" with no options ----
+  matcher = [GTMRegex regexWithPattern:@"^a+"];
+  STAssertNotNil(matcher, nil);
+  segments = [matcher segmentEnumeratorForString:sample];
+  STAssertNotNil(segments, nil);
+  // match: "aaa"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa", nil);
+  // gap: " AAA\nbbb BBB\n aaa aAa\n bbb BbB"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @" AAA\nbbb BBB\n aaa aAa\n bbb BbB", nil);
+  // enumerator is exhausted
+  piece = [segments nextObject];
+  STAssertNil(piece, nil);
+
+  // ---- "B+$" with no options: '$' also matches before a newline ----
+  matcher = [GTMRegex regexWithPattern:@"B+$"];
+  STAssertNotNil(matcher, nil);
+  segments = [matcher segmentEnumeratorForString:sample];
+  STAssertNotNil(segments, nil);
+  // gap: "aaa AAA\nbbb "
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa AAA\nbbb ", nil);
+  // match: "BBB"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"BBB", nil);
+  // gap: "\n aaa aAa\n bbb Bb"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"\n aaa aAa\n bbb Bb", nil);
+  // match: "B"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"B", nil);
+  // enumerator is exhausted
+  piece = [segments nextObject];
+  STAssertNil(piece, nil);
+
+  // ---- "B+$" with kGTMRegexOptionIgnoreCase ----
+  matcher = [GTMRegex regexWithPattern:@"B+$"
+                               options:kGTMRegexOptionIgnoreCase];
+  STAssertNotNil(matcher, nil);
+  segments = [matcher segmentEnumeratorForString:sample];
+  STAssertNotNil(segments, nil);
+  // gap: "aaa AAA\nbbb "
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa AAA\nbbb ", nil);
+  // match: "BBB"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"BBB", nil);
+  // gap: "\n aaa aAa\n bbb "
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"\n aaa aAa\n bbb ", nil);
+  // match: "BbB"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"BbB", nil);
+  // enumerator is exhausted
+  piece = [segments nextObject];
+  STAssertNil(piece, nil);
+
+  // ---- "a.*b" with kGTMRegexOptionSupressNewlineSupport ----
+  // with newline handling suppressed, '.' is allowed to run across the "\n"s
+  matcher = [GTMRegex regexWithPattern:@"a.*b" options:kGTMRegexOptionSupressNewlineSupport];
+  STAssertNotNil(matcher, nil);
+  segments = [matcher segmentEnumeratorForString:sample];
+  STAssertNotNil(segments, nil);
+  // match: "aaa AAA\nbbb BBB\n aaa aAa\n bbb Bb"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa AAA\nbbb BBB\n aaa aAa\n bbb Bb", nil);
+  // gap: "B"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"B", nil);
+  // enumerator is exhausted
+  piece = [segments nextObject];
+  STAssertNil(piece, nil);
+
+  // ---- "a.*b" with no options ----
+  // (nothing matches here, since '.' can't match the '\n')
+  matcher = [GTMRegex regexWithPattern:@"a.*b"];
+  STAssertNotNil(matcher, nil);
+  segments = [matcher segmentEnumeratorForString:sample];
+  STAssertNotNil(segments, nil);
+  // gap: the whole sample, "aaa AAA\nbbb BBB\n aaa aAa\n bbb BbB"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa AAA\nbbb BBB\n aaa aAa\n bbb BbB", nil);
+  // enumerator is exhausted
+  piece = [segments nextObject];
+  STAssertNil(piece, nil);
+
+  // ---- "^a+" with kGTMRegexOptionSupressNewlineSupport ----
+  matcher = [GTMRegex regexWithPattern:@"^a+" options:kGTMRegexOptionSupressNewlineSupport];
+  STAssertNotNil(matcher, nil);
+  segments = [matcher segmentEnumeratorForString:sample];
+  STAssertNotNil(segments, nil);
+  // match: "aaa"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa", nil);
+  // gap: " AAA\nbbb BBB\n aaa aAa\n bbb BbB"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @" AAA\nbbb BBB\n aaa aAa\n bbb BbB", nil);
+  // enumerator is exhausted
+  piece = [segments nextObject];
+  STAssertNil(piece, nil);
+
+  // ---- "B+$" with kGTMRegexOptionSupressNewlineSupport ----
+  // only the very end of the string counts as '$' now
+  matcher = [GTMRegex regexWithPattern:@"B+$" options:kGTMRegexOptionSupressNewlineSupport];
+  STAssertNotNil(matcher, nil);
+  segments = [matcher segmentEnumeratorForString:sample];
+  STAssertNotNil(segments, nil);
+  // gap: "aaa AAA\nbbb BBB\n aaa aAa\n bbb Bb"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertFalse([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"aaa AAA\nbbb BBB\n aaa aAa\n bbb Bb", nil);
+  // match: "B"
+  piece = [segments nextObject];
+  STAssertNotNil(piece, nil);
+  STAssertTrue([piece isMatch], nil);
+  STAssertEqualStrings([piece string], @"B", nil);
+  // enumerator is exhausted
+  piece = [segments nextObject];
+  STAssertNil(piece, nil);
+}
+
+// Checks -subPatternCount for patterns with zero, one, several, and nested
+// parenthesized groups.
+- (void)testSubPatternCount {
+  // each entry pairs a pattern with the count the compiled regex is
+  // expected to report for it
+  struct {
+    NSString *pattern;
+    int groupCount;
+  } cases[] = {
+    { @".*", 0 },
+    { @"(.*)", 1 },
+    { @"[fo]*(.*)[bar]*", 1 },
+    { @"([fo]*)(.*)([bar]*)", 3 },
+    { @"(([bar]*)|([fo]*))(.*)(([bar]*)|([fo]*))", 7 },
+  };
+  const size_t caseCount = sizeof(cases) / sizeof(cases[0]);
+  size_t idx;
+  for (idx = 0; idx < caseCount; ++idx) {
+    STAssertEquals(cases[idx].groupCount,
+                   [[GTMRegex regexWithPattern:cases[idx].pattern] subPatternCount],
+                   nil);
+  }
+}
+
+// Checks -matchesString: with a simple pattern and with the same pattern
+// split into sub patterns; both are expected to behave the same way.
+- (void)testMatchesString {
+  NSArray *patterns = [NSArray arrayWithObjects:
+                       @"foo.*bar",        // simple pattern
+                       @"(foo)(.*)(bar)",  // pattern w/ sub patterns
+                       nil];
+  NSEnumerator *patternEnum = [patterns objectEnumerator];
+  NSString *pattern = nil;
+  while ((pattern = [patternEnum nextObject]) != nil) {
+    GTMRegex *matcher = [GTMRegex regexWithPattern:pattern];
+    STAssertNotNil(matcher, nil);
+    // the pattern covers the entire string
+    STAssertTrue([matcher matchesString:@"foobar"], nil);
+    STAssertTrue([matcher matchesString:@"foobydoo spambar"], nil);
+    // the pattern is present but surrounded by extra text
+    STAssertFalse([matcher matchesString:@"zzfoobarzz"], nil);
+    STAssertFalse([matcher matchesString:@"zzfoobydoo spambarzz"], nil);
+    // unrelated, empty, and nil inputs
+    STAssertFalse([matcher matchesString:@"abcdef"], nil);
+    STAssertFalse([matcher matchesString:@""], nil);
+    STAssertFalse([matcher matchesString:nil], nil);
+  }
+}
+
+// Checks -subPatternsOfString:. On a full match the array holds the whole
+// match at index 0 followed by one entry per sub pattern, with NSNull
+// standing in for a group that did not take part; anything short of a
+// full-string match yields nil.
+- (void)testSubPatternsOfString {
+  GTMRegex *regex = [GTMRegex regexWithPattern:@"(fo(o+))((bar)|(baz))"];
+  STAssertNotNil(regex, nil);
+  STAssertEquals(5, [regex subPatternCount], nil);
+  NSArray *subPatterns = [regex subPatternsOfString:@"foooooobaz"];
+  STAssertNotNil(subPatterns, nil);
+  // STAssertEquals reports a type mismatch unless both arguments have the
+  // same encoded type, and -count is wider than unsigned int on 64-bit, so
+  // narrow it explicitly to line up with the 6U literal.
+  STAssertEquals(6U, (unsigned int)[subPatterns count], nil);
+  STAssertEqualStrings(@"foooooobaz", [subPatterns objectAtIndex:0], nil);
+  STAssertEqualStrings(@"foooooo", [subPatterns objectAtIndex:1], nil);
+  STAssertEqualStrings(@"ooooo", [subPatterns objectAtIndex:2], nil);
+  STAssertEqualStrings(@"baz", [subPatterns objectAtIndex:3], nil);
+  // "(bar)" did not participate in the match
+  STAssertTrue(([NSNull null] == [subPatterns objectAtIndex:4]), nil);
+  STAssertEqualStrings(@"baz", [subPatterns objectAtIndex:5], nil);
+
+  // not there
+  subPatterns = [regex subPatternsOfString:@"aaa"];
+  STAssertNil(subPatterns, nil);
+
+  // no extra stuff allowed on either end
+  subPatterns = [regex subPatternsOfString:@"ZZZfoooooobaz"];
+  STAssertNil(subPatterns, nil);
+  subPatterns = [regex subPatternsOfString:@"foooooobazZZZ"];
+  STAssertNil(subPatterns, nil);
+  subPatterns = [regex subPatternsOfString:@"ZZZfoooooobazZZZ"];
+  STAssertNil(subPatterns, nil);
+}
+
+// Checks -segmentEnumeratorForString:, which hands back both the matching
+// and the non-matching pieces of the string in order. Also covers the
+// no-match and only-match cases, the sub pattern strings saved on each
+// segment, and -allObjects.
+- (void)testSegmentEnumeratorForString {
+  GTMRegex *regex = [GTMRegex regexWithPattern:@"foo+ba+r"];
+  STAssertNotNil(regex, nil);
+  NSEnumerator *enumerator = [regex segmentEnumeratorForString:@"afoobarbfooobaarfoobarzz"];
+  STAssertNotNil(enumerator, nil);
+  // "a"
+  GTMRegexStringSegment *seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertFalse([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"a", nil);
+  // "foobar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  // "b"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertFalse([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"b", nil);
+  // "fooobaar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"fooobaar", nil);
+  // "foobar" (back-to-back matches produce no segment in between)
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  // "zz"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertFalse([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"zz", nil);
+  // (end)
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test no match
+  enumerator = [regex segmentEnumeratorForString:@"aaa"];
+  STAssertNotNil(enumerator, nil);
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertFalse([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"aaa", nil);
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test only match
+  enumerator = [regex segmentEnumeratorForString:@"foobar"];
+  STAssertNotNil(enumerator, nil);
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // now test the saved sub segments
+  regex = [GTMRegex regexWithPattern:@"(foo)((bar)|(baz))"];
+  STAssertNotNil(regex, nil);
+  STAssertEquals(4, [regex subPatternCount], nil);
+  enumerator = [regex segmentEnumeratorForString:@"foobarxxfoobaz"];
+  STAssertNotNil(enumerator, nil);
+  // "foobar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  STAssertEqualStrings([seg subPatternString:0], @"foobar", nil);
+  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
+  STAssertEqualStrings([seg subPatternString:2], @"bar", nil);
+  STAssertEqualStrings([seg subPatternString:3], @"bar", nil);
+  STAssertNil([seg subPatternString:4], nil); // nothing matched "(baz)"
+  STAssertNil([seg subPatternString:5], nil); // index past the last sub pattern
+  // "xx"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertFalse([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"xx", nil);
+  STAssertEqualStrings([seg subPatternString:0], @"xx", nil);
+  STAssertNil([seg subPatternString:1], nil);
+  // "foobaz"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobaz", nil);
+  STAssertEqualStrings([seg subPatternString:0], @"foobaz", nil);
+  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
+  STAssertEqualStrings([seg subPatternString:2], @"baz", nil);
+  STAssertNil([seg subPatternString:3], nil); // nothing matched "(bar)"
+  STAssertEqualStrings([seg subPatternString:4], @"baz", nil);
+  STAssertNil([seg subPatternString:5], nil); // index past the last sub pattern
+  // (end)
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test all objects
+  regex = [GTMRegex regexWithPattern:@"foo+ba+r"];
+  STAssertNotNil(regex, nil);
+  enumerator = [regex segmentEnumeratorForString:@"afoobarbfooobaarfoobarzz"];
+  STAssertNotNil(enumerator, nil);
+  NSArray *allSegments = [enumerator allObjects];
+  STAssertNotNil(allSegments, nil);
+  // STAssertEquals reports a type mismatch unless both arguments have the
+  // same encoded type, and -count is wider than unsigned int on 64-bit, so
+  // narrow it explicitly to line up with the 6U literal.
+  STAssertEquals(6U, (unsigned int)[allSegments count], nil);
+}
+
+// Checks -matchSegmentEnumeratorForString:, which hands back only the
+// matching pieces of the string. Also covers the no-match and only-match
+// cases, the sub pattern strings saved on each segment, and -allObjects.
+- (void)testMatchSegmentEnumeratorForString {
+  GTMRegex *regex = [GTMRegex regexWithPattern:@"foo+ba+r"];
+  STAssertNotNil(regex, nil);
+  NSEnumerator *enumerator = [regex matchSegmentEnumeratorForString:@"afoobarbfooobaarfoobarzz"];
+  STAssertNotNil(enumerator, nil);
+  // "a" - skipped
+  // "foobar"
+  GTMRegexStringSegment *seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  // "b" - skipped
+  // "fooobaar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"fooobaar", nil);
+  // "foobar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  // "zz" - skipped
+  // (end)
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test no match
+  enumerator = [regex matchSegmentEnumeratorForString:@"aaa"];
+  STAssertNotNil(enumerator, nil);
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil); // should have gotten nothing
+
+  // test only match
+  enumerator = [regex matchSegmentEnumeratorForString:@"foobar"];
+  STAssertNotNil(enumerator, nil);
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // now test the saved sub segments
+  regex = [GTMRegex regexWithPattern:@"(foo)((bar)|(baz))"];
+  STAssertNotNil(regex, nil);
+  STAssertEquals(4, [regex subPatternCount], nil);
+  enumerator = [regex matchSegmentEnumeratorForString:@"foobarxxfoobaz"];
+  STAssertNotNil(enumerator, nil);
+  // "foobar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  STAssertEqualStrings([seg subPatternString:0], @"foobar", nil);
+  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
+  STAssertEqualStrings([seg subPatternString:2], @"bar", nil);
+  STAssertEqualStrings([seg subPatternString:3], @"bar", nil);
+  STAssertNil([seg subPatternString:4], nil); // nothing matched "(baz)"
+  STAssertNil([seg subPatternString:5], nil); // index past the last sub pattern
+  // "xx" - skipped
+  // "foobaz"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobaz", nil);
+  STAssertEqualStrings([seg subPatternString:0], @"foobaz", nil);
+  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
+  STAssertEqualStrings([seg subPatternString:2], @"baz", nil);
+  STAssertNil([seg subPatternString:3], nil); // nothing matched "(bar)"
+  STAssertEqualStrings([seg subPatternString:4], @"baz", nil);
+  STAssertNil([seg subPatternString:5], nil); // index past the last sub pattern
+  // (end)
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test all objects
+  regex = [GTMRegex regexWithPattern:@"foo+ba+r"];
+  STAssertNotNil(regex, nil);
+  enumerator = [regex matchSegmentEnumeratorForString:@"afoobarbfooobaarfoobarzz"];
+  STAssertNotNil(enumerator, nil);
+  NSArray *allSegments = [enumerator allObjects];
+  STAssertNotNil(allSegments, nil);
+  // STAssertEquals reports a type mismatch unless both arguments have the
+  // same encoded type, and -count is wider than unsigned int on 64-bit, so
+  // narrow it explicitly to line up with the 3U literal.
+  STAssertEquals(3U, (unsigned int)[allSegments count], nil);
+}
+
+// Checks -stringByReplacingMatchesInString:withReplacement: with sub pattern
+// references, nil/empty replacements, references to groups that didn't
+// match or don't exist, and backslashes that aren't part of a reference.
+- (void)testStringByReplacingMatchesInStringWithReplacement {
+  // one match sitting in the middle of some other text
+  NSString *doggies = @"weefoobydoo spambardoggies";
+  GTMRegex *replacer = [GTMRegex regexWithPattern:@"(foo)(.*)(bar)"];
+  STAssertNotNil(replacer, nil);
+  // the basics: reorder the three groups
+  STAssertEqualStrings(@"weeZbarZbydoo spamZfooZdoggies",
+                       [replacer stringByReplacingMatchesInString:doggies
+                                                  withReplacement:@"Z\\3Z\\2Z\\1Z"],
+                       nil);
+  // a nil or empty replacement simply removes the match
+  STAssertEqualStrings(@"weedoggies",
+                       [replacer stringByReplacingMatchesInString:doggies
+                                                  withReplacement:nil],
+                       nil);
+  STAssertEqualStrings(@"weedoggies",
+                       [replacer stringByReplacingMatchesInString:doggies
+                                                  withReplacement:@""],
+                       nil);
+
+  // use optional and invalid subexpression parts to confirm that works
+  NSString *mixed = @"aaa foooooobaz fooobar bar foo baz aaa";
+  replacer = [GTMRegex regexWithPattern:@"(fo(o+))((bar)|(baz))"];
+  STAssertNotNil(replacer, nil);
+  STAssertEqualStrings(@"aaa baz bar bar foo baz aaa",
+                       [replacer stringByReplacingMatchesInString:mixed
+                                                  withReplacement:@"\\4\\5"],
+                       nil);
+  STAssertEqualStrings(@"aaa ZZZ ZZZ bar foo baz aaa",
+                       [replacer stringByReplacingMatchesInString:mixed
+                                                  withReplacement:@"Z\\10Z\\12Z"],
+                       nil);
+
+  // test slashes in replacement that aren't part of the subpattern reference
+  NSString *zaz = @"zaz";
+  replacer = [GTMRegex regexWithPattern:@"a+"];
+  STAssertNotNil(replacer, nil);
+  STAssertEqualStrings(@"z\\\\0 \\\\a \\\\\\\\0z",
+                       [replacer stringByReplacingMatchesInString:zaz
+                                                  withReplacement:@"\\\\0 \\\\\\0 \\\\\\\\0"],
+                       nil);
+  STAssertEqualStrings(@"z\\\\a \\\\\\\\0 \\\\\\\\az",
+                       [replacer stringByReplacingMatchesInString:zaz
+                                                  withReplacement:@"\\\\\\0 \\\\\\\\0 \\\\\\\\\\0"],
+                       nil);
+  STAssertEqualStrings(@"z\\\\\\\\0 \\\\\\\\a \\\\\\\\\\\\0z",
+                       [replacer stringByReplacingMatchesInString:zaz
+                                                  withReplacement:@"\\\\\\\\0 \\\\\\\\\\0 \\\\\\\\\\\\0"],
+                       nil);
+}
+
+@end
+
+@implementation NSString_GTMRegexAdditions
+// Only partial tests, to confirm that the calls get through correctly, since
+// the GTMRegexTest cases above really exercise the underlying behavior.
+
+// Checks -gtm_matchesPattern: with a simple pattern and with the same
+// pattern split into sub patterns; both are expected to behave the same way.
+- (void)testMatchesPattern {
+  NSArray *patterns = [NSArray arrayWithObjects:
+                       @"foo.*bar",        // simple pattern
+                       @"(foo)(.*)(bar)",  // pattern w/ sub patterns
+                       nil];
+  NSEnumerator *patternEnum = [patterns objectEnumerator];
+  NSString *pattern = nil;
+  while ((pattern = [patternEnum nextObject]) != nil) {
+    // the pattern covers the entire string
+    STAssertTrue([@"foobar" gtm_matchesPattern:pattern], nil);
+    STAssertTrue([@"foobydoo spambar" gtm_matchesPattern:pattern], nil);
+    // the pattern is present but surrounded by extra text
+    STAssertFalse([@"zzfoobarzz" gtm_matchesPattern:pattern], nil);
+    STAssertFalse([@"zzfoobydoo spambarzz" gtm_matchesPattern:pattern], nil);
+    // unrelated and empty inputs
+    STAssertFalse([@"abcdef" gtm_matchesPattern:pattern], nil);
+    STAssertFalse([@"" gtm_matchesPattern:pattern], nil);
+  }
+}
+
+// Checks -gtm_subPatternsOfPattern:. On a full match the array holds the
+// whole match at index 0 followed by one entry per sub pattern, with NSNull
+// standing in for a group that did not take part; anything short of a
+// full-string match yields nil.
+- (void)testSubPatternsOfPattern {
+  NSArray *subPatterns = [@"foooooobaz" gtm_subPatternsOfPattern:@"(fo(o+))((bar)|(baz))"];
+  STAssertNotNil(subPatterns, nil);
+  // STAssertEquals reports a type mismatch unless both arguments have the
+  // same encoded type, and -count is wider than unsigned int on 64-bit, so
+  // narrow it explicitly to line up with the 6U literal.
+  STAssertEquals(6U, (unsigned int)[subPatterns count], nil);
+  STAssertEqualStrings(@"foooooobaz", [subPatterns objectAtIndex:0], nil);
+  STAssertEqualStrings(@"foooooo", [subPatterns objectAtIndex:1], nil);
+  STAssertEqualStrings(@"ooooo", [subPatterns objectAtIndex:2], nil);
+  STAssertEqualStrings(@"baz", [subPatterns objectAtIndex:3], nil);
+  // "(bar)" did not participate in the match
+  STAssertTrue(([NSNull null] == [subPatterns objectAtIndex:4]), nil);
+  STAssertEqualStrings(@"baz", [subPatterns objectAtIndex:5], nil);
+
+  // not there
+  subPatterns = [@"aaa" gtm_subPatternsOfPattern:@"(fo(o+))((bar)|(baz))"];
+  STAssertNil(subPatterns, nil);
+
+  // no extra stuff allowed on either end
+  subPatterns = [@"ZZZfoooooobaz" gtm_subPatternsOfPattern:@"(fo(o+))((bar)|(baz))"];
+  STAssertNil(subPatterns, nil);
+  subPatterns = [@"foooooobazZZZ" gtm_subPatternsOfPattern:@"(fo(o+))((bar)|(baz))"];
+  STAssertNil(subPatterns, nil);
+  subPatterns = [@"ZZZfoooooobazZZZ" gtm_subPatternsOfPattern:@"(fo(o+))((bar)|(baz))"];
+  STAssertNil(subPatterns, nil);
+}
+
+// Checks -gtm_firstSubStringMatchedByPattern: with a simple pattern and with
+// the same pattern split into sub patterns. Unlike the full-string match
+// tests, text around the match is fine here; only the match is returned.
+- (void)testFirstSubStringMatchedByPattern {
+  NSArray *patterns = [NSArray arrayWithObjects:
+                       @"foo.*bar",        // simple pattern
+                       @"(foo)(.*)(bar)",  // pattern w/ sub patterns
+                       nil];
+  NSEnumerator *patternEnum = [patterns objectEnumerator];
+  NSString *pattern = nil;
+  while ((pattern = [patternEnum nextObject]) != nil) {
+    // the match is the whole string
+    STAssertEqualStrings([@"foobar" gtm_firstSubStringMatchedByPattern:pattern],
+                         @"foobar", nil);
+    STAssertEqualStrings([@"foobydoo spambar" gtm_firstSubStringMatchedByPattern:pattern],
+                         @"foobydoo spambar", nil);
+    // the match is embedded in other text
+    STAssertEqualStrings([@"zzfoobarzz" gtm_firstSubStringMatchedByPattern:pattern],
+                         @"foobar", nil);
+    STAssertEqualStrings([@"zzfoobydoo spambarzz" gtm_firstSubStringMatchedByPattern:pattern],
+                         @"foobydoo spambar", nil);
+    // nothing to find
+    STAssertNil([@"abcdef" gtm_firstSubStringMatchedByPattern:pattern], nil);
+    STAssertNil([@"" gtm_firstSubStringMatchedByPattern:pattern], nil);
+  }
+}
+
+// Checks -gtm_segmentEnumeratorForPattern:, which hands back both the
+// matching and the non-matching pieces of the receiver in order. Also covers
+// the no-match and only-match cases, the sub pattern strings saved on each
+// segment, and -allObjects.
+- (void)testSegmentEnumeratorForPattern {
+  NSEnumerator *enumerator =
+    [@"afoobarbfooobaarfoobarzz" gtm_segmentEnumeratorForPattern:@"foo+ba+r"];
+  STAssertNotNil(enumerator, nil);
+  // "a"
+  GTMRegexStringSegment *seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertFalse([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"a", nil);
+  // "foobar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  // "b"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertFalse([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"b", nil);
+  // "fooobaar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"fooobaar", nil);
+  // "foobar" (back-to-back matches produce no segment in between)
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  // "zz"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertFalse([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"zz", nil);
+  // (end)
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test no match
+  enumerator = [@"aaa" gtm_segmentEnumeratorForPattern:@"foo+ba+r"];
+  STAssertNotNil(enumerator, nil);
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertFalse([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"aaa", nil);
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test only match
+  enumerator = [@"foobar" gtm_segmentEnumeratorForPattern:@"foo+ba+r"];
+  STAssertNotNil(enumerator, nil);
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // now test the saved sub segments
+  enumerator =
+    [@"foobarxxfoobaz" gtm_segmentEnumeratorForPattern:@"(foo)((bar)|(baz))"];
+  STAssertNotNil(enumerator, nil);
+  // "foobar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  STAssertEqualStrings([seg subPatternString:0], @"foobar", nil);
+  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
+  STAssertEqualStrings([seg subPatternString:2], @"bar", nil);
+  STAssertEqualStrings([seg subPatternString:3], @"bar", nil);
+  STAssertNil([seg subPatternString:4], nil); // nothing matched "(baz)"
+  STAssertNil([seg subPatternString:5], nil); // index past the last sub pattern
+  // "xx"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertFalse([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"xx", nil);
+  STAssertEqualStrings([seg subPatternString:0], @"xx", nil);
+  STAssertNil([seg subPatternString:1], nil);
+  // "foobaz"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobaz", nil);
+  STAssertEqualStrings([seg subPatternString:0], @"foobaz", nil);
+  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
+  STAssertEqualStrings([seg subPatternString:2], @"baz", nil);
+  STAssertNil([seg subPatternString:3], nil); // nothing matched "(bar)"
+  STAssertEqualStrings([seg subPatternString:4], @"baz", nil);
+  STAssertNil([seg subPatternString:5], nil); // index past the last sub pattern
+  // (end)
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test all objects
+  enumerator = [@"afoobarbfooobaarfoobarzz" gtm_segmentEnumeratorForPattern:@"foo+ba+r"];
+  STAssertNotNil(enumerator, nil);
+  NSArray *allSegments = [enumerator allObjects];
+  STAssertNotNil(allSegments, nil);
+  // STAssertEquals reports a type mismatch unless both arguments have the
+  // same encoded type, and -count is wider than unsigned int on 64-bit, so
+  // narrow it explicitly to line up with the 6U literal.
+  STAssertEquals(6U, (unsigned int)[allSegments count], nil);
+}
+
+// Checks -gtm_matchSegmentEnumeratorForPattern:, which hands back only the
+// matching pieces of the receiver. Also covers the no-match and only-match
+// cases, the sub pattern strings saved on each segment, and -allObjects.
+- (void)testMatchSegmentEnumeratorForPattern {
+  NSEnumerator *enumerator =
+    [@"afoobarbfooobaarfoobarzz" gtm_matchSegmentEnumeratorForPattern:@"foo+ba+r"];
+  STAssertNotNil(enumerator, nil);
+  // "a" - skipped
+  // "foobar"
+  GTMRegexStringSegment *seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  // "b" - skipped
+  // "fooobaar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"fooobaar", nil);
+  // "foobar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  // "zz" - skipped
+  // (end)
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test no match
+  enumerator = [@"aaa" gtm_matchSegmentEnumeratorForPattern:@"foo+ba+r"];
+  STAssertNotNil(enumerator, nil);
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test only match
+  enumerator = [@"foobar" gtm_matchSegmentEnumeratorForPattern:@"foo+ba+r"];
+  STAssertNotNil(enumerator, nil);
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // now test the saved sub segments
+  enumerator =
+    [@"foobarxxfoobaz" gtm_matchSegmentEnumeratorForPattern:@"(foo)((bar)|(baz))"];
+  STAssertNotNil(enumerator, nil);
+  // "foobar"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobar", nil);
+  STAssertEqualStrings([seg subPatternString:0], @"foobar", nil);
+  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
+  STAssertEqualStrings([seg subPatternString:2], @"bar", nil);
+  STAssertEqualStrings([seg subPatternString:3], @"bar", nil);
+  STAssertNil([seg subPatternString:4], nil); // nothing matched "(baz)"
+  STAssertNil([seg subPatternString:5], nil); // index past the last sub pattern
+  // "xx" - skipped
+  // "foobaz"
+  seg = [enumerator nextObject];
+  STAssertNotNil(seg, nil);
+  STAssertTrue([seg isMatch], nil);
+  STAssertEqualStrings([seg string], @"foobaz", nil);
+  STAssertEqualStrings([seg subPatternString:0], @"foobaz", nil);
+  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
+  STAssertEqualStrings([seg subPatternString:2], @"baz", nil);
+  STAssertNil([seg subPatternString:3], nil); // nothing matched "(bar)"
+  STAssertEqualStrings([seg subPatternString:4], @"baz", nil);
+  STAssertNil([seg subPatternString:5], nil); // index past the last sub pattern
+  // (end)
+  seg = [enumerator nextObject];
+  STAssertNil(seg, nil);
+
+  // test all objects
+  enumerator = [@"afoobarbfooobaarfoobarzz" gtm_matchSegmentEnumeratorForPattern:@"foo+ba+r"];
+  STAssertNotNil(enumerator, nil);
+  NSArray *allSegments = [enumerator allObjects];
+  STAssertNotNil(allSegments, nil);
+  // STAssertEquals reports a type mismatch unless both arguments have the
+  // same encoded type, and -count is wider than unsigned int on 64-bit, so
+  // narrow it explicitly to line up with the 3U literal.
+  STAssertEquals(3U, (unsigned int)[allSegments count], nil);
+}
+
+- (void)testAllSubstringsMatchedByPattern {
+ // Exercises -gtm_allSubstringsMatchedByPattern: and checks that it returns
+ // the matched substrings, in order, as an array (empty when nothing matches).
+ //
+ // STAssertEquals requires both values to have the same type; -count can be
+ // wider than unsigned int (e.g. on 64-bit builds), so the counts below are
+ // compared as unsigned int.
+ NSArray *segments =
+ [@"afoobarbfooobaarfoobarzz" gtm_allSubstringsMatchedByPattern:@"foo+ba+r"];
+ STAssertNotNil(segments, nil);
+ STAssertEquals(3U, (unsigned int)[segments count], nil);
+ STAssertEqualStrings([segments objectAtIndex:0], @"foobar", nil);
+ STAssertEqualStrings([segments objectAtIndex:1], @"fooobaar", nil);
+ STAssertEqualStrings([segments objectAtIndex:2], @"foobar", nil);
+
+ // test no match: an empty array is expected, not nil
+ segments = [@"aaa" gtm_allSubstringsMatchedByPattern:@"foo+ba+r"];
+ STAssertNotNil(segments, nil);
+ STAssertEquals(0U, (unsigned int)[segments count], nil);
+
+ // test only match: the whole string is the single substring
+ segments = [@"foobar" gtm_allSubstringsMatchedByPattern:@"foo+ba+r"];
+ STAssertNotNil(segments, nil);
+ STAssertEquals(1U, (unsigned int)[segments count], nil);
+ STAssertEqualStrings([segments objectAtIndex:0], @"foobar", nil);
+}
+
+- (void)testStringByReplacingMatchesOfPatternWithReplacement {
+ // Exercises -gtm_stringByReplacingMatchesOfPattern:withReplacement: with
+ // subpattern references, nil/empty replacements, unmatched or out-of-range
+ // references, and backslashes that are not part of a reference.
+
+ // simple case: reference the three groups in reverse order
+ NSString *simpleInput = @"weefoobydoo spambardoggies";
+ NSString *simplePattern = @"(foo)(.*)(bar)";
+ STAssertEqualStrings(@"weeZbarZbydoo spamZfooZdoggies",
+ [simpleInput gtm_stringByReplacingMatchesOfPattern:simplePattern
+ withReplacement:@"Z\\3Z\\2Z\\1Z"],
+ nil);
+ // a nil replacement and an empty replacement both just remove the match
+ STAssertEqualStrings(@"weedoggies",
+ [simpleInput gtm_stringByReplacingMatchesOfPattern:simplePattern
+ withReplacement:nil],
+ nil);
+ STAssertEqualStrings(@"weedoggies",
+ [simpleInput gtm_stringByReplacingMatchesOfPattern:simplePattern
+ withReplacement:@""],
+ nil);
+
+ // references to optional groups that did not take part in the match, and to
+ // group numbers the pattern does not have
+ NSString *optionalInput = @"aaa foooooobaz fooobar bar foo baz aaa";
+ NSString *optionalPattern = @"(fo(o+))((bar)|(baz))";
+ STAssertEqualStrings(@"aaa baz bar bar foo baz aaa",
+ [optionalInput gtm_stringByReplacingMatchesOfPattern:optionalPattern
+ withReplacement:@"\\4\\5"],
+ nil);
+ STAssertEqualStrings(@"aaa ZZZ ZZZ bar foo baz aaa",
+ [optionalInput gtm_stringByReplacingMatchesOfPattern:optionalPattern
+ withReplacement:@"Z\\10Z\\12Z"],
+ nil);
+
+ // backslashes in the replacement that are not part of a subpattern reference
+ NSString *slashInput = @"zaz";
+ NSString *slashPattern = @"a+";
+ STAssertEqualStrings(@"z\\\\0 \\\\a \\\\\\\\0z",
+ [slashInput gtm_stringByReplacingMatchesOfPattern:slashPattern
+ withReplacement:@"\\\\0 \\\\\\0 \\\\\\\\0"],
+ nil);
+ STAssertEqualStrings(@"z\\\\a \\\\\\\\0 \\\\\\\\az",
+ [slashInput gtm_stringByReplacingMatchesOfPattern:slashPattern
+ withReplacement:@"\\\\\\0 \\\\\\\\0 \\\\\\\\\\0"],
+ nil);
+ STAssertEqualStrings(@"z\\\\\\\\0 \\\\\\\\a \\\\\\\\\\\\0z",
+ [slashInput gtm_stringByReplacingMatchesOfPattern:slashPattern
+ withReplacement:@"\\\\\\\\0 \\\\\\\\\\0 \\\\\\\\\\\\0"],
+ nil);
+}
+
+@end
diff --git a/GTM.xcodeproj/project.pbxproj b/GTM.xcodeproj/project.pbxproj
index 9314398..f0257ea 100644
--- a/GTM.xcodeproj/project.pbxproj
+++ b/GTM.xcodeproj/project.pbxproj
@@ -56,6 +56,9 @@
F42E09AB0D19A5E300D5DDE0 /* GTMNSWorkspace+Theme.m in Sources */ = {isa = PBXBuildFile; fileRef = F42E09A90D19A5E300D5DDE0 /* GTMNSWorkspace+Theme.m */; };
F42E09AE0D19A62F00D5DDE0 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = F42E09AD0D19A62F00D5DDE0 /* Carbon.framework */; };
F42E0B0A0D19A6FB00D5DDE0 /* GTMNSWorkspace+ThemeTest.m in Sources */ = {isa = PBXBuildFile; fileRef = F42E0B090D19A6FB00D5DDE0 /* GTMNSWorkspace+ThemeTest.m */; };
+ F437F55D0D50BC0A00F5C3A4 /* GTMRegex.h in Headers */ = {isa = PBXBuildFile; fileRef = F437F55A0D50BC0A00F5C3A4 /* GTMRegex.h */; };
+ F437F55E0D50BC0A00F5C3A4 /* GTMRegex.m in Sources */ = {isa = PBXBuildFile; fileRef = F437F55B0D50BC0A00F5C3A4 /* GTMRegex.m */; };
+ F437F5620D50BC1D00F5C3A4 /* GTMRegexTest.m in Sources */ = {isa = PBXBuildFile; fileRef = F437F55C0D50BC0A00F5C3A4 /* GTMRegexTest.m */; };
F43DCDCD0D4796C600959A62 /* GTMLoginItems.h in Headers */ = {isa = PBXBuildFile; fileRef = F43DCDCB0D4796C600959A62 /* GTMLoginItems.h */; };
F43DCDCE0D4796C600959A62 /* GTMLoginItems.m in Sources */ = {isa = PBXBuildFile; fileRef = F43DCDCC0D4796C600959A62 /* GTMLoginItems.m */; };
F43DCEC70D47BEA000959A62 /* GTMLoginItemsTest.m in Sources */ = {isa = PBXBuildFile; fileRef = F43DCEC60D47BEA000959A62 /* GTMLoginItemsTest.m */; };
@@ -134,6 +137,9 @@
F42E09A90D19A5E300D5DDE0 /* GTMNSWorkspace+Theme.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "GTMNSWorkspace+Theme.m"; sourceTree = "<group>"; };
F42E09AD0D19A62F00D5DDE0 /* Carbon.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Carbon.framework; path = /System/Library/Frameworks/Carbon.framework; sourceTree = "<absolute>"; };
F42E0B090D19A6FB00D5DDE0 /* GTMNSWorkspace+ThemeTest.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "GTMNSWorkspace+ThemeTest.m"; sourceTree = "<group>"; };
+ F437F55A0D50BC0A00F5C3A4 /* GTMRegex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = GTMRegex.h; sourceTree = "<group>"; };
+ F437F55B0D50BC0A00F5C3A4 /* GTMRegex.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = GTMRegex.m; sourceTree = "<group>"; };
+ F437F55C0D50BC0A00F5C3A4 /* GTMRegexTest.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = GTMRegexTest.m; sourceTree = "<group>"; };
F43DCDCB0D4796C600959A62 /* GTMLoginItems.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = GTMLoginItems.h; sourceTree = "<group>"; };
F43DCDCC0D4796C600959A62 /* GTMLoginItems.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = GTMLoginItems.m; sourceTree = "<group>"; };
F43DCEC60D47BEA000959A62 /* GTMLoginItemsTest.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = GTMLoginItemsTest.m; sourceTree = "<group>"; };
@@ -340,6 +346,9 @@
F43E4E5F0D4E5EC90041161F /* GTMNSData+zlib.m */,
F43E4E600D4E5EC90041161F /* GTMNSData+zlibTest.m */,
F48FE2910D198D24009257D2 /* GTMObjectSingleton.h */,
+ F437F55A0D50BC0A00F5C3A4 /* GTMRegex.h */,
+ F437F55B0D50BC0A00F5C3A4 /* GTMRegex.m */,
+ F437F55C0D50BC0A00F5C3A4 /* GTMRegexTest.m */,
F48FE2920D198D24009257D2 /* GTMSystemVersion.h */,
F48FE2930D198D24009257D2 /* GTMSystemVersion.m */,
F48FE2E10D198E4C009257D2 /* GTMSystemVersionTest.m */,
@@ -384,6 +393,7 @@
F43E4C280D4E361D0041161F /* GTMNSString+XML.h in Headers */,
F43E4DD90D4E56320041161F /* GTMNSEnumerator+Filter.h in Headers */,
F43E4E610D4E5EC90041161F /* GTMNSData+zlib.h in Headers */,
+ F437F55D0D50BC0A00F5C3A4 /* GTMRegex.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -532,6 +542,7 @@
F47F1D350D4914B600925B8F /* GTMCalculatedRangeTest.m in Sources */,
F43E4C2D0D4E36230041161F /* GTMNSString+XMLTest.m in Sources */,
F43E4DDE0D4E56380041161F /* GTMNSEnumerator+FilterTest.m in Sources */,
+ F437F5620D50BC1D00F5C3A4 /* GTMRegexTest.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -554,6 +565,7 @@
F43E4C290D4E361D0041161F /* GTMNSString+XML.m in Sources */,
F43E4DDA0D4E56320041161F /* GTMNSEnumerator+Filter.m in Sources */,
F43E4E620D4E5EC90041161F /* GTMNSData+zlib.m in Sources */,
+ F437F55E0D50BC0A00F5C3A4 /* GTMRegex.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};