// Patch metadata (preserved from the flattened commit header):
//
//   From 62301a30a8b6e9b71d3549178f89d62c8c3c7d48 Mon Sep 17 00:00:00 2001
//   From: thomasvl
//   Date: Wed, 30 Jan 2008 18:42:33 +0000
//   Subject: fold in GTMRegex and ignore the build dir
//   ---
//    Foundation/GTMRegex.h     | 338 ++++++++++++++++
//    Foundation/GTMRegex.m     | 674 ++++++++++++++++++++++++++++++++
//    Foundation/GTMRegexTest.m | 955 ++++++++++++++++++++++++++++++++++++++++++++++
//    3 files changed, 1967 insertions(+)
//    create mode 100644 Foundation/GTMRegex.h
//    create mode 100644 Foundation/GTMRegex.m
//    create mode 100644 Foundation/GTMRegexTest.m
//   (limited to 'Foundation')
//
//   diff --git a/Foundation/GTMRegex.h b/Foundation/GTMRegex.h
//   new file mode 100644
//   index 0000000..8e0f492
//   --- /dev/null
//   +++ b/Foundation/GTMRegex.h
//   @@ -0,0 +1,338 @@

//
//  GTMRegex.h
//
//  Copyright 2007-2008 Google Inc.
//
//  Licensed under the Apache License, Version 2.0 (the "License"); you may not
//  use this file except in compliance with the License.  You may obtain a copy
//  of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
//  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
//  License for the specific language governing permissions and limitations under
//  the License.
//

// The targets of these two imports were lost when the patch was extracted.
// Foundation supplies NSObject/NSString/NSData/NSEnumerator and regex.h
// supplies regex_t/regmatch_t, all of which are used by the declarations in
// this header.
#import <Foundation/Foundation.h>
#import <regex.h>

/// Options for controlling the behavior of the matches
typedef enum {

  kGTMRegexOptionIgnoreCase            = 0x01,
    // Ignore case in matching, ie: 'a' matches 'a' or 'A'

  kGTMRegexOptionSupressNewlineSupport = 0x02,
    // By default (without this option), regular expressions are implicitly
    // processed on a line by line basis, where "lines" are delimited by newline
    // characters. In this mode '.' (dot) does NOT match newline characters, and
    // '^' and '$' match at the beginning and end of the string as well as
    // around newline characters. This behavior matches the default behavior for
    // regular expressions in other languages including Perl and Python. For
    // example,
    //     foo.*bar
    // would match
    //     fooAAAbar
    // but would NOT match
    //     fooAAA\nbar
    // With the kGTMRegexOptionSupressNewlineSupport option, newlines are treated
    // just like any other character which means that '.' will match them. In
    // this mode, ^ and $ only match the beginning and end of the input string
    // and do NOT match around the newline characters. For example,
    //     foo.*bar
    // would match
    //     fooAAAbar
    // and would also match
    //     fooAAA\nbar

} GTMRegexOptions;

/// Class for doing Extended Regex operations w/ libregex (see re_format(7)).
//
// NOTE: the docs for regcomp/regexec make *no* claims about i18n.  All work
// within this class is done w/ UTF-8 so Unicode should move through it safely,
// however, the character classes described in re_format(7) might not really
// be unicode "savvy", so use them and this class w/ that in mind.
//
// Example usage:
//
//   NSArray *inputArrayOfStrings = ...
//   NSEnumerator *enumerator = [inputArrayOfStrings objectEnumerator];
//   NSString *curStr = nil;
//   NSMutableArray *matches = [NSMutableArray array];
//
//   GTMRegex *regex = [GTMRegex regexWithPattern:@"foo.*bar"];
//   while ((curStr = [enumerator nextObject]) != nil) {
//     if ([regex matchesString:curStr])
//       [matches addObject:curStr];
//   }
//   ....
//
// -------------
//
//   If you need to include something dynamic in a pattern:
//
//   NSString *pattern =
//     [NSString stringWithFormat:@"^foo:%@bar",
//       [GTMRegex escapedPatternForString:inputStr]];
//   GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
//   ....
//
// -------------
//
//   GTMRegex *regex = [GTMRegex regexWithPattern:@"(foo+)(bar)"];
//   NSString *highlighted =
//     [regex stringByReplacingMatchesInString:inputString
//                             withReplacement:@"<i>\\1</i><b>\\2</b>"];
//   ....
//
@interface GTMRegex : NSObject {
 @private
  NSString *pattern_;        // copy of the pattern this regex was compiled from
  GTMRegexOptions options_;  // options given at init time
  regex_t regexData_;        // compiled form of pattern_
}

/// Create a new, autoreleased object w/ the given regex pattern with the default options
+ (id)regexWithPattern:(NSString *)pattern;

/// Create a new, autoreleased object w/ the given regex pattern and specify the matching options
+ (id)regexWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;

/// Returns a new, autoreleased copy of |str| w/ any pattern chars in it escaped so they have no meaning when used w/in a pattern.
+ (NSString *)escapedPatternForString:(NSString *)str;

/// Initialize a new object w/ the given regex pattern with the default options
- (id)initWithPattern:(NSString *)pattern;

/// Initialize a new object w/ the given regex pattern and specify the matching options
- (id)initWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;

/// Returns the number of sub patterns in the pattern
//
// Sub Patterns are basically the number of parenthesis blocks w/in the pattern.
// ie: The pattern "foo((bar)|(baz))" has 3 sub patterns.
//
- (int)subPatternCount;

/// Returns YES if the whole string |str| matches the pattern.
- (BOOL)matchesString:(NSString *)str;

/// Returns a new, autoreleased array of strings that contain the subpattern matches for the string.
//
// If the whole string does not match the pattern, nil is returned.
//
// The api follows the conventions of most regex engines, and index 0 (zero) is
// the full match, then the subpatterns are index 1, 2, ... going left to right.
// If the pattern has optional subpatterns, then anything that didn't match
// will have NSNull at that index.
// ie: The pattern "(fo(o+))((bar)|(baz))" has five subpatterns, and when
//     applied to the string "foooooobaz" you'd get an array of:
//       0: "foooooobaz"
//       1: "foooooo"
//       2: "ooooo"
//       3: "baz"
//       4: NSNull
//       5: "baz"
//
- (NSArray *)subPatternsOfString:(NSString *)str;

/// Returns a new, autoreleased enumerator that will walk segments (GTMRegexStringSegment) of |str| based on the pattern.
//
// This will split the string into "segments" using the given pattern.  You get
// both the matches and the parts that are in between matches.  ie-the entire
// string will eventually be returned.
//
// See GTMRegexStringSegment for more information and examples.
//
- (NSEnumerator *)segmentEnumeratorForString:(NSString *)str;

/// Returns a new, autoreleased enumerator that will walk only the matching segments (GTMRegexStringSegment) of |str| based on the pattern.
//
// This extracts the "segments" of the string that used the pattern.  So it can
// be used to collect all of the matching substrings from within a string.
//
// See GTMRegexStringSegment for more information and examples.
//
- (NSEnumerator *)matchSegmentEnumeratorForString:(NSString *)str;

/// Returns a new, autoreleased string with all matches of the pattern in |str| replaced with |replacementPattern|.
//
// Replacement uses the SED substitution like syntax w/in |replacementPattern|
// to allow the use of matches in the replacement.  The replacement pattern can
// make use of any number of match references by using a backslash followed by
// the match subexpression number (ie-"\2", "\0", ...), see subPatternsOfString:
// for details on the subexpression indexing.
//
// REMINDER: you need to double-slash since the slash has meaning to the
// compiler/preprocessor.  ie: "\\0"
//
- (NSString *)stringByReplacingMatchesInString:(NSString *)str
                               withReplacement:(NSString *)replacementPattern;

@end

/// Class returned by the nextObject for the enumerators from GTMRegex
//
// The two enumerators from GTMRegex return objects of this type.  This object
// represents a "piece" of the string the enumerator is walking.  The apis on
// this object allow you to figure out why each segment was returned and to
// act on it.
//
// The easiest way to understand how the enumerators and this class work is
// through an example ::
//    Pattern: "foo+"
//     String: "fo bar foobar foofooo baz"
//  If you walk this w/ -segmentEnumeratorForString you'll get:
//    # nextObjects Calls  -isMatch       -string
//           1               NO           "fo bar "
//           2               YES          "foo"
//           3               NO           "bar "
//           4               YES          "foo"
//           5               YES          "fooo"
//           6               NO           " baz"
//  And if you walk this w/ -matchSegmentEnumeratorForString you'll get:
//    # nextObjects Calls  -isMatch       -string
//           1               YES          "foo"
//           2               YES          "foo"
//           3               YES          "fooo"
//  (see the comments on subPatternString for how it works)
//
// Example usage:
//
//   NSMutableString *processedStr = [NSMutableString string];
//   NSEnumerator *enumerator =
//     [inputStr gtm_segmentEnumeratorForPattern:@"foo+((ba+r)|(ba+z))"];
//   GTMRegexStringSegment *segment = nil;
//   while ((segment = [enumerator nextObject]) != nil) {
//     if ([segment isMatch]) {
//       if ([segment subPatternString:2] != nil) {
//         // matched: "(ba+r)"
//         [processedStr appendFormat:@"<b>%@</b>", [segment string]];
//       } else {
//         // matched: "(ba+z)"
//         [processedStr appendFormat:@"<i>%@</i>", [segment string]];
//       }
//     } else {
//       [processedStr appendString:[segment string]];
//     }
//   }
//   // processedStr now has all the versions of foobar wrapped in bold tags,
//   // and all the versions of foobaz in italics tags.
//   // ie: " fooobar foobaaz " ==> " <b>fooobar</b> <i>foobaaz</i> "
//
@interface GTMRegexStringSegment : NSObject {
 @private
  NSData *utf8StrBuf_;       // UTF-8 bytes the match offsets index into
  regmatch_t *regMatches_;   // STRONG: ie-we call free
  int numRegMatches_;        // highest valid subpattern index in regMatches_
  BOOL isMatch_;
}

/// Returns YES if this segment came from a match of the regex, NO if it was a segment between matches.
//
// Use -isMatch to see if the segment came from a match of the pattern or if the
// segment is some text between matches.  (NOTE: isMatch is always YES for
// matchSegmentEnumeratorForString)
//
- (BOOL)isMatch;

/// Returns a new, autoreleased string w/ the full text segment from the original string.
- (NSString *)string;

/// Returns a new, autoreleased string w/ the |index| sub pattern from this segment of the original string.
//
// This api follows the conventions of most regex engines, and index 0 (zero) is
// the full match, then the subpatterns are index 1, 2, ... going left to right.
// If the pattern has optional subpatterns, then anything that didn't match
// will return nil.
// ie: When using the pattern "(fo(o+))((bar)|(baz))" the following indexes
//     fetch these values for a segment where -string is @"foooooobaz":
//       0: "foooooobaz"
//       1: "foooooo"
//       2: "ooooo"
//       3: "baz"
//       4: nil
//       5: "baz"
//
- (NSString *)subPatternString:(int)index;

@end

/// Some helpers to streamline usage of GTMRegex
//
// Example usage:
//
//   if ([inputStr gtm_matchesPattern:@"foo.*bar"]) {
//     // act on match
//     ....
//   }
//
// -------------
//
//   NSString *subStr = [inputStr gtm_firstSubStringMatchedByPattern:@"^foo:.*$"];
//   if (subStr != nil) {
//     // act on subStr
//     ....
//   }
//
// -------------
//
//   NSArray *headingList =
//     [inputStr gtm_allSubstringsMatchedByPattern:@"^Heading:.*$"];
//   // act on the list of headings
//   ....
//
// -------------
//
//   NSString *highlightedString =
//     [inputString gtm_stringByReplacingMatchesOfPattern:@"(foo+)(bar)"
//                                        withReplacement:@"<i>\\1</i><b>\\2</b>"];
//   ....
//
@interface NSString (GTMRegexAdditions)

/// Returns YES if the full string matches regex |pattern| using the default match options
- (BOOL)gtm_matchesPattern:(NSString *)pattern;

/// Returns a new, autoreleased array of strings that contain the subpattern matches of |pattern| using the default match options
//
// See [GTMRegex subPatternsOfString:] for information about the returned array.
//
- (NSArray *)gtm_subPatternsOfPattern:(NSString *)pattern;

/// Returns a new, autoreleased string w/ the first substring that matched the regex |pattern| using the default match options
- (NSString *)gtm_firstSubStringMatchedByPattern:(NSString *)pattern;

/// Returns a new, autoreleased array of substrings in the string that match the regex |pattern| using the default match options
//
// Note: if the string has no matches, you get an empty array.
- (NSArray *)gtm_allSubstringsMatchedByPattern:(NSString *)pattern;

/// Returns a new, autoreleased segment enumerator that will break the string using pattern w/ the default match options
//
// The enumerator returns GTMRegexStringSegment objects, see that class for
// more details and examples.
//
- (NSEnumerator *)gtm_segmentEnumeratorForPattern:(NSString *)pattern;

/// Returns a new, autoreleased segment enumerator that will only return matching segments from the string using pattern w/ the default match options
//
// The enumerator returns GTMRegexStringSegment objects, see that class for
// more details and examples.
//
- (NSEnumerator *)gtm_matchSegmentEnumeratorForPattern:(NSString *)pattern;

/// Returns a new, autoreleased string where all matches for pattern |pattern| are replaced w/ |replacementPattern|.  Uses the default match options.
//
// |replacementPattern| has support for using any subExpression that matched,
// see [GTMRegex stringByReplacingMatchesInString:withReplacement:] above
// for details.
//
- (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
                                    withReplacement:(NSString *)replacementPattern;

@end

// Patch metadata (preserved): next file in the patch
//
//   diff --git a/Foundation/GTMRegex.m b/Foundation/GTMRegex.m
//   new file mode 100644
//   index 0000000..c582b1e
//   --- /dev/null
//   +++ b/Foundation/GTMRegex.m
//   @@ -0,0 +1,674 @@

//
//  GTMRegex.m
//
//  Copyright 2007-2008 Google Inc.
//
//  Licensed under the Apache License, Version 2.0 (the "License"); you may not
//  use this file except in compliance with the License.  You may obtain a copy
//  of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
//  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
//  License for the specific language governing permissions and limitations under
//  the License.
//

#import "GTMRegex.h"

// This is the pattern to use for walking replacement text when doing
// substitutions.
//
// this pattern may look over escaped, but remember the compiler will consume
// one layer of slashes, and then we have to escape the slashes for them to be
// seen as we want in the pattern.
static NSString *const kReplacementPattern =
  @"((^|[^\\\\])(\\\\\\\\)*)(\\\\([0-9]+))";
// Subpattern indexes into kReplacementPattern: 1 is the text leading up to the
// reference (including any escaped backslashes), 5 is the digits of the
// reference itself.
#define kReplacementPatternLeadingTextIndex       1
#define kReplacementPatternSubpatternNumberIndex  5

@interface GTMRegex (PrivateMethods)
- (NSString *)errorMessage:(int)errCode;
- (BOOL)runRegexOnUTF8:(const char*)utf8Str
                nmatch:(size_t)nmatch
                pmatch:(regmatch_t *)pmatch
                 flags:(int)flags;
@end

// private enumerator as impl detail
@interface GTMRegexEnumerator : NSEnumerator {
 @private
  GTMRegex *regex_;
  NSData *utf8StrBuf_;           // UTF-8 bytes of the string being walked
  BOOL allSegments_;             // YES: return text between matches too
  regoff_t curParseIndex_;       // byte offset where the next search starts
  regmatch_t *savedRegMatches_;  // malloc'ed match held back for the next call
}
- (id)initWithRegex:(GTMRegex *)regex
      processString:(NSString *)str
        allSegments:(BOOL)allSegments;
@end

@interface GTMRegexStringSegment (PrivateMethods)
// Takes ownership of |regMatches| (it is free'ed in -dealloc).
- (id)initWithUTF8StrBuf:(NSData *)utf8StrBuf
              regMatches:(regmatch_t *)regMatches
           numRegMatches:(int)numRegMatches
                 isMatch:(BOOL)isMatch;
@end

@implementation GTMRegex

+ (id)regexWithPattern:(NSString *)pattern {
  return [[[self alloc] initWithPattern:pattern] autorelease];
}

+ (id)regexWithPattern:(NSString *)pattern options:(GTMRegexOptions)options {
  return [[[self alloc] initWithPattern:pattern
                                options:options] autorelease];
}

// Returns |str| with every character that is special in an extended regex
// prefixed by a backslash; returns nil for a nil |str|.
+ (NSString *)escapedPatternForString:(NSString *)str {
  if (str == nil)
    return nil;

  // NOTE: this could be done more efficiently by fetching the whole string into
  // a unichar buffer and scanning that, along w/ pushing the data over in
  // chunks (when possible).

  unsigned int len = [str length];
  NSMutableString *result = [NSMutableString stringWithCapacity:len];

  for (unsigned int x = 0; x < len; ++x) {
    unichar ch = [str characterAtIndex:x];
    switch (ch) {
      case '^':
      case '.':
      case '[':
      case '$':
      case '(':
      case ')':
      case '|':
      case '*':
      case '+':
      case '?':
      case '{':
      case '\\':
        [result appendFormat:@"\\%C", ch];
        break;
      default:
        [result appendFormat:@"%C", ch];
        break;
    }
  }

  return result;
}

- (id)init {
  return [self initWithPattern:nil];
}

- (id)initWithPattern:(NSString *)pattern {
  return [self initWithPattern:pattern options:0];
}

// Designated initializer.  Returns nil (after logging) if |pattern| is empty
// or does not compile.
- (id)initWithPattern:(NSString *)pattern options:(GTMRegexOptions)options {
  self = [super init];
  if (!self) return nil;

  if ([pattern length] == 0) {
    [self release];
    return nil;
  }

  // figure out the flags
  options_ = options;
  int flags = REG_EXTENDED;
  if (options_ & kGTMRegexOptionIgnoreCase)
    flags |= REG_ICASE;
  if ((options_ & kGTMRegexOptionSupressNewlineSupport) == 0)
    flags |= REG_NEWLINE;

  // even if regcomp fails we need a flag that we did call regcomp so we'll
  // call regfree (because the structure can get filled in some to allow better
  // error info).  we use pattern_ as this flag.
  pattern_ = [pattern copy];
  if (!pattern_) {
    [self release];
    return nil;
  }

  // compile it
  int compResult = regcomp(&regexData_, [pattern_ UTF8String], flags);
  if (compResult != 0) {
    // we don't want to throw if we failed, so we'll return nil, but still
    // log the error just so it's out there.
    NSString *errorStr = [self errorMessage:compResult];
    NSLog(@"Invalid pattern \"%@\", error: \"%@\"", pattern_, errorStr);

    [self release];
    return nil;
  }

  return self;
}

- (void)dealloc {
  // we used pattern_ as our flag that we initialized the regex_t
  if (pattern_) {
    regfree(&regexData_);
    [pattern_ release];
    // play it safe and clear it since we use it as a flag for regexData_
    pattern_ = nil;
  }
  [super dealloc];
}

- (int)subPatternCount {
  return regexData_.re_nsub;
}

- (BOOL)matchesString:(NSString *)str {
  regmatch_t regMatch;
  if (![self runRegexOnUTF8:[str UTF8String]
                     nmatch:1
                     pmatch:&regMatch
                      flags:0]) {
    // no match
    return NO;
  }

  // make sure the match is the full string
  return (regMatch.rm_so == 0) &&
    (regMatch.rm_eo == (regoff_t)[str lengthOfBytesUsingEncoding:NSUTF8StringEncoding]);
}

// Returns the full match (index 0) and each subpattern of |str|, with NSNull
// standing in for subpatterns that were not used; nil if the whole string does
// not match.
- (NSArray *)subPatternsOfString:(NSString *)str {
  NSArray *result = nil;

  int count = regexData_.re_nsub + 1;
  regmatch_t *regMatches = malloc(sizeof(regmatch_t) * count);
  if (!regMatches)
    return nil;

  // wrap it all in a try so we don't leak the malloc
  @try {
    const char *utf8Str = [str UTF8String];
    if (![self runRegexOnUTF8:utf8Str
                       nmatch:count
                       pmatch:regMatches
                        flags:0]) {
      // no match
      return nil;
    }

    // make sure the match is the full string
    if ((regMatches[0].rm_so != 0) ||
        (regMatches[0].rm_eo != (regoff_t)[str lengthOfBytesUsingEncoding:NSUTF8StringEncoding])) {
      // only matched a sub part of the string
      return nil;
    }

    NSMutableArray *buildResult = [NSMutableArray arrayWithCapacity:count];

    for (int x = 0 ; x < count ; ++x) {
      if ((regMatches[x].rm_so == -1) && (regMatches[x].rm_eo == -1)) {
        // add NSNull since it wasn't used
        [buildResult addObject:[NSNull null]];
      } else {
        // fetch the string
        const char *base = utf8Str + regMatches[x].rm_so;
        unsigned len = regMatches[x].rm_eo - regMatches[x].rm_so;
        NSString *sub =
          [[[NSString alloc] initWithBytes:base
                                    length:len
                                  encoding:NSUTF8StringEncoding] autorelease];
        [buildResult addObject:sub];
      }
    }

    result = buildResult;
  }
  @finally {
    free(regMatches);
  }

  return result;
}

- (NSEnumerator *)segmentEnumeratorForString:(NSString *)str {
  return [[[GTMRegexEnumerator alloc] initWithRegex:self
                                      processString:str
                                        allSegments:YES] autorelease];
}

- (NSEnumerator *)matchSegmentEnumeratorForString:(NSString *)str {
  return [[[GTMRegexEnumerator alloc] initWithRegex:self
                                      processString:str
                                        allSegments:NO] autorelease];
}

// Builds a copy of |str| where each match is replaced by |replacementPattern|,
// expanding "\N" references to the N'th subpattern of that match.  A nil or
// empty replacement removes the matches.  Returns nil for a nil |str|.
- (NSString *)stringByReplacingMatchesInString:(NSString *)str
                               withReplacement:(NSString *)replacementPattern {
  if (!str)
    return nil;

  // if we have a replacement, we go ahead and crack it now.  if the replacement
  // is just an empty string (or nil), just use the nil marker.
  NSArray *replacements = nil;
  if ([replacementPattern length]) {
    // don't need newline support, just match the start of the pattern for '^'
    GTMRegex *replacementRegex =
      [GTMRegex regexWithPattern:kReplacementPattern
                         options:kGTMRegexOptionSupressNewlineSupport];
    // pull them all into an array so we can walk this as many times as needed.
    replacements =
      [[replacementRegex segmentEnumeratorForString:replacementPattern] allObjects];
    if (!replacements) {
      NSLog(@"failed to create the replacements for subtituations");
      return nil;
    }
  }

  NSMutableString *result = [NSMutableString stringWithCapacity:[str length]];

  NSEnumerator *enumerator = [self segmentEnumeratorForString:str];
  GTMRegexStringSegment *segment = nil;
  while ((segment = [enumerator nextObject]) != nil) {
    if (![segment isMatch]) {
      // not a match, just move this chunk over
      [result appendString:[segment string]];
    } else {
      // match...
      if (!replacements) {
        // no replacements, they want to eat matches, nothing to do
      } else {
        // spin over the split up replacement
        NSEnumerator *replacementEnumerator = [replacements objectEnumerator];
        GTMRegexStringSegment *replacementSegment = nil;
        while ((replacementSegment = [replacementEnumerator nextObject]) != nil) {
          if (![replacementSegment isMatch]) {
            // not a match, raw text to put in
            [result appendString:[replacementSegment string]];
          } else {
            // match...

            // first goes any leading text
            NSString *leading =
              [replacementSegment subPatternString:kReplacementPatternLeadingTextIndex];
            if (leading)
              [result appendString:leading];
            // then use the subpattern number to find what goes in from the
            // original string match.
            int subPatternNum =
              [[replacementSegment subPatternString:kReplacementPatternSubpatternNumberIndex] intValue];
            NSString *matchSubPatStr = [segment subPatternString:subPatternNum];
            // handle an unused subpattern (ie-nil result)
            if (matchSubPatStr)
              [result appendString:matchSubPatStr];
          }
        }
      }
    }
  }
  return result;
}

- (NSString *)description {
  // re_nsub is a size_t, so it needs the complete "%zu" conversion.
  NSMutableString *result =
    [NSMutableString stringWithFormat:@"%@<%p> { pattern=\"%@\", rawNumSubPatterns=%zu, options=(",
      [self class], self, pattern_, regexData_.re_nsub];
  if (options_) {
    if (options_ & kGTMRegexOptionIgnoreCase)
      [result appendString:@" IgnoreCase"];
    if ((options_ & kGTMRegexOptionSupressNewlineSupport) == kGTMRegexOptionSupressNewlineSupport)
      [result appendString:@" NoNewlineSupport"];
  } else {
    [result appendString:@" None(Default)"];
  }
  [result appendString:@" ) }"];
  return result;
}

@end

@implementation GTMRegex (PrivateMethods)

// Returns the regerror() text for |errCode|, or @"internal error" if it can't
// be fetched.
- (NSString *)errorMessage:(int)errCode {
  NSString *result = @"internal error";

  // size the buffer we need
  size_t len = regerror(errCode, &regexData_, NULL, 0);
  if (len == 0)
    return result;  // nothing to fetch (and a zero length array isn't legal)
  char buffer[len];
  // fetch the error
  if (len == regerror(errCode, &regexData_, buffer, len)) {
    NSString *generatedError = [NSString stringWithUTF8String:buffer];
    if (generatedError)
      result = generatedError;
  }
  return result;
}

// private helper to run the regex on a block
//
// Returns YES on a match (with |pmatch| filled in by regexec), NO for no match,
// a nil |utf8Str|, or a regexec error (errors are only logged in DEBUG builds).
- (BOOL)runRegexOnUTF8:(const char*)utf8Str
                nmatch:(size_t)nmatch
                pmatch:(regmatch_t *)pmatch
                 flags:(int)flags {
  if (!utf8Str)
    return NO;

#ifdef DEBUG
  // When REG_STARTEND is used the buffer is delimited by pmatch[0] rather than
  // a NUL, so remember how much of it is safe to print before regexec gets a
  // chance to overwrite pmatch.
  int previewLen = 20;
  if ((flags & REG_STARTEND) && pmatch && (pmatch[0].rm_eo < previewLen)) {
    previewLen = (pmatch[0].rm_eo > 0) ? (int)pmatch[0].rm_eo : 0;
  }
#endif

  int execResult = regexec(&regexData_, utf8Str, nmatch, pmatch, flags);
  if (execResult != 0) {
#ifdef DEBUG
    if (execResult != REG_NOMATCH) {
      NSString *errorStr = [self errorMessage:execResult];
      NSLog(@"%@: matching string \"%.*s...\", had error: \"%@\"",
            self, previewLen, utf8Str, errorStr);
    }
#endif
    return NO;
  }
  return YES;
}

@end

@implementation GTMRegexEnumerator

- (id)init {
  return [self initWithRegex:nil processString:nil allSegments:NO];
}

- (id)initWithRegex:(GTMRegex *)regex
      processString:(NSString *)str
        allSegments:(BOOL)allSegments {
  self = [super init];
  if (!self) return nil;

  // collect args
  regex_ = [regex retain];
  utf8StrBuf_ = [[str dataUsingEncoding:NSUTF8StringEncoding] retain];
  allSegments_ = allSegments;

  // arg check
  if (!regex_ || !utf8StrBuf_) {
    [self release];
    return nil;
  }

  // parsing state initialized to zero for us by object creation

  return self;
}

- (void)dealloc {
  if (savedRegMatches_) {
    free(savedRegMatches_);
    savedRegMatches_ = nil;
  }
  [regex_ release];
  [utf8StrBuf_ release];
  [super dealloc];
}

// Returns the next GTMRegexStringSegment, or nil when the string has been
// consumed (or on an allocation failure/exception).
//
// NOTE(review): when the pattern matches a zero-length string at
// curParseIndex_ (ie: a pattern like "a*"), rm_eo == curParseIndex_, so the
// "advance our marker" assignments below do not move forward and the same
// empty match looks like it would be returned on every call.  Confirm, and
// decide how zero-length matches should advance, before relying on
// -allObjects with such patterns.
- (id)nextObject {

  GTMRegexStringSegment *result = nil;
  regmatch_t *nextMatches = nil;
  BOOL isMatch = NO;

  // we do all this w/in a try, so if something throws, the memory we malloced
  // will still get cleaned up
  @try {

    // if we have a saved match, use that...
    if (savedRegMatches_) {
      nextMatches = savedRegMatches_;
      savedRegMatches_ = nil;
      isMatch = YES; // if we have something saved, it was a pattern match
    }
    // have we reached the end?
    else if (curParseIndex_ >= [utf8StrBuf_ length]) {
      // done, do nothing, we'll return nil
    }
    // do the search.
    else {

      // alloc the match structure (extra space for the zero (full) match)
      size_t matchBufSize = ([regex_ subPatternCount] + 1) * sizeof(regmatch_t);
      nextMatches = malloc(matchBufSize);
      if (!nextMatches)
        return nil;

      // setup our range to work on
      nextMatches[0].rm_so = curParseIndex_;
      nextMatches[0].rm_eo = [utf8StrBuf_ length];

      // call for the match
      if ([regex_ runRegexOnUTF8:[utf8StrBuf_ bytes]
                          nmatch:([regex_ subPatternCount] + 1)
                          pmatch:nextMatches
                           flags:REG_STARTEND]) {
        // match

        if (allSegments_ &&
            (nextMatches[0].rm_so != curParseIndex_)) {
          // we should return all segments (not just matches), and there was
          // something before this match.  So save off this match for later
          // and create a range for this.

          savedRegMatches_ = nextMatches;
          nextMatches = malloc(matchBufSize);
          if (!nextMatches)
            return nil;

          isMatch = NO;
          // mark everything but the zero slot w/ not used
          for (int x = [regex_ subPatternCount]; x > 0; --x) {
            nextMatches[x].rm_so = nextMatches[x].rm_eo = -1;
          }
          nextMatches[0].rm_so = curParseIndex_;
          nextMatches[0].rm_eo = savedRegMatches_[0].rm_so;

          // advance our marker
          curParseIndex_ = savedRegMatches_[0].rm_eo;

        } else {
          // we only return matches or are pointed at a match

          // no real work to do, just fall through to return the current
          // match.
          isMatch = YES;

          // advance our marker
          curParseIndex_ = nextMatches[0].rm_eo;
        }

      } else {
        // no match

        // should we return the last non matching segment?
        if (allSegments_) {
          isMatch = NO;
          // mark everything but the zero slot w/ not used
          for (int x = [regex_ subPatternCount]; x > 0; --x) {
            nextMatches[x].rm_so = nextMatches[x].rm_eo = -1;
          }
          nextMatches[0].rm_so = curParseIndex_;
          nextMatches[0].rm_eo = [utf8StrBuf_ length];
        } else {
          // drop match set, we don't want it
          free(nextMatches);
          nextMatches = nil;
        }

        // advance our marker since we're done
        curParseIndex_ = [utf8StrBuf_ length];

      }
    }

    // create the segment to return (it takes ownership of nextMatches)
    if (nextMatches) {
      result =
        [[[GTMRegexStringSegment alloc] initWithUTF8StrBuf:utf8StrBuf_
                                                regMatches:nextMatches
                                             numRegMatches:[regex_ subPatternCount]
                                                   isMatch:isMatch] autorelease];
      nextMatches = nil;
    }
  }
  @catch (id e) {
    NSLog(@"Exceptions while trying to advance enumeration (%@)", e);
  }

  // if we still have something in our temp, free it
  if (nextMatches)
    free(nextMatches);

  return result;
}

- (NSString *)description {
  // utf8StrBuf_ holds raw bytes with no NUL terminator, so bound the preview
  // by the buffer length instead of trusting "%.20s" to find a terminator.
  size_t bufLen = [utf8StrBuf_ length];
  int previewLen = (bufLen < 20) ? (int)bufLen : 20;
  const char *preview = (bufLen > 0) ? (const char *)[utf8StrBuf_ bytes] : "";
  return [NSString stringWithFormat:@"%@<%p> { regex=\"%@\", allSegments=%s, string=\"%.*s...\" }",
    [self class], self,
    regex_,
    (allSegments_ ? "YES" : "NO"),
    previewLen, preview];
}

@end

@implementation GTMRegexStringSegment

- (id)init {
  return [self initWithUTF8StrBuf:nil
                       regMatches:nil
                    numRegMatches:0
                          isMatch:NO];
}

- (void)dealloc {
  if (regMatches_) {
    free(regMatches_);
    regMatches_ = nil;
  }
  [utf8StrBuf_ release];
  [super dealloc];
}

- (BOOL)isMatch {
  return isMatch_;
}

- (NSString *)string {
  // fetch match zero
  return [self subPatternString:0];
}

// Returns the text of subpattern |index| (0 is the whole segment), or nil if
// |index| is out of range or that subpattern was not used.
- (NSString *)subPatternString:(int)index {
  if ((index < 0) || (index > numRegMatches_))
    return nil;

  // pick off when it wasn't found
  if ((regMatches_[index].rm_so == -1) && (regMatches_[index].rm_eo == -1))
    return nil;

  // fetch the string
  const char *base = (const char*)[utf8StrBuf_ bytes] + regMatches_[index].rm_so;
  unsigned len = regMatches_[index].rm_eo - regMatches_[index].rm_so;
  return [[[NSString alloc] initWithBytes:base
                                   length:len
                                 encoding:NSUTF8StringEncoding] autorelease];
}

- (NSString *)description {
  NSMutableString *result =
    [NSMutableString stringWithFormat:@"%@<%p> { isMatch=\"%s\", subPatterns=(",
      [self class], self, (isMatch_ ? "YES" : "NO")];
  for (int x = 0; x <= numRegMatches_; ++x) {
    [result appendString:((x == 0) ? @" " : @", ")];

    // unused subpatterns (-1/-1) print as an empty string; don't do pointer
    // math with the -1 offset.
    const char *base = "";
    int len = 0;
    if ((regMatches_[x].rm_so != -1) || (regMatches_[x].rm_eo != -1)) {
      base = ((const char*)[utf8StrBuf_ bytes]) + regMatches_[x].rm_so;
      len = (int)(regMatches_[x].rm_eo - regMatches_[x].rm_so);
    }
    [result appendFormat:@"\"%.*s\"", len, base];
  }
  [result appendString:@" ) }"];

  return result;
}

@end

@implementation GTMRegexStringSegment (PrivateMethods)

- (id)initWithUTF8StrBuf:(NSData *)utf8StrBuf
              regMatches:(regmatch_t *)regMatches
           numRegMatches:(int)numRegMatches
                 isMatch:(BOOL)isMatch {
  self = [super init];
  if (!self) return nil;

  utf8StrBuf_ = [utf8StrBuf retain];
  regMatches_ = regMatches;
  numRegMatches_ = numRegMatches;
  isMatch_ = isMatch;

  // check the args (dealloc frees regMatches_ if we bail here)
  if (!utf8StrBuf_ || !regMatches_ || (numRegMatches_ < 0)) {
    [self release];
    return nil;
  }

  return self;
}

@end

@implementation NSString (GTMRegexAdditions)

- (BOOL)gtm_matchesPattern:(NSString *)pattern {
  GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
  return [regex matchesString:self];
}

- (NSArray *)gtm_subPatternsOfPattern:(NSString *)pattern {
  GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
  return [regex subPatternsOfString:self];
}

- (NSString *)gtm_firstSubStringMatchedByPattern:(NSString *)pattern {
  GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
  NSEnumerator *enumerator = [regex matchSegmentEnumeratorForString:self];
  GTMRegexStringSegment *firstMatch = [enumerator nextObject];
  return [firstMatch string];
}

- (NSArray *)gtm_allSubstringsMatchedByPattern:(NSString *)pattern {
  NSEnumerator *enumerator = [self gtm_matchSegmentEnumeratorForPattern:pattern];
  NSArray *allSegments = [enumerator allObjects];
  // KVC maps each segment to the result of its -string method
  return [allSegments valueForKey:@"string"];
}

- (NSEnumerator *)gtm_segmentEnumeratorForPattern:(NSString *)pattern {
  GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
  return [regex segmentEnumeratorForString:self];
}

- (NSEnumerator *)gtm_matchSegmentEnumeratorForPattern:(NSString *)pattern {
  GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
  return [regex matchSegmentEnumeratorForString:self];
}

- (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
                                    withReplacement:(NSString *)replacementPattern {
  GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
  return [regex stringByReplacingMatchesInString:self
                                 withReplacement:replacementPattern];
}

@end

// Patch metadata (preserved): next file in the patch
//
//   diff --git a/Foundation/GTMRegexTest.m b/Foundation/GTMRegexTest.m
//   new file mode 100644
//   index 0000000..ef7d1e5
//   --- /dev/null
//   +++ b/Foundation/GTMRegexTest.m
//   @@ -0,0 +1,955 @@

//
//  GTMRegexTest.m
//
//  Copyright 2007-2008 Google Inc.
//
//  Licensed under the Apache License, Version 2.0 (the "License"); you may not
//  use this file except in compliance with the License.  You may obtain a copy
//  of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
//  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
//  License for the specific language governing permissions and limitations under
//  the License.
//

// NOTE(review): the target of this import was stripped when the patch was
// extracted; SenTestingKit is assumed because the test classes derive from
// SenTestCase - confirm against the original commit.
#import <SenTestingKit/SenTestingKit.h>

#import "GTMSenTestCase.h"
#import "GTMRegex.h"

//
// NOTE:
//
// We don't really test any of the pattern matching since that's testing
// libregex, we just want to test our wrapper.
//

@interface GTMRegexTest : SenTestCase
@end

@interface NSString_GTMRegexAdditions : SenTestCase
@end

// Test-local helpers for walking a segment enumerator.  They are macros rather
// than methods so the ST* assertions inside them still report the file and
// line of the calling test.  Guarded so the definitions are harmless if the
// same helpers are already defined.
//
// GTMAssertNextSegment: stores the enumerator's next object in |seg| and
// asserts it is non-nil, that -isMatch agrees with |expectMatch|, and that
// -string equals |expectedString|.
#ifndef GTMAssertNextSegment
#define GTMAssertNextSegment(seg, enumerator, expectMatch, expectedString) \
  do { \
    (seg) = [(enumerator) nextObject]; \
    STAssertNotNil((seg), nil); \
    if (expectMatch) { \
      STAssertTrue([(seg) isMatch], nil); \
    } else { \
      STAssertFalse([(seg) isMatch], nil); \
    } \
    STAssertEqualStrings([(seg) string], (expectedString), nil); \
  } while (0)
#endif

// GTMAssertNoMoreSegments: stores the enumerator's next object in |seg| and
// asserts that it is nil (the enumerator is exhausted).
#ifndef GTMAssertNoMoreSegments
#define GTMAssertNoMoreSegments(seg, enumerator) \
  do { \
    (seg) = [(enumerator) nextObject]; \
    STAssertNil((seg), nil); \
  } while (0)
#endif

@implementation GTMRegexTest

// +escapedPatternForString: leaves plain text alone, escapes every regex
// metacharacter, and passes nil/empty input through.
- (void)testEscapedPatternForString {
  STAssertEqualStrings([GTMRegex escapedPatternForString:@"abcdefghijklmnopqrstuvwxyz0123456789"],
                       @"abcdefghijklmnopqrstuvwxyz0123456789",
                       nil);
  STAssertEqualStrings([GTMRegex escapedPatternForString:@"^.[$()|*+?{\\"],
                       @"\\^\\.\\[\\$\\(\\)\\|\\*\\+\\?\\{\\\\",
                       nil);
  STAssertEqualStrings([GTMRegex escapedPatternForString:@"a^b.c[d$e(f)g|h*i+j?k{l\\m"],
                       @"a\\^b\\.c\\[d\\$e\\(f\\)g\\|h\\*i\\+j\\?k\\{l\\\\m",
                       nil);

  STAssertNil([GTMRegex escapedPatternForString:nil], nil);
  STAssertEqualStrings([GTMRegex escapedPatternForString:@""], @"", nil);
}


// Initializers and convenience constructors return nil for a nil or
// malformed pattern and an object for a valid one.
- (void)testInit {

  // fail cases
  STAssertNil([[[GTMRegex alloc] init] autorelease], nil);
  STAssertNil([[[GTMRegex alloc] initWithPattern:nil] autorelease], nil);
  STAssertNil([[[GTMRegex alloc] initWithPattern:nil
                                         options:kGTMRegexOptionIgnoreCase] autorelease], nil);
  STAssertNil([[[GTMRegex alloc] initWithPattern:@"(."] autorelease], nil);
  STAssertNil([[[GTMRegex alloc] initWithPattern:@"(."
                                         options:kGTMRegexOptionIgnoreCase] autorelease], nil);

  // basic pattern w/ options
  STAssertNotNil([[[GTMRegex alloc] initWithPattern:@"(.*)"] autorelease], nil);
  STAssertNotNil([[[GTMRegex alloc] initWithPattern:@"(.*)"
                                            options:0] autorelease], nil);
  STAssertNotNil([[[GTMRegex alloc] initWithPattern:@"(.*)"
                                            options:kGTMRegexOptionIgnoreCase] autorelease], nil);

  // fail cases (helper)
  STAssertNil([GTMRegex regexWithPattern:nil], nil);
  STAssertNil([GTMRegex regexWithPattern:nil
                                 options:0], nil);
  STAssertNil([GTMRegex regexWithPattern:@"(."], nil);
  STAssertNil([GTMRegex regexWithPattern:@"(."
                                 options:0], nil);

  // basic pattern w/ options (helper)
  STAssertNotNil([GTMRegex regexWithPattern:@"(.*)"], nil);
  STAssertNotNil([GTMRegex regexWithPattern:@"(.*)"
                                    options:0], nil);
  STAssertNotNil([GTMRegex regexWithPattern:@"(.*)"
                                    options:kGTMRegexOptionIgnoreCase], nil);
}

// Walks the segments produced for one multi-line string under each option
// combination (default, ignore-case, newline support suppressed).
- (void)testOptions {

  NSString *testString = @"aaa AAA\nbbb BBB\n aaa aAa\n bbb BbB";
  GTMRegexStringSegment *seg = nil;

  // default options
  GTMRegex *regex = [GTMRegex regexWithPattern:@"a+"];
  STAssertNotNil(regex, nil);
  NSEnumerator *enumerator = [regex segmentEnumeratorForString:testString];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, YES, @"aaa");
  GTMAssertNextSegment(seg, enumerator, NO, @" AAA\nbbb BBB\n ");
  GTMAssertNextSegment(seg, enumerator, YES, @"aaa");
  GTMAssertNextSegment(seg, enumerator, NO, @" ");
  GTMAssertNextSegment(seg, enumerator, YES, @"a");
  GTMAssertNextSegment(seg, enumerator, NO, @"A");
  GTMAssertNextSegment(seg, enumerator, YES, @"a");
  GTMAssertNextSegment(seg, enumerator, NO, @"\n bbb BbB");
  GTMAssertNoMoreSegments(seg, enumerator);

  // kGTMRegexOptionIgnoreCase
  regex = [GTMRegex regexWithPattern:@"a+" options:kGTMRegexOptionIgnoreCase];
  STAssertNotNil(regex, nil);
  enumerator = [regex segmentEnumeratorForString:testString];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, YES, @"aaa");
  GTMAssertNextSegment(seg, enumerator, NO, @" ");
  GTMAssertNextSegment(seg, enumerator, YES, @"AAA");
  GTMAssertNextSegment(seg, enumerator, NO, @"\nbbb BBB\n ");
  GTMAssertNextSegment(seg, enumerator, YES, @"aaa");
  GTMAssertNextSegment(seg, enumerator, NO, @" ");
  GTMAssertNextSegment(seg, enumerator, YES, @"aAa");
  GTMAssertNextSegment(seg, enumerator, NO, @"\n bbb BbB");
  GTMAssertNoMoreSegments(seg, enumerator);

  // defaults w/ '^'
  regex = [GTMRegex regexWithPattern:@"^a+"];
  STAssertNotNil(regex, nil);
  enumerator = [regex segmentEnumeratorForString:testString];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, YES, @"aaa");
  GTMAssertNextSegment(seg, enumerator, NO, @" AAA\nbbb BBB\n aaa aAa\n bbb BbB");
  GTMAssertNoMoreSegments(seg, enumerator);

  // defaults w/ '$'
  regex = [GTMRegex regexWithPattern:@"B+$"];
  STAssertNotNil(regex, nil);
  enumerator = [regex segmentEnumeratorForString:testString];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, NO, @"aaa AAA\nbbb ");
  GTMAssertNextSegment(seg, enumerator, YES, @"BBB");
  GTMAssertNextSegment(seg, enumerator, NO, @"\n aaa aAa\n bbb Bb");
  GTMAssertNextSegment(seg, enumerator, YES, @"B");
  GTMAssertNoMoreSegments(seg, enumerator);

  // kGTMRegexOptionIgnoreCase w/ '$'
  regex = [GTMRegex regexWithPattern:@"B+$"
                             options:kGTMRegexOptionIgnoreCase];
  STAssertNotNil(regex, nil);
  enumerator = [regex segmentEnumeratorForString:testString];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, NO, @"aaa AAA\nbbb ");
  GTMAssertNextSegment(seg, enumerator, YES, @"BBB");
  GTMAssertNextSegment(seg, enumerator, NO, @"\n aaa aAa\n bbb ");
  GTMAssertNextSegment(seg, enumerator, YES, @"BbB");
  GTMAssertNoMoreSegments(seg, enumerator);

  // test w/ kGTMRegexOptionSupressNewlineSupport and \n in the string
  regex = [GTMRegex regexWithPattern:@"a.*b" options:kGTMRegexOptionSupressNewlineSupport];
  STAssertNotNil(regex, nil);
  enumerator = [regex segmentEnumeratorForString:testString];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, YES, @"aaa AAA\nbbb BBB\n aaa aAa\n bbb Bb");
  GTMAssertNextSegment(seg, enumerator, NO, @"B");
  GTMAssertNoMoreSegments(seg, enumerator);

  // test w/o kGTMRegexOptionSupressNewlineSupport and \n in the string
  // (this is no match since '.' can't match the '\n')
  regex = [GTMRegex regexWithPattern:@"a.*b"];
  STAssertNotNil(regex, nil);
  enumerator = [regex segmentEnumeratorForString:testString];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, NO, @"aaa AAA\nbbb BBB\n aaa aAa\n bbb BbB");
  GTMAssertNoMoreSegments(seg, enumerator);

  // kGTMRegexOptionSupressNewlineSupport w/ '^'
  regex = [GTMRegex regexWithPattern:@"^a+" options:kGTMRegexOptionSupressNewlineSupport];
  STAssertNotNil(regex, nil);
  enumerator = [regex segmentEnumeratorForString:testString];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, YES, @"aaa");
  GTMAssertNextSegment(seg, enumerator, NO, @" AAA\nbbb BBB\n aaa aAa\n bbb BbB");
  GTMAssertNoMoreSegments(seg, enumerator);

  // kGTMRegexOptionSupressNewlineSupport w/ '$'
  regex = [GTMRegex regexWithPattern:@"B+$" options:kGTMRegexOptionSupressNewlineSupport];
  STAssertNotNil(regex, nil);
  enumerator = [regex segmentEnumeratorForString:testString];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, NO, @"aaa AAA\nbbb BBB\n aaa aAa\n bbb Bb");
  GTMAssertNextSegment(seg, enumerator, YES, @"B");
  GTMAssertNoMoreSegments(seg, enumerator);
}

// -subPatternCount reports the number of parenthesized groups in the pattern.
- (void)testSubPatternCount {
  STAssertEquals(0, [[GTMRegex regexWithPattern:@".*"] subPatternCount], nil);
  STAssertEquals(1, [[GTMRegex regexWithPattern:@"(.*)"] subPatternCount], nil);
  STAssertEquals(1, [[GTMRegex regexWithPattern:@"[fo]*(.*)[bar]*"] subPatternCount], nil);
  STAssertEquals(3, [[GTMRegex regexWithPattern:@"([fo]*)(.*)([bar]*)"] subPatternCount], nil);
  STAssertEquals(7, [[GTMRegex regexWithPattern:@"(([bar]*)|([fo]*))(.*)(([bar]*)|([fo]*))"] subPatternCount], nil);
}

// -matchesString: succeeds only when the whole string matches, with and
// without sub patterns in the pattern.
- (void)testMatchesString {
  // simple pattern
  GTMRegex *regex = [GTMRegex regexWithPattern:@"foo.*bar"];
  STAssertNotNil(regex, nil);
  STAssertTrue([regex matchesString:@"foobar"], nil);
  STAssertTrue([regex matchesString:@"foobydoo spambar"], nil);
  STAssertFalse([regex matchesString:@"zzfoobarzz"], nil);
  STAssertFalse([regex matchesString:@"zzfoobydoo spambarzz"], nil);
  STAssertFalse([regex matchesString:@"abcdef"], nil);
  STAssertFalse([regex matchesString:@""], nil);
  STAssertFalse([regex matchesString:nil], nil);
  // pattern w/ sub patterns
  regex = [GTMRegex regexWithPattern:@"(foo)(.*)(bar)"];
  STAssertNotNil(regex, nil);
  STAssertTrue([regex matchesString:@"foobar"], nil);
  STAssertTrue([regex matchesString:@"foobydoo spambar"], nil);
  STAssertFalse([regex matchesString:@"zzfoobarzz"], nil);
  STAssertFalse([regex matchesString:@"zzfoobydoo spambarzz"], nil);
  STAssertFalse([regex matchesString:@"abcdef"], nil);
  STAssertFalse([regex matchesString:@""], nil);
  STAssertFalse([regex matchesString:nil], nil);
}

// -subPatternsOfString: returns the whole match plus one entry per group
// (NSNull for a group that did not participate), or nil when the string does
// not match in full.
- (void)testSubPatternsOfString {
  GTMRegex *regex = [GTMRegex regexWithPattern:@"(fo(o+))((bar)|(baz))"];
  STAssertNotNil(regex, nil);
  STAssertEquals(5, [regex subPatternCount], nil);
  NSArray *subPatterns = [regex subPatternsOfString:@"foooooobaz"];
  STAssertNotNil(subPatterns, nil);
  STAssertEquals(6U, [subPatterns count], nil);
  STAssertEqualStrings(@"foooooobaz", [subPatterns objectAtIndex:0], nil);
  STAssertEqualStrings(@"foooooo", [subPatterns objectAtIndex:1], nil);
  STAssertEqualStrings(@"ooooo", [subPatterns objectAtIndex:2], nil);
  STAssertEqualStrings(@"baz", [subPatterns objectAtIndex:3], nil);
  STAssertTrue(([NSNull null] == [subPatterns objectAtIndex:4]), nil);
  STAssertEqualStrings(@"baz", [subPatterns objectAtIndex:5], nil);

  // not there
  subPatterns = [regex subPatternsOfString:@"aaa"];
  STAssertNil(subPatterns, nil);

  // not extra stuff on either end
  subPatterns = [regex subPatternsOfString:@"ZZZfoooooobaz"];
  STAssertNil(subPatterns, nil);
  subPatterns = [regex subPatternsOfString:@"foooooobazZZZ"];
  STAssertNil(subPatterns, nil);
  subPatterns = [regex subPatternsOfString:@"ZZZfoooooobazZZZ"];
  STAssertNil(subPatterns, nil);
}

// -segmentEnumeratorForString: yields matching and non-matching segments in
// order, and each segment exposes its saved sub patterns.
- (void)testSegmentEnumeratorForString {
  GTMRegexStringSegment *seg = nil;
  GTMRegex *regex = [GTMRegex regexWithPattern:@"foo+ba+r"];
  STAssertNotNil(regex, nil);
  NSEnumerator *enumerator = [regex segmentEnumeratorForString:@"afoobarbfooobaarfoobarzz"];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, NO, @"a");
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  GTMAssertNextSegment(seg, enumerator, NO, @"b");
  GTMAssertNextSegment(seg, enumerator, YES, @"fooobaar");
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  GTMAssertNextSegment(seg, enumerator, NO, @"zz");
  GTMAssertNoMoreSegments(seg, enumerator);

  // test no match
  enumerator = [regex segmentEnumeratorForString:@"aaa"];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, NO, @"aaa");
  GTMAssertNoMoreSegments(seg, enumerator);

  // test only match
  enumerator = [regex segmentEnumeratorForString:@"foobar"];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  GTMAssertNoMoreSegments(seg, enumerator);

  // now test the saved sub segments
  regex = [GTMRegex regexWithPattern:@"(foo)((bar)|(baz))"];
  STAssertNotNil(regex, nil);
  STAssertEquals(4, [regex subPatternCount], nil);
  enumerator = [regex segmentEnumeratorForString:@"foobarxxfoobaz"];
  STAssertNotNil(enumerator, nil);
  // "foobar"
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  STAssertEqualStrings([seg subPatternString:0], @"foobar", nil);
  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
  STAssertEqualStrings([seg subPatternString:2], @"bar", nil);
  STAssertEqualStrings([seg subPatternString:3], @"bar", nil);
  STAssertNil([seg subPatternString:4], nil); // nothing matched "(baz)"
  STAssertNil([seg subPatternString:5], nil);
  // "xx"
  GTMAssertNextSegment(seg, enumerator, NO, @"xx");
  STAssertEqualStrings([seg subPatternString:0], @"xx", nil);
  STAssertNil([seg subPatternString:1], nil);
  // "foobaz"
  GTMAssertNextSegment(seg, enumerator, YES, @"foobaz");
  STAssertEqualStrings([seg subPatternString:0], @"foobaz", nil);
  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
  STAssertEqualStrings([seg subPatternString:2], @"baz", nil);
  STAssertNil([seg subPatternString:3], nil); // (nothing matched "(bar)"
  STAssertEqualStrings([seg subPatternString:4], @"baz", nil);
  STAssertNil([seg subPatternString:5], nil);
  // (end)
  GTMAssertNoMoreSegments(seg, enumerator);

  // test all objects
  regex = [GTMRegex regexWithPattern:@"foo+ba+r"];
  STAssertNotNil(regex, nil);
  enumerator = [regex segmentEnumeratorForString:@"afoobarbfooobaarfoobarzz"];
  STAssertNotNil(enumerator, nil);
  NSArray *allSegments = [enumerator allObjects];
  STAssertNotNil(allSegments, nil);
  STAssertEquals(6U, [allSegments count], nil);
}

// -matchSegmentEnumeratorForString: yields only the matching segments; the
// text between matches is skipped.
- (void)testMatchSegmentEnumeratorForString {
  GTMRegexStringSegment *seg = nil;
  GTMRegex *regex = [GTMRegex regexWithPattern:@"foo+ba+r"];
  STAssertNotNil(regex, nil);
  NSEnumerator *enumerator = [regex matchSegmentEnumeratorForString:@"afoobarbfooobaarfoobarzz"];
  STAssertNotNil(enumerator, nil);
  // "a" - skipped
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  // "b" - skipped
  GTMAssertNextSegment(seg, enumerator, YES, @"fooobaar");
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  // "zz" - skipped
  // (end)
  GTMAssertNoMoreSegments(seg, enumerator);

  // test no match
  enumerator = [regex matchSegmentEnumeratorForString:@"aaa"];
  STAssertNotNil(enumerator, nil);
  GTMAssertNoMoreSegments(seg, enumerator); // should have gotten nothing

  // test only match
  enumerator = [regex matchSegmentEnumeratorForString:@"foobar"];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  GTMAssertNoMoreSegments(seg, enumerator);

  // now test the saved sub segments
  regex = [GTMRegex regexWithPattern:@"(foo)((bar)|(baz))"];
  STAssertNotNil(regex, nil);
  STAssertEquals(4, [regex subPatternCount], nil);
  enumerator = [regex matchSegmentEnumeratorForString:@"foobarxxfoobaz"];
  STAssertNotNil(enumerator, nil);
  // "foobar"
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  STAssertEqualStrings([seg subPatternString:0], @"foobar", nil);
  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
  STAssertEqualStrings([seg subPatternString:2], @"bar", nil);
  STAssertEqualStrings([seg subPatternString:3], @"bar", nil);
  STAssertNil([seg subPatternString:4], nil); // nothing matched "(baz)"
  STAssertNil([seg subPatternString:5], nil);
  // "xx" - skipped
  // "foobaz"
  GTMAssertNextSegment(seg, enumerator, YES, @"foobaz");
  STAssertEqualStrings([seg subPatternString:0], @"foobaz", nil);
  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
  STAssertEqualStrings([seg subPatternString:2], @"baz", nil);
  STAssertNil([seg subPatternString:3], nil); // (nothing matched "(bar)"
  STAssertEqualStrings([seg subPatternString:4], @"baz", nil);
  STAssertNil([seg subPatternString:5], nil);
  // (end)
  GTMAssertNoMoreSegments(seg, enumerator);

  // test all objects
  regex = [GTMRegex regexWithPattern:@"foo+ba+r"];
  STAssertNotNil(regex, nil);
  enumerator = [regex matchSegmentEnumeratorForString:@"afoobarbfooobaarfoobarzz"];
  STAssertNotNil(enumerator, nil);
  NSArray *allSegments = [enumerator allObjects];
  STAssertNotNil(allSegments, nil);
  STAssertEquals(3U, [allSegments count], nil);
}

// -stringByReplacingMatchesInString:withReplacement: substitutes \N group
// references, treats nil/empty replacements as deletion, and handles
// backslashes that are not part of a group reference.
- (void)testStringByReplacingMatchesInStringWithReplacement {
  GTMRegex *regex = [GTMRegex regexWithPattern:@"(foo)(.*)(bar)"];
  STAssertNotNil(regex, nil);
  // the basics
  STAssertEqualStrings(@"weeZbarZbydoo spamZfooZdoggies",
                       [regex stringByReplacingMatchesInString:@"weefoobydoo spambardoggies"
                                               withReplacement:@"Z\\3Z\\2Z\\1Z"],
                       nil);
  // nil/empty replacement
  STAssertEqualStrings(@"weedoggies",
                       [regex stringByReplacingMatchesInString:@"weefoobydoo spambardoggies"
                                               withReplacement:nil],
                       nil);
  STAssertEqualStrings(@"weedoggies",
                       [regex stringByReplacingMatchesInString:@"weefoobydoo spambardoggies"
                                               withReplacement:@""],
                       nil);
  // use optional and invalid subexpression parts to confirm that works
  regex = [GTMRegex regexWithPattern:@"(fo(o+))((bar)|(baz))"];
  STAssertNotNil(regex, nil);
  STAssertEqualStrings(@"aaa baz bar bar foo baz aaa",
                       [regex stringByReplacingMatchesInString:@"aaa foooooobaz fooobar bar foo baz aaa"
                                               withReplacement:@"\\4\\5"],
                       nil);
  STAssertEqualStrings(@"aaa ZZZ ZZZ bar foo baz aaa",
                       [regex stringByReplacingMatchesInString:@"aaa foooooobaz fooobar bar foo baz aaa"
                                               withReplacement:@"Z\\10Z\\12Z"],
                       nil);
  // test slashes in replacement that aren't part of the subpattern reference
  regex = [GTMRegex regexWithPattern:@"a+"];
  STAssertNotNil(regex, nil);
  STAssertEqualStrings(@"z\\\\0 \\\\a \\\\\\\\0z",
                       [regex stringByReplacingMatchesInString:@"zaz"
                                               withReplacement:@"\\\\0 \\\\\\0 \\\\\\\\0"],
                       nil);
  STAssertEqualStrings(@"z\\\\a \\\\\\\\0 \\\\\\\\az",
                       [regex stringByReplacingMatchesInString:@"zaz"
                                               withReplacement:@"\\\\\\0 \\\\\\\\0 \\\\\\\\\\0"],
                       nil);
  STAssertEqualStrings(@"z\\\\\\\\0 \\\\\\\\a \\\\\\\\\\\\0z",
                       [regex stringByReplacingMatchesInString:@"zaz"
                                               withReplacement:@"\\\\\\\\0 \\\\\\\\\\0 \\\\\\\\\\\\0"],
                       nil);
}

@end

@implementation NSString_GTMRegexAdditions
// Only partial tests to test that the call get
// through correctly since the
// above really tests them.

// Test-local helpers for walking a segment enumerator.  They are macros rather
// than methods so the ST* assertions inside them still report the file and
// line of the calling test.  Guarded so the definitions are harmless if the
// same helpers are already defined.
//
// GTMAssertNextSegment: stores the enumerator's next object in |seg| and
// asserts it is non-nil, that -isMatch agrees with |expectMatch|, and that
// -string equals |expectedString|.
#ifndef GTMAssertNextSegment
#define GTMAssertNextSegment(seg, enumerator, expectMatch, expectedString) \
  do { \
    (seg) = [(enumerator) nextObject]; \
    STAssertNotNil((seg), nil); \
    if (expectMatch) { \
      STAssertTrue([(seg) isMatch], nil); \
    } else { \
      STAssertFalse([(seg) isMatch], nil); \
    } \
    STAssertEqualStrings([(seg) string], (expectedString), nil); \
  } while (0)
#endif

// GTMAssertNoMoreSegments: stores the enumerator's next object in |seg| and
// asserts that it is nil (the enumerator is exhausted).
#ifndef GTMAssertNoMoreSegments
#define GTMAssertNoMoreSegments(seg, enumerator) \
  do { \
    (seg) = [(enumerator) nextObject]; \
    STAssertNil((seg), nil); \
  } while (0)
#endif

// -gtm_matchesPattern: is true only when the whole receiver matches.
- (void)testMatchesPattern {
  // simple pattern
  NSString *simplePattern = @"foo.*bar";
  STAssertTrue([@"foobar" gtm_matchesPattern:simplePattern], nil);
  STAssertTrue([@"foobydoo spambar" gtm_matchesPattern:simplePattern], nil);
  STAssertFalse([@"zzfoobarzz" gtm_matchesPattern:simplePattern], nil);
  STAssertFalse([@"zzfoobydoo spambarzz" gtm_matchesPattern:simplePattern], nil);
  STAssertFalse([@"abcdef" gtm_matchesPattern:simplePattern], nil);
  STAssertFalse([@"" gtm_matchesPattern:simplePattern], nil);
  // pattern w/ sub patterns
  NSString *groupedPattern = @"(foo)(.*)(bar)";
  STAssertTrue([@"foobar" gtm_matchesPattern:groupedPattern], nil);
  STAssertTrue([@"foobydoo spambar" gtm_matchesPattern:groupedPattern], nil);
  STAssertFalse([@"zzfoobarzz" gtm_matchesPattern:groupedPattern], nil);
  STAssertFalse([@"zzfoobydoo spambarzz" gtm_matchesPattern:groupedPattern], nil);
  STAssertFalse([@"abcdef" gtm_matchesPattern:groupedPattern], nil);
  STAssertFalse([@"" gtm_matchesPattern:groupedPattern], nil);
}

// -gtm_subPatternsOfPattern: returns the whole match plus one entry per group
// (NSNull for a group that did not participate), or nil when the receiver does
// not match in full.
- (void)testSubPatternsOfPattern {
  NSString *pattern = @"(fo(o+))((bar)|(baz))";
  NSArray *subPatterns = [@"foooooobaz" gtm_subPatternsOfPattern:pattern];
  STAssertNotNil(subPatterns, nil);
  STAssertEquals(6U, [subPatterns count], nil);
  STAssertEqualStrings(@"foooooobaz", [subPatterns objectAtIndex:0], nil);
  STAssertEqualStrings(@"foooooo", [subPatterns objectAtIndex:1], nil);
  STAssertEqualStrings(@"ooooo", [subPatterns objectAtIndex:2], nil);
  STAssertEqualStrings(@"baz", [subPatterns objectAtIndex:3], nil);
  STAssertTrue(([NSNull null] == [subPatterns objectAtIndex:4]), nil);
  STAssertEqualStrings(@"baz", [subPatterns objectAtIndex:5], nil);

  // not there
  subPatterns = [@"aaa" gtm_subPatternsOfPattern:pattern];
  STAssertNil(subPatterns, nil);

  // not extra stuff on either end
  subPatterns = [@"ZZZfoooooobaz" gtm_subPatternsOfPattern:pattern];
  STAssertNil(subPatterns, nil);
  subPatterns = [@"foooooobazZZZ" gtm_subPatternsOfPattern:pattern];
  STAssertNil(subPatterns, nil);
  subPatterns = [@"ZZZfoooooobazZZZ" gtm_subPatternsOfPattern:pattern];
  STAssertNil(subPatterns, nil);
}

// -gtm_firstSubStringMatchedByPattern: returns the first matching substring
// (the match may be surrounded by other text) or nil when nothing matches.
- (void)testFirstSubStringMatchedByPattern {
  // simple pattern
  NSString *simplePattern = @"foo.*bar";
  STAssertEqualStrings([@"foobar" gtm_firstSubStringMatchedByPattern:simplePattern],
                       @"foobar", nil);
  STAssertEqualStrings([@"foobydoo spambar" gtm_firstSubStringMatchedByPattern:simplePattern],
                       @"foobydoo spambar", nil);
  STAssertEqualStrings([@"zzfoobarzz" gtm_firstSubStringMatchedByPattern:simplePattern],
                       @"foobar", nil);
  STAssertEqualStrings([@"zzfoobydoo spambarzz" gtm_firstSubStringMatchedByPattern:simplePattern],
                       @"foobydoo spambar", nil);
  STAssertNil([@"abcdef" gtm_firstSubStringMatchedByPattern:simplePattern], nil);
  STAssertNil([@"" gtm_firstSubStringMatchedByPattern:simplePattern], nil);
  // pattern w/ sub patterns
  NSString *groupedPattern = @"(foo)(.*)(bar)";
  STAssertEqualStrings([@"foobar" gtm_firstSubStringMatchedByPattern:groupedPattern],
                       @"foobar", nil);
  STAssertEqualStrings([@"foobydoo spambar" gtm_firstSubStringMatchedByPattern:groupedPattern],
                       @"foobydoo spambar", nil);
  STAssertEqualStrings([@"zzfoobarzz" gtm_firstSubStringMatchedByPattern:groupedPattern],
                       @"foobar", nil);
  STAssertEqualStrings([@"zzfoobydoo spambarzz" gtm_firstSubStringMatchedByPattern:groupedPattern],
                       @"foobydoo spambar", nil);
  STAssertNil([@"abcdef" gtm_firstSubStringMatchedByPattern:groupedPattern], nil);
  STAssertNil([@"" gtm_firstSubStringMatchedByPattern:groupedPattern], nil);
}

// -gtm_segmentEnumeratorForPattern: yields matching and non-matching segments
// in order, and each segment exposes its saved sub patterns.
- (void)testSegmentEnumeratorForPattern {
  GTMRegexStringSegment *seg = nil;
  NSEnumerator *enumerator =
    [@"afoobarbfooobaarfoobarzz" gtm_segmentEnumeratorForPattern:@"foo+ba+r"];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, NO, @"a");
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  GTMAssertNextSegment(seg, enumerator, NO, @"b");
  GTMAssertNextSegment(seg, enumerator, YES, @"fooobaar");
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  GTMAssertNextSegment(seg, enumerator, NO, @"zz");
  GTMAssertNoMoreSegments(seg, enumerator);

  // test no match
  enumerator = [@"aaa" gtm_segmentEnumeratorForPattern:@"foo+ba+r"];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, NO, @"aaa");
  GTMAssertNoMoreSegments(seg, enumerator);

  // test only match
  enumerator = [@"foobar" gtm_segmentEnumeratorForPattern:@"foo+ba+r"];
  STAssertNotNil(enumerator, nil);
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  GTMAssertNoMoreSegments(seg, enumerator);

  // now test the saved sub segments
  enumerator =
    [@"foobarxxfoobaz" gtm_segmentEnumeratorForPattern:@"(foo)((bar)|(baz))"];
  STAssertNotNil(enumerator, nil);
  // "foobar"
  GTMAssertNextSegment(seg, enumerator, YES, @"foobar");
  STAssertEqualStrings([seg subPatternString:0], @"foobar", nil);
  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
  STAssertEqualStrings([seg subPatternString:2], @"bar", nil);
  STAssertEqualStrings([seg subPatternString:3], @"bar", nil);
  STAssertNil([seg subPatternString:4], nil); // nothing matched "(baz)"
  STAssertNil([seg subPatternString:5], nil);
  // "xx"
  GTMAssertNextSegment(seg, enumerator, NO, @"xx");
  STAssertEqualStrings([seg subPatternString:0], @"xx", nil);
  STAssertNil([seg subPatternString:1], nil);
  // "foobaz"
  GTMAssertNextSegment(seg, enumerator, YES, @"foobaz");
  STAssertEqualStrings([seg subPatternString:0], @"foobaz", nil);
  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
  STAssertEqualStrings([seg subPatternString:2], @"baz", nil);
  STAssertNil([seg subPatternString:3], nil); // (nothing matched "(bar)"
  STAssertEqualStrings([seg subPatternString:4], @"baz", nil);
  STAssertNil([seg subPatternString:5], nil);
  // (end)
  GTMAssertNoMoreSegments(seg, enumerator);

  // test all objects
  enumerator = [@"afoobarbfooobaarfoobarzz" gtm_segmentEnumeratorForPattern:@"foo+ba+r"];
  STAssertNotNil(enumerator, nil);
  NSArray *allSegments = [enumerator allObjects];
  STAssertNotNil(allSegments, nil);
  STAssertEquals(6U, [allSegments count], nil);
}

- (void)testMatchSegmentEnumeratorForPattern {
  NSEnumerator *enumerator =
    [@"afoobarbfooobaarfoobarzz" gtm_matchSegmentEnumeratorForPattern:@"foo+ba+r"];
  STAssertNotNil(enumerator, nil);
  // "a" - skipped
  // "foobar"
  GTMRegexStringSegment *seg = [enumerator nextObject];
  STAssertNotNil(seg, nil);
  STAssertTrue([seg isMatch], nil);
  STAssertEqualStrings([seg string], @"foobar", nil);
  // "b" - skipped
  // "fooobaar"
  seg = [enumerator nextObject];
  STAssertNotNil(seg, nil);
  STAssertTrue([seg isMatch], nil);
  STAssertEqualStrings([seg string], @"fooobaar", nil);
  // "foobar"
  seg = [enumerator nextObject];
  STAssertNotNil(seg, nil);
  STAssertTrue([seg isMatch], nil);
  STAssertEqualStrings([seg string], @"foobar", nil);
  // "zz" - skipped
  // (end)
  seg = [enumerator nextObject];
  STAssertNil(seg, nil);

  // test no match
  enumerator = [@"aaa" gtm_matchSegmentEnumeratorForPattern:@"foo+ba+r"];
  STAssertNotNil(enumerator, nil);
  seg = [enumerator nextObject];
  STAssertNil(seg, nil);

  // test only match
  enumerator = [@"foobar" gtm_matchSegmentEnumeratorForPattern:@"foo+ba+r"];
  STAssertNotNil(enumerator, nil);
  seg = [enumerator nextObject];
  STAssertNotNil(seg, nil);
  STAssertTrue([seg isMatch], nil);
  STAssertEqualStrings([seg string], @"foobar", nil);
  seg = [enumerator nextObject];
  STAssertNil(seg, nil);

  // now test the saved sub segments
  enumerator =
    [@"foobarxxfoobaz" gtm_matchSegmentEnumeratorForPattern:@"(foo)((bar)|(baz))"];
  STAssertNotNil(enumerator, nil);
  // "foobar"
  seg = [enumerator nextObject];
  STAssertNotNil(seg, nil);
  STAssertTrue([seg isMatch], nil);
  STAssertEqualStrings([seg string], @"foobar", nil);
  STAssertEqualStrings([seg subPatternString:0], @"foobar", nil);
  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
  STAssertEqualStrings([seg subPatternString:2], @"bar", nil);
  STAssertEqualStrings([seg subPatternString:3], @"bar", nil);
  STAssertNil([seg subPatternString:4], nil); // nothing matched "(baz)"
  STAssertNil([seg subPatternString:5], nil);
  // "xx" - skipped
  // "foobaz"
  seg = [enumerator nextObject];
  STAssertNotNil(seg, nil);
  STAssertTrue([seg isMatch], nil);
  STAssertEqualStrings([seg string], @"foobaz", nil);
  STAssertEqualStrings([seg subPatternString:0], @"foobaz", nil);
  STAssertEqualStrings([seg subPatternString:1], @"foo", nil);
  STAssertEqualStrings([seg subPatternString:2], @"baz", nil);
  STAssertNil([seg subPatternString:3], nil); // (nothing matched "(bar)"
  STAssertEqualStrings([seg subPatternString:4], @"baz", nil);
  STAssertNil([seg
subPatternString:5], nil); + // (end) + seg = [enumerator nextObject]; + STAssertNil(seg, nil); + + // test all objects + enumerator = [@"afoobarbfooobaarfoobarzz" gtm_matchSegmentEnumeratorForPattern:@"foo+ba+r"]; + STAssertNotNil(enumerator, nil); + NSArray *allSegments = [enumerator allObjects]; + STAssertNotNil(allSegments, nil); + STAssertEquals(3U, [allSegments count], nil); +} + +- (void)testAllSubstringsMatchedByPattern { + NSArray *segments = + [@"afoobarbfooobaarfoobarzz" gtm_allSubstringsMatchedByPattern:@"foo+ba+r"]; + STAssertNotNil(segments, nil); + STAssertEquals(3U, [segments count], nil); + STAssertEqualStrings([segments objectAtIndex:0], @"foobar", nil); + STAssertEqualStrings([segments objectAtIndex:1], @"fooobaar", nil); + STAssertEqualStrings([segments objectAtIndex:2], @"foobar", nil); + + // test no match + segments = [@"aaa" gtm_allSubstringsMatchedByPattern:@"foo+ba+r"]; + STAssertNotNil(segments, nil); + STAssertEquals(0U, [segments count], nil); + + // test only match + segments = [@"foobar" gtm_allSubstringsMatchedByPattern:@"foo+ba+r"]; + STAssertNotNil(segments, nil); + STAssertEquals(1U, [segments count], nil); + STAssertEqualStrings([segments objectAtIndex:0], @"foobar", nil); +} + +- (void)testStringByReplacingMatchesOfPatternWithReplacement { + // the basics + STAssertEqualStrings(@"weeZbarZbydoo spamZfooZdoggies", + [@"weefoobydoo spambardoggies" gtm_stringByReplacingMatchesOfPattern:@"(foo)(.*)(bar)" + withReplacement:@"Z\\3Z\\2Z\\1Z"], + nil); + // nil/empty replacement + STAssertEqualStrings(@"weedoggies", + [@"weefoobydoo spambardoggies" gtm_stringByReplacingMatchesOfPattern:@"(foo)(.*)(bar)" + withReplacement:nil], + nil); + STAssertEqualStrings(@"weedoggies", + [@"weefoobydoo spambardoggies" gtm_stringByReplacingMatchesOfPattern:@"(foo)(.*)(bar)" + withReplacement:@""], + nil); + // use optional and invale subexpression parts to confirm that works + STAssertEqualStrings(@"aaa baz bar bar foo baz aaa", + [@"aaa foooooobaz 
fooobar bar foo baz aaa" gtm_stringByReplacingMatchesOfPattern:@"(fo(o+))((bar)|(baz))" + withReplacement:@"\\4\\5"], + nil); + STAssertEqualStrings(@"aaa ZZZ ZZZ bar foo baz aaa", + [@"aaa foooooobaz fooobar bar foo baz aaa" gtm_stringByReplacingMatchesOfPattern:@"(fo(o+))((bar)|(baz))" + withReplacement:@"Z\\10Z\\12Z"], + nil); + // test slashes in replacement that aren't part of the subpattern reference + STAssertEqualStrings(@"z\\\\0 \\\\a \\\\\\\\0z", + [@"zaz" gtm_stringByReplacingMatchesOfPattern:@"a+" + withReplacement:@"\\\\0 \\\\\\0 \\\\\\\\0"], + nil); + STAssertEqualStrings(@"z\\\\a \\\\\\\\0 \\\\\\\\az", + [@"zaz" gtm_stringByReplacingMatchesOfPattern:@"a+" + withReplacement:@"\\\\\\0 \\\\\\\\0 \\\\\\\\\\0"], + nil); + STAssertEqualStrings(@"z\\\\\\\\0 \\\\\\\\a \\\\\\\\\\\\0z", + [@"zaz" gtm_stringByReplacingMatchesOfPattern:@"a+" + withReplacement:@"\\\\\\\\0 \\\\\\\\\\0 \\\\\\\\\\\\0"], + nil); +} + +@end -- cgit v1.2.3