aboutsummaryrefslogtreecommitdiff
path: root/Foundation/GTMRegex.m
blob: 4c393a6368d45847df817c078e8a97e19f3cc278 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
//
//  GTMRegex.m
//
//  Copyright 2007-2008 Google Inc.
//
//  Licensed under the Apache License, Version 2.0 (the "License"); you may not
//  use this file except in compliance with the License.  You may obtain a copy
//  of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
//  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
//  License for the specific language governing permissions and limitations under
//  the License.
//

#define GTMREGEX_DEFINE_GLOBALS 1
#import "GTMRegex.h"
#import "GTMDefines.h"

// This is the pattern to use for walking replacement text when doing
// substitutions.  Each match is some leading text followed by a "\N" style
// backreference.
//
// This pattern may look over-escaped, but remember the compiler will consume
// one layer of slashes, and then we have to escape the slashes for them to be
// seen as we want in the pattern.  Once the compiler is done with it, the
// regex engine is handed:
//   ((^|[^\\])(\\\\)*)(\\([0-9]+))
// Counting opening parens: subpattern 1 is everything in front of the
// backreference (string start or one non-backslash character, then any number
// of backslash pairs) and subpattern 5 is the digits of the backreference.
// The two index defines below name those subpatterns.
static NSString *const kReplacementPattern =
  @"((^|[^\\\\])(\\\\\\\\)*)(\\\\([0-9]+))";
#define kReplacementPatternLeadingTextIndex       1
#define kReplacementPatternSubpatternNumberIndex  5

// Helpers used only within this file (by GTMRegex itself and by the private
// enumerator).
@interface GTMRegex (PrivateMethods)
// Returns the regerror() text for |errCode|, or @"internal error" when that
// text can't be fetched.
- (NSString *)errorMessage:(int)errCode;
// Runs regexec() with the compiled pattern over |utf8Str|, forwarding
// |nmatch|, |pmatch| and |flags| untouched.  Returns YES on a match; NO for a
// NULL string, no match, or any regexec() error.
- (BOOL)runRegexOnUTF8:(const char*)utf8Str
                nmatch:(size_t)nmatch
                pmatch:(regmatch_t *)pmatch
                 flags:(int)flags;
@end

// private enumerator as impl detail
//
// Walks the UTF-8 form of a string with a GTMRegex and hands back one
// GTMRegexStringSegment per call to -nextObject.
@interface GTMRegexEnumerator : NSEnumerator {
 @private
  // the regex being applied (retained)
  GTMRegex *regex_;
  // UTF-8 bytes of the string being walked (retained)
  NSData *utf8StrBuf_;
  // YES to also return the text between matches, NO for matches only
  BOOL allSegments_;
  // YES to never pass REG_NOTBOL to regexec, see the setter for the reasoning
  BOOL treatStartOfNewSegmentAsBeginningOfString_;
  // byte offset into utf8StrBuf_ where the next search starts
  regoff_t curParseIndex_;
  // malloc'ed match data held back for the next -nextObject call (or NULL)
  __strong regmatch_t *savedRegMatches_;
}
// Returns nil if |regex| is nil or |str| can't be converted to UTF-8 data.
- (id)initWithRegex:(GTMRegex *)regex
      processString:(NSString *)str
        allSegments:(BOOL)allSegments;
// Controls whether searches after the first one are flagged with REG_NOTBOL.
- (void)treatStartOfNewSegmentAsBeginningOfString:(BOOL)yesNo;
@end

// Internal initializer used by the enumerator to build segments.
@interface GTMRegexStringSegment (PrivateMethods)
// |utf8StrBuf| is retained.  |regMatches| is a malloc'ed array that the
// segment frees in -dealloc.  |numRegMatches| is the number of subpatterns;
// -subPatternString: accepts indexes 0 (the whole segment) through
// |numRegMatches| inclusive.  Returns nil if either pointer argument is nil.
- (id)initWithUTF8StrBuf:(NSData *)utf8StrBuf
              regMatches:(regmatch_t *)regMatches
           numRegMatches:(NSUInteger)numRegMatches
                 isMatch:(BOOL)isMatch;
@end

@implementation GTMRegex

// Convenience constructor: an autoreleased regex for |pattern| using the
// default options.  nil when the initializer rejects the pattern.
+ (id)regexWithPattern:(NSString *)pattern {
  id regex = [[self alloc] initWithPattern:pattern];
  return [regex autorelease];
}

// Convenience constructor: an autoreleased regex for |pattern| compiled with
// |options|.  nil when the initializer rejects the pattern.
+ (id)regexWithPattern:(NSString *)pattern options:(GTMRegexOptions)options {
  id regex = [[self alloc] initWithPattern:pattern options:options];
  return [regex autorelease];
}

// Convenience constructor: an autoreleased regex for |pattern| compiled with
// |options|.  |outErrorOrNULL| is handed straight to the designated
// initializer, which fills it in when the pattern fails to compile.
+ (id)regexWithPattern:(NSString *)pattern
               options:(GTMRegexOptions)options
             withError:(NSError **)outErrorOrNULL {
  id regex = [[self alloc] initWithPattern:pattern
                                   options:options
                                 withError:outErrorOrNULL];
  return [regex autorelease];
}

// Returns a copy of |str| in which every character that is special in a
// pattern (one of ^ . [ $ ( ) | * + ? { and backslash) is preceded by a
// backslash, so the result matches |str| literally.  nil in, nil out.
+ (NSString *)escapedPatternForString:(NSString *)str {
  if (!str) {
    return nil;
  }

  // NOTE: this could be done more efficiently by fetching the whole string into
  // a unichar buffer and scanning that, along w/ pushing the data over in
  // chunks (when possible).

  // the characters that need a backslash in front of them
  NSCharacterSet *specials =
    [NSCharacterSet characterSetWithCharactersInString:@"^.[$()|*+?{\\"];

  NSUInteger length = [str length];
  NSMutableString *escaped = [NSMutableString stringWithCapacity:length];

  NSUInteger idx = 0;
  while (idx < length) {
    unichar current = [str characterAtIndex:idx];
    if ([specials characterIsMember:current]) {
      [escaped appendFormat:@"\\%C", current];
    } else {
      [escaped appendFormat:@"%C", current];
    }
    ++idx;
  }

  return escaped;
}

// A regex is useless without a pattern, so plain -init just forwards a nil
// pattern; the designated initializer rejects that and returns nil.
- (id)init {
  self = [self initWithPattern:nil];
  return self;
}

// Initializes with |pattern| and no options set (0).
- (id)initWithPattern:(NSString *)pattern {
  self = [self initWithPattern:pattern options:0];
  return self;
}

// Initializes with |pattern| and |options| without asking for an NSError; a
// pattern that fails to compile is logged by the designated initializer.
- (id)initWithPattern:(NSString *)pattern options:(GTMRegexOptions)options {
  self = [self initWithPattern:pattern options:options withError:NULL];
  return self;
}

// Designated initializer.  Compiles |pattern| as an extended regular
// expression using |options|.
//
// Returns nil (after releasing self) when |pattern| is nil/empty or fails to
// compile.  For a compile failure, |*outErrorOrNULL| gets an NSError in
// kGTMRegexErrorDomain whose userInfo carries the pattern and the regerror()
// text; when no error pointer was supplied the failure is logged instead.
// A nil/empty pattern leaves |*outErrorOrNULL| set to nil.
- (id)initWithPattern:(NSString *)pattern
              options:(GTMRegexOptions)options
            withError:(NSError **)outErrorOrNULL {
  self = [super init];
  if (self == nil) {
    return nil;
  }

  // start the caller off with "no error"
  if (outErrorOrNULL != NULL) {
    *outErrorOrNULL = nil;
  }

  // covers both nil (messaging nil yields 0) and @""
  if ([pattern length] == 0) {
    [self release];
    return nil;
  }

  // translate our options into regcomp flags
  options_ = options;
  int compileFlags = REG_EXTENDED;
  if ((options_ & kGTMRegexOptionIgnoreCase) != 0) {
    compileFlags |= REG_ICASE;
  }
  if (!(options_ & kGTMRegexOptionSupressNewlineSupport)) {
    compileFlags |= REG_NEWLINE;
  }

  // pattern_ doubles as the "regcomp was called" marker: even when regcomp
  // fails the structure may be partially filled in (for better error info),
  // so the cleanup code calls regfree whenever pattern_ is set.
  pattern_ = [pattern copy];
  if (pattern_ == nil) {
    // COV_NF_START - no real way to force this in a unittest
    [self release];
    return nil;
    // COV_NF_END
  }

  int status = regcomp(&regexData_, [pattern_ UTF8String], compileFlags);
  if (status == 0) {
    return self;
  }

  // compile failed: report it one way or the other, then bail
  NSString *failureText = [self errorMessage:status];
  if (outErrorOrNULL == NULL) {
    // no NSError to fill in, so log the problem to help debugging
    _GTMDevLog(@"Invalid pattern \"%@\", error: \"%@\"",
               pattern_, failureText);
  } else {
    // hand back the pattern and the parse error text in the userInfo
    NSDictionary *details =
      [NSDictionary dictionaryWithObjectsAndKeys:
       pattern_, kGTMRegexPatternErrorPattern,
       failureText, kGTMRegexPatternErrorErrorString,
       nil];
    *outErrorOrNULL = [NSError errorWithDomain:kGTMRegexErrorDomain
                                          code:kGTMRegexPatternParseFailedError
                                      userInfo:details];
  }

  [self release];
  return nil;
}

#if GTM_SUPPORT_GC
// Garbage-collected counterpart of -dealloc: gives the compiled regex back.
- (void)finalize {
  // pattern_ is only set right before regcomp() is called, so a non-nil value
  // means regexData_ has to be handed to regfree()
  if (pattern_ != nil) {
    regfree(&regexData_);
    [pattern_ release];
    // clear the marker so regexData_ can't be treated as live again
    pattern_ = nil;
  }
  [super finalize];
}
#endif

// Frees the compiled regex (if regcomp was ever called) and the pattern copy.
- (void)dealloc {
  // pattern_ is only set right before regcomp() is called, so a non-nil value
  // means regexData_ has to be handed to regfree()
  if (pattern_ != nil) {
    regfree(&regexData_);
    [pattern_ release];
    // clear the marker so regexData_ can't be treated as live again
    pattern_ = nil;
  }
  [super dealloc];
}

// Number of parenthesized subpatterns regcomp() reported for the pattern (the
// implicit "whole match" slot is not included).
- (NSUInteger)subPatternCount {
  NSUInteger subPatterns = regexData_.re_nsub;
  return subPatterns;
}

// YES only when the pattern matches |str| from its first byte through its
// last; a match of just part of the string (or a nil string) gives NO.
- (BOOL)matchesString:(NSString *)str {
  regmatch_t whole;
  BOOL found = [self runRegexOnUTF8:[str UTF8String]
                             nmatch:1
                             pmatch:&whole
                              flags:0];
  if (!found) {
    // no match at all
    return NO;
  }

  // the match has to start at the front...
  if (whole.rm_so != 0) {
    return NO;
  }

  // ...and run all the way to the end of the UTF-8 form
  regoff_t byteLength =
    (regoff_t)[str lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
  return (whole.rm_eo == byteLength);
}

// Matches the pattern against all of |str| and returns the pieces: index 0 is
// the full match, followed by one entry per subpattern.  Subpatterns that
// took no part in the match are represented by NSNull.  Returns nil when the
// pattern doesn't match the entire string.
- (NSArray *)subPatternsOfString:(NSString *)str {
  // slot 0 is the whole match, then one slot per subpattern
  NSUInteger slotCount = regexData_.re_nsub + 1;
  regmatch_t *slots = malloc(slotCount * sizeof(regmatch_t));
  if (slots == NULL) {
    return nil; // COV_NF_LINE - no real way to force this in a unittest
  }

  NSMutableArray *pieces = nil;

  // the @finally makes sure the malloc'ed slots are freed on every way out
  @try {
    const char *utf8 = [str UTF8String];

    // need a match, and it has to span the string front to back
    BOOL coversWholeString =
      [self runRegexOnUTF8:utf8
                    nmatch:slotCount
                    pmatch:slots
                     flags:0] &&
      (slots[0].rm_so == 0) &&
      (slots[0].rm_eo ==
       (regoff_t)[str lengthOfBytesUsingEncoding:NSUTF8StringEncoding]);

    if (coversWholeString) {
      pieces = [NSMutableArray arrayWithCapacity:slotCount];

      NSUInteger idx = 0;
      while (idx < slotCount) {
        regmatch_t slot = slots[idx];
        BOOL unused = (slot.rm_so == -1) && (slot.rm_eo == -1);
        if (unused) {
          // placeholder so the array indexes still line up w/ the subpatterns
          [pieces addObject:[NSNull null]];
        } else {
          // pull the matched bytes back out as a string
          NSUInteger byteCount = (NSUInteger)(slot.rm_eo - slot.rm_so);
          NSString *piece =
            [[NSString alloc] initWithBytes:(utf8 + slot.rm_so)
                                     length:byteCount
                                   encoding:NSUTF8StringEncoding];
          [pieces addObject:[piece autorelease]];
        }
        ++idx;
      }
    }
  } // COV_NF_LINE - radar 5851992 only reachable w/ an uncaught exception which isn't testable
  @finally {
    free(slots);
  }

  return pieces;
}

// Returns the text of the first place the pattern matches within |str|, or
// nil when there is no match.
- (NSString *)firstSubStringMatchedInString:(NSString *)str {
  const char *utf8 = [str UTF8String];
  regmatch_t hit;
  BOOL found = [self runRegexOnUTF8:utf8
                             nmatch:1
                             pmatch:&hit
                              flags:0];
  if (!found) {
    return nil;
  }

  // turn the matched byte range back into a string
  NSUInteger byteCount = (NSUInteger)(hit.rm_eo - hit.rm_so);
  NSString *matched =
    [[NSString alloc] initWithBytes:(utf8 + hit.rm_so)
                             length:byteCount
                           encoding:NSUTF8StringEncoding];
  return [matched autorelease];
}

// YES when the pattern matches anywhere within |str|; where it matched is of
// no interest here.
- (BOOL)matchesSubStringInString:(NSString *)str {
  regmatch_t ignored;
  BOOL found = [self runRegexOnUTF8:[str UTF8String]
                             nmatch:1
                             pmatch:&ignored
                              flags:0];
  return found ? YES : NO;
}

// Returns an autoreleased enumerator that walks |str| and yields every
// segment: the matches as well as the text in between them.
- (NSEnumerator *)segmentEnumeratorForString:(NSString *)str {
  GTMRegexEnumerator *walker =
    [[GTMRegexEnumerator alloc] initWithRegex:self
                                processString:str
                                  allSegments:YES];
  return [walker autorelease];
}

// Returns an autoreleased enumerator that walks |str| and yields only the
// segments that matched the pattern.
- (NSEnumerator *)matchSegmentEnumeratorForString:(NSString *)str {
  GTMRegexEnumerator *walker =
    [[GTMRegexEnumerator alloc] initWithRegex:self
                                processString:str
                                  allSegments:NO];
  return [walker autorelease];
}

// Builds a new string from |str| in which every match of the pattern is
// replaced by |replacementPattern|.  Within the replacement a backslash
// followed by digits ("\1") is swapped for that subpattern of the match
// ("\0" being the whole match); a subpattern that wasn't used contributes
// nothing.  A nil or empty replacement just removes the matches.  Returns nil
// when |str| is nil.
- (NSString *)stringByReplacingMatchesInString:(NSString *)str
                               withReplacement:(NSString *)replacementPattern {
  if (str == nil) {
    return nil;
  }

  // Crack the replacement into literal-text and backreference pieces up
  // front.  nil is the marker for "no replacement text, just drop matches".
  NSArray *replacementPieces = nil;
  if ([replacementPattern length] > 0) {
    // don't need newline support, just match the start of the pattern for '^'
    GTMRegex *backrefRegex =
      [GTMRegex regexWithPattern:kReplacementPattern
                         options:kGTMRegexOptionSupressNewlineSupport];
#ifdef DEBUG
    if (!backrefRegex) {
      _GTMDevLog(@"failed to parse out replacement regex!!!"); // COV_NF_LINE
    }
#endif
    GTMRegexEnumerator *pieceEnumerator =
      [[[GTMRegexEnumerator alloc] initWithRegex:backrefRegex
                                   processString:replacementPattern
                                     allSegments:YES] autorelease];
    // This enumerator runs with treatStartOfNewSegmentAsBeginningOfString
    // turned on.  kReplacementPattern can't do the whole job with a normal
    // walk of the string: a backreference is a backslash followed by a number
    // ("\0"), but the replacement may also need literal backslashes, which
    // have to be escaped.  So "\\0" is not a backreference, it is a backslash
    // and a zero.  In general an even run of backslashes is all escapes,
    // while an odd run is some literal backslashes followed by a
    // backreference, i.e. "an odd number of backslashes that comes after a
    // non-backslash character".  re_format(7) extended expressions can't
    // express that, so the pattern looks for a non-backslash (or the string
    // start), then an optional even run of backslashes, then the
    // backreference; and with the flag set each search after a match is
    // treated as starting at the beginning of the string.  (The replacement
    // that goes wrong without the flag is "\2\1".)
    [pieceEnumerator treatStartOfNewSegmentAsBeginningOfString:YES];
    // collect the pieces into an array since they get walked once per match
    replacementPieces = [pieceEnumerator allObjects];
    if (replacementPieces == nil) {
      // COV_NF_START - no real way to force this in a unittest
      _GTMDevLog(@"failed to create the replacements for substitutions");
      return nil;
      // COV_NF_END
    }
  }

  NSMutableString *output = [NSMutableString stringWithCapacity:[str length]];

  NSEnumerator *segments = [self segmentEnumeratorForString:str];
  GTMRegexStringSegment *segment = nil;
  while ((segment = [segments nextObject]) != nil) {
    if (![segment isMatch]) {
      // text between matches is carried over untouched
      [output appendString:[segment string]];
      continue;
    }

    if (replacementPieces == nil) {
      // nothing to put in its place, the match is simply dropped
      continue;
    }

    // a match: emit the cracked replacement, piece by piece
    GTMRegexStringSegment *piece = nil;
    GTM_FOREACH_OBJECT(piece, replacementPieces) {
      if ([piece isMatch]) {
        // a backreference piece; any text captured ahead of it goes first
        NSString *leadingText =
          [piece subPatternString:kReplacementPatternLeadingTextIndex];
        if (leadingText) {
          [output appendString:leadingText];
        }
        // then the digits pick which subpattern of the current match to add
        int backrefNumber =
          [[piece subPatternString:kReplacementPatternSubpatternNumberIndex] intValue];
        NSString *captured = [segment subPatternString:backrefNumber];
        // nil means the subpattern wasn't used (or doesn't exist)
        if (captured) {
          [output appendString:captured];
        }
      } else {
        // plain replacement text
        [output appendString:[piece string]];
      }
    }
  }
  return output;
}

// Debugging description: class, address, pattern, the subpattern count that
// regcomp() reported, and the options that are set.
- (NSString *)description {
  // re_nsub is a size_t, so it is formatted with the unsigned %zu
  NSMutableString *result =
    [NSMutableString stringWithFormat:@"%@<%p> { pattern=\"%@\", rawNumSubPatterns=%zu, options=(",
      [self class], self, pattern_, regexData_.re_nsub];
  if (options_) {
    if (options_ & kGTMRegexOptionIgnoreCase)
      [result appendString:@" IgnoreCase"];
    if ((options_ & kGTMRegexOptionSupressNewlineSupport) == kGTMRegexOptionSupressNewlineSupport)
      [result appendString:@" NoNewlineSupport"];
  } else {
    [result appendString:@" None(Default)"];
  }
  [result appendString:@" ) }"];
  return result;
}

@end

@implementation GTMRegex (PrivateMethods)

// Turns a regcomp()/regexec() error code into readable text via regerror().
// Falls back to @"internal error" when the buffer can't be allocated, the
// second regerror() call reports a different size, or the text can't be made
// into a string.
- (NSString *)errorMessage:(int)errCode {
  NSString *fallback = @"internal error";

  // first call only reports how big a buffer the message needs
  size_t needed = regerror(errCode, &regexData_, NULL, 0);
  char *text = (char *)malloc(needed * sizeof(char));
  if (text == NULL) {
    return fallback;
  }

  // second call actually fetches the message
  NSString *message = nil;
  if (regerror(errCode, &regexData_, text, needed) == needed) {
    message = [NSString stringWithUTF8String:text];
  }
  free(text);

  return (message != nil) ? message : fallback;
}

// private helper to run the regex on a block
//
// Calls regexec() with the compiled pattern, passing |nmatch|, |pmatch| and
// |flags| straight through.  Returns YES on a match.  Returns NO when
// |utf8Str| is NULL, when nothing matched, or when regexec() reported an
// error (which is logged in DEBUG builds).
//
// When |flags| includes REG_STARTEND the caller describes the range to search
// in pmatch[0] and |utf8Str| need not be NUL terminated (the enumerator in
// this file passes raw NSData bytes), so the debug log must not treat the
// buffer as a C string.
- (BOOL)runRegexOnUTF8:(const char*)utf8Str
                nmatch:(size_t)nmatch
                pmatch:(regmatch_t *)pmatch
                 flags:(int)flags {
  if (!utf8Str)
    return NO;

#ifdef DEBUG
  // Work out what can safely be logged *before* regexec() gets a chance to
  // overwrite pmatch: up to 20 bytes, but never past the end of the range the
  // caller told us about.
  const char *logText = utf8Str;
  int logLen = 20;
  if (((flags & REG_STARTEND) != 0) && (pmatch != NULL)) {
    regoff_t windowLen = pmatch[0].rm_eo - pmatch[0].rm_so;
    if (windowLen < 0)
      windowLen = 0;
    logText = utf8Str + pmatch[0].rm_so;
    if (windowLen < (regoff_t)logLen)
      logLen = (int)windowLen;
  }
#endif

  int execResult = regexec(&regexData_, utf8Str, nmatch, pmatch, flags);
  if (execResult != 0) {
#ifdef DEBUG
    if (execResult != REG_NOMATCH) {
      // COV_NF_START - no real way to force this in a unittest
      NSString *errorStr = [self errorMessage:execResult];
      _GTMDevLog(@"%@: matching string \"%.*s...\", had error: \"%@\"",
                 self, logLen, logText, errorStr);
      // COV_NF_END
    }
#endif
    return NO;
  }
  return YES;
}

@end

@implementation GTMRegexEnumerator

// we don't block init because the class isn't exported, so no one can
// create one, or if they do, they get whatever happens...

// Sets up an enumerator that applies |regex| to the UTF-8 form of |str|.
// |allSegments| picks between returning every segment (YES) and returning
// only the matches (NO).  Returns nil when |regex| is nil or |str| can't be
// turned into UTF-8 data (which includes a nil |str|).
- (id)initWithRegex:(GTMRegex *)regex
      processString:(NSString *)str
        allSegments:(BOOL)allSegments {
  self = [super init];
  if (self == nil) {
    return nil;
  }

  // hold on to what we were given; the walk itself happens in -nextObject
  allSegments_ = allSegments;
  regex_ = [regex retain];
  utf8StrBuf_ = [[str dataUsingEncoding:NSUTF8StringEncoding] retain];

  // can't do anything useful without both a regex and something to search
  BOOL haveInputs = (regex_ != nil) && (utf8StrBuf_ != nil);
  if (!haveInputs) {
    [self release];
    return nil;
  }

  // the parsing state starts out zeroed courtesy of object creation

  return self;
}

// No -finalize counterpart is provided: savedRegMatches_ is declared __strong.
// Gives back the retained inputs and any match data that was saved for a
// -nextObject call that never came.
- (void)dealloc {
  [utf8StrBuf_ release];
  [regex_ release];
  free(savedRegMatches_);
  [super dealloc];
}

// Records whether every search should be treated as starting at the
// beginning of the string.
- (void)treatStartOfNewSegmentAsBeginningOfString:(BOOL)yesNo {
  // regexec assumes the first character it is handed is the start of the
  // string.  That is normally right, but this class walks one string in
  // several passes: only the first pass really begins at the string start, so
  // by default -nextObject flags the later passes with REG_NOTBOL.  Some
  // patterns, though, can only do their job if each pass is considered a
  // fresh start of string, and this switch turns the REG_NOTBOL flagging off
  // for them.  The behavior is hard to explain without a pile of examples, so
  // it is kept out of the public api.  (Hint: look at what in this file turns
  // it on to see why it exists.)
  treatStartOfNewSegmentAsBeginningOfString_ = yesNo;
}

// Returns the next GTMRegexStringSegment of the walk, or nil once
// curParseIndex_ has reached the end of the buffer.
//
// Each call does one of three things:
//  - hands back a match that an earlier call saved in savedRegMatches_;
//  - returns nil because the end of the buffer has been reached;
//  - runs the regex from curParseIndex_ to the end of the buffer.  With
//    allSegments_ set, text found in front of a match is returned first and
//    the match itself is saved for the following call; likewise the tail
//    after the last match is returned as a final non-match segment.
//
// The regmatch_t array given to the segment's initializer is freed by the
// segment's -dealloc, so it is not freed here once the segment is created.
// Any exception raised along the way is logged and swallowed, and nil is
// returned.
//
// NOTE(review): curParseIndex_ is advanced to the rm_eo of each match, so a
// zero-length match found at curParseIndex_ leaves it unchanged.  It looks
// like a pattern that can match the empty string (e.g. "x*") would keep
// yielding the same empty match and never reach the end - confirm, and if so
// the enumerator needs to force progress after an empty match.
- (id)nextObject {

  GTMRegexStringSegment *result = nil;
  regmatch_t *nextMatches = nil;
  BOOL isMatch = NO;

  // we do all this w/in a try, so if something throws, the memory we malloced
  // will still get cleaned up
  @try {

    // if we have a saved match, use that...
    if (savedRegMatches_) {
      nextMatches = savedRegMatches_;
      savedRegMatches_ = nil;
      isMatch = YES; // if we have something saved, it was a pattern match
    }
    // have we reached the end?
    else if (curParseIndex_ >= (regoff_t)[utf8StrBuf_ length]) {
      // done, do nothing, we'll return nil
    }
    // do the search.
    else {

      // alloc the match structure (extra space for the zero (full) match)
      size_t matchBufSize = ([regex_ subPatternCount] + 1) * sizeof(regmatch_t);
      nextMatches = malloc(matchBufSize);
      if (!nextMatches)
        return nil; // COV_NF_LINE - no real way to force this in a unittest

      // setup our range to work on (with REG_STARTEND, slot zero carries the
      // byte range to search in)
      nextMatches[0].rm_so = curParseIndex_;
      nextMatches[0].rm_eo = [utf8StrBuf_ length];

      // figure out our flags
      int flags = REG_STARTEND;
      if ((!treatStartOfNewSegmentAsBeginningOfString_) &&
          (curParseIndex_ != 0)) {
        // see -treatStartOfNewSegmentAsBeginningOfString: for why we have
        // this check here.
        flags |= REG_NOTBOL;
      }

      // call for the match
      if ([regex_ runRegexOnUTF8:[utf8StrBuf_ bytes]
                          nmatch:([regex_ subPatternCount] + 1)
                          pmatch:nextMatches
                           flags:flags]) {
        // match

        if (allSegments_ &&
            (nextMatches[0].rm_so != curParseIndex_)) {
          // we should return all segments (not just matches), and there was
          // something before this match.  So save off this match for later
          // and create a range for this.

          savedRegMatches_ = nextMatches;
          nextMatches = malloc(matchBufSize);
          if (!nextMatches)
            return nil; // COV_NF_LINE - no real way to force this in a unittest

          isMatch = NO;
          // mark everything but the zero slot w/ not used
          for (NSUInteger x = [regex_ subPatternCount]; x > 0; --x) {
            nextMatches[x].rm_so = nextMatches[x].rm_eo = -1;
          }
          // the non-match segment runs from where we started up to the match
          nextMatches[0].rm_so = curParseIndex_;
          nextMatches[0].rm_eo = savedRegMatches_[0].rm_so;

          // advance our marker (past the saved match, since the next call
          // returns it without searching)
          curParseIndex_ = savedRegMatches_[0].rm_eo;

        } else {
          // we only return matches or are pointed at a match

          // no real work to do, just fall through to return the current
          // match.
          isMatch = YES;

          // advance our marker
          curParseIndex_ = nextMatches[0].rm_eo;
        }

      } else {
        // no match

        // should we return the last non matching segment?
        if (allSegments_) {
          isMatch = NO;
          // mark everything but the zero slot w/ not used
          for (NSUInteger x = [regex_ subPatternCount]; x > 0; --x) {
            nextMatches[x].rm_so = nextMatches[x].rm_eo = -1;
          }
          // the segment is everything that is left
          nextMatches[0].rm_so = curParseIndex_;
          nextMatches[0].rm_eo = [utf8StrBuf_ length];
        } else {
          // drop match set, we don't want it
          free(nextMatches);
          nextMatches = nil;
        }

        // advance our marker since we're done
        curParseIndex_ = [utf8StrBuf_ length];

      }
    }

    // create the segment to return
    if (nextMatches) {
      result =
        [[[GTMRegexStringSegment alloc] initWithUTF8StrBuf:utf8StrBuf_
                                                regMatches:nextMatches
                                             numRegMatches:[regex_ subPatternCount]
                                                   isMatch:isMatch] autorelease];
      // cleared so the @catch below doesn't free what was handed over
      nextMatches = nil;
    }
  } @catch (id e) { // COV_NF_START - no real way to force this in a test
    _GTMDevLog(@"Exceptions while trying to advance enumeration (%@)", e);
    // if we still have something in our temp, free it
    free(nextMatches);
  } // COV_NF_END

  return result;
}

// Debugging description: class, address, the regex, the allSegments setting
// and (at most) the first 20 bytes of the string being walked.
- (NSString *)description {
  // utf8StrBuf_ holds raw UTF-8 bytes with no NUL terminator, so the preview
  // has to be bounded by the buffer's length and not just by a fixed
  // precision, or a short buffer would be read past its end.
  NSUInteger previewLen = [utf8StrBuf_ length];
  if (previewLen > 20) {
    previewLen = 20;
  }
  const char *previewBytes = (const char *)[utf8StrBuf_ bytes];
  if (previewBytes == NULL) {
    // nothing to show; never hand the formatter a NULL for %s
    previewBytes = "";
    previewLen = 0;
  }
  return [NSString stringWithFormat:@"%@<%p> { regex=\"%@\", allSegments=%s, string=\"%.*s...\" }",
    [self class], self,
    regex_,
    (allSegments_ ? "YES" : "NO"),
    (int)previewLen, previewBytes];
}

@end

@implementation GTMRegexStringSegment

// Plain -init is not supported.  The class is visible in the header, so this
// makes sure a segment created by mistake fails loudly.
- (id)init {
  // still run super's init and autorelease what it returns so the allocated
  // object doesn't leak
  id discarded = [super init];
  [discarded autorelease];
  [self doesNotRecognizeSelector:_cmd];
  return nil; // COV_NF_LINE - return is just here to keep gcc happy
}

// Releases the shared UTF-8 buffer and frees the match array this segment was
// initialized with.
- (void)dealloc {
  [utf8StrBuf_ release];
  free(regMatches_);
  [super dealloc];
}

// YES when this segment is text the pattern matched, NO when it is the text
// found between matches.
- (BOOL)isMatch {
  BOOL matched = isMatch_;
  return matched;
}

// The full text of this segment, which is stored as subpattern zero.
- (NSString *)string {
  NSString *wholeSegment = [self subPatternString:0];
  return wholeSegment;
}

// Returns the text captured by subpattern |patternIndex| (0 is the whole
// segment).  nil when the index is past the last subpattern or when that
// subpattern is marked as unused.
- (NSString *)subPatternString:(NSUInteger)patternIndex {
  // valid indexes run from 0 through numRegMatches_ inclusive
  if (patternIndex > numRegMatches_) {
    return nil;
  }

  const regmatch_t *slot = &regMatches_[patternIndex];

  // -1/-1 marks a subpattern that wasn't used
  if ((slot->rm_so == -1) && (slot->rm_eo == -1)) {
    return nil;
  }

  // turn the byte range back into a string
  const char *start = ((const char *)[utf8StrBuf_ bytes]) + slot->rm_so;
  NSUInteger byteCount = (NSUInteger)(slot->rm_eo - slot->rm_so);
  NSString *text = [[NSString alloc] initWithBytes:start
                                            length:byteCount
                                          encoding:NSUTF8StringEncoding];
  return [text autorelease];
}

// Debugging description: class, address, whether the segment is a match, and
// the text of each subpattern slot (slot 0 being the whole segment).  Unused
// slots show up as empty strings.
- (NSString *)description {
  NSMutableString *result =
    [NSMutableString stringWithFormat:@"%@<%p> { isMatch=\"%s\", subPatterns=(",
      [self class], self, (isMatch_ ? "YES" : "NO")];
  const char *bytes = (const char *)[utf8StrBuf_ bytes];
  for (NSUInteger x = 0; x <= numRegMatches_; ++x) {
    NSString *format = @", \"%.*s\"";
    if (x == 0)
      format = @" \"%.*s\"";

    // Unused slots carry offsets of -1; only build a pointer into the buffer
    // for real ranges so we never form an address in front of the buffer.
    const char *text = "";
    int textLen = 0;
    if ((bytes != NULL) &&
        (regMatches_[x].rm_so >= 0) &&
        (regMatches_[x].rm_eo >= regMatches_[x].rm_so)) {
      text = bytes + regMatches_[x].rm_so;
      textLen = (int)(regMatches_[x].rm_eo - regMatches_[x].rm_so);
    }

    [result appendFormat:format, textLen, text];
  }
  [result appendString:@" ) }"];

  return result;
}

@end

@implementation GTMRegexStringSegment (PrivateMethods)

// Initializer used by the enumerator.  |utf8StrBuf| is retained; |regMatches|
// is a malloc'ed array that this object frees in -dealloc.  |numRegMatches|
// is the subpattern count (the array also has the extra slot zero).  Returns
// nil when either pointer argument is nil.
- (id)initWithUTF8StrBuf:(NSData *)utf8StrBuf
              regMatches:(regmatch_t *)regMatches
           numRegMatches:(NSUInteger)numRegMatches
                 isMatch:(BOOL)isMatch {
  self = [super init];
  if (self == nil) {
    return nil;
  }

  isMatch_ = isMatch;
  numRegMatches_ = numRegMatches;
  regMatches_ = regMatches;
  utf8StrBuf_ = [utf8StrBuf retain];

  // both the buffer and the match data are required
  if ((utf8StrBuf_ != nil) && (regMatches_ != NULL)) {
    return self;
  }

  // COV_NF_START
  // this could only happen if something messed w/ our internal state.
  [self release];
  return nil;
  // COV_NF_END
}

@end

// Conveniences that build a throwaway GTMRegex (default options) from
// |pattern| and apply it to the receiver.  When the pattern can't be turned
// into a regex the regex is nil, so the BOOL methods answer NO and the object
// methods answer nil.
@implementation NSString (GTMRegexAdditions)

// YES when |pattern| matches the entire receiver.
- (BOOL)gtm_matchesPattern:(NSString *)pattern {
  return [[GTMRegex regexWithPattern:pattern] matchesString:self];
}

// The full match plus each subpattern when |pattern| matches the entire
// receiver, otherwise nil.
- (NSArray *)gtm_subPatternsOfPattern:(NSString *)pattern {
  return [[GTMRegex regexWithPattern:pattern] subPatternsOfString:self];
}

// The first piece of the receiver that |pattern| matches, or nil.
- (NSString *)gtm_firstSubStringMatchedByPattern:(NSString *)pattern {
  return [[GTMRegex regexWithPattern:pattern]
          firstSubStringMatchedInString:self];
}

// YES when |pattern| matches anywhere within the receiver.
- (BOOL)gtm_subStringMatchesPattern:(NSString *)pattern {
  return [[GTMRegex regexWithPattern:pattern] matchesSubStringInString:self];
}

// The text of every match of |pattern| in the receiver, in order.
- (NSArray *)gtm_allSubstringsMatchedByPattern:(NSString *)pattern {
  NSArray *matchSegments =
    [[self gtm_matchSegmentEnumeratorForPattern:pattern] allObjects];
  // ask each segment for its -string
  return [matchSegments valueForKey:@"string"];
}

// Enumerator over every segment of the receiver (matches and the text
// between them).
- (NSEnumerator *)gtm_segmentEnumeratorForPattern:(NSString *)pattern {
  return [[GTMRegex regexWithPattern:pattern] segmentEnumeratorForString:self];
}

// Enumerator over just the segments of the receiver that match |pattern|.
- (NSEnumerator *)gtm_matchSegmentEnumeratorForPattern:(NSString *)pattern {
  return [[GTMRegex regexWithPattern:pattern]
          matchSegmentEnumeratorForString:self];
}

// A copy of the receiver with each match of |pattern| replaced according to
// |replacementPattern|.
- (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
                                    withReplacement:(NSString *)replacementPattern {
  return [[GTMRegex regexWithPattern:pattern]
          stringByReplacingMatchesInString:self
                           withReplacement:replacementPattern];
}

@end