//
// GTMNSString+XML.m
//
// Copyright 2007-2008 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
//

#import "GTMDefines.h"
#import "GTMNSString+XML.h"

// Classification of a single UTF-16 code unit.
//
// The kGTMXMLCharModeEncode* values double as indexes into gXMLEntityList, so
// they must stay small, contiguous and in the same order as that table.
// kGTMXMLCharModeValid and kGTMXMLCharModeInvalid are never used as indexes.
enum {
  kGTMXMLCharModeEncodeQUOT = 0,
  kGTMXMLCharModeEncodeAMP = 1,
  kGTMXMLCharModeEncodeAPOS = 2,
  kGTMXMLCharModeEncodeLT = 3,
  kGTMXMLCharModeEncodeGT = 4,
  kGTMXMLCharModeValid = 99,
  kGTMXMLCharModeInvalid = 100,
};
typedef NSUInteger GTMXMLCharMode;

// Entity reference emitted for each kGTMXMLCharModeEncode* value.
static NSString *gXMLEntityList[] = {
  // this must match the above order
  @"&quot;",
  @"&amp;",
  @"&apos;",
  @"&lt;",
  @"&gt;",
};

// Classifies one UTF-16 code unit as "needs an entity", "valid as is" or
// "not allowed in XML".
//
// Surrogate code units (0xD800-0xDFFF) are reported as invalid here because a
// single code unit cannot be judged in isolation; AutoreleasedCloneForXML
// recognizes well-formed surrogate pairs before calling this function.
GTM_INLINE GTMXMLCharMode XMLModeForUnichar(UniChar c) {
  // Per XML spec Section 2.2 Characters
  //   ( http://www.w3.org/TR/REC-xml/#charsets )
  //
  //   Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
  //            [#x10000-#x10FFFF]

  if (c <= 0xd7ff) {
    if (c >= 0x20) {
      switch (c) {
        case 34:  // "
          return kGTMXMLCharModeEncodeQUOT;
        case 38:  // &
          return kGTMXMLCharModeEncodeAMP;
        case 39:  // '
          return kGTMXMLCharModeEncodeAPOS;
        case 60:  // <
          return kGTMXMLCharModeEncodeLT;
        case 62:  // >
          return kGTMXMLCharModeEncodeGT;
        default:
          return kGTMXMLCharModeValid;
      }
    } else {
      // Of the C0 controls only LF, CR and TAB are allowed.
      if (c == '\n')
        return kGTMXMLCharModeValid;
      if (c == '\r')
        return kGTMXMLCharModeValid;
      if (c == '\t')
        return kGTMXMLCharModeValid;
      return kGTMXMLCharModeInvalid;
    }
  }

  // 0xD800-0xDFFF: surrogate code units.
  if (c < 0xE000)
    return kGTMXMLCharModeInvalid;

  if (c <= 0xFFFD)
    return kGTMXMLCharModeValid;

  // A single UniChar can't hold [#x10000-#x10FFFF]; those code points arrive
  // as surrogate pairs and are handled by the caller. What is left here is
  // 0xFFFE and 0xFFFF, which are not XML characters.
  return kGTMXMLCharModeInvalid;
}  // XMLModeForUnichar

// Returns a copy of |src| with every code unit that is not a legal XML
// character removed. When |escaping| is YES the five predefined XML entities
// (" & ' < >) are additionally replaced by their entity references; when it is
// NO they are copied through untouched.
//
// Returns |src| itself for an empty (or nil) string, an autoreleased mutable
// string otherwise, and nil if the temporary character buffer could not be
// allocated.
static NSString *AutoreleasedCloneForXML(NSString *src, BOOL escaping) {
  //
  // NOTE:
  // We don't use CFXMLCreateStringByEscapingEntities because it's busted in
  // 10.3 (http://lists.apple.com/archives/Cocoa-dev/2004/Nov/msg00059.html) and
  // it doesn't do anything about the chars that are actually invalid per the
  // xml spec.
  //

  // we can't use the CF call here because it leaves the invalid chars
  // in the string.
  NSUInteger length = [src length];
  if (!length) {
    return src;
  }

  NSMutableString *finalString = [NSMutableString string];

  // this block is common between GTMNSString+HTML and GTMNSString+XML but
  // it's so short that it isn't really worth trying to share.
  const UniChar *buffer = CFStringGetCharactersPtr((CFStringRef)src);
  if (!buffer) {
    // We want this buffer to be autoreleased.
    NSMutableData *data = [NSMutableData dataWithLength:length * sizeof(UniChar)];
    if (!data) {
      // COV_NF_START  - Memory fail case
      _GTMDevLog(@"couldn't alloc buffer");
      return nil;
      // COV_NF_END
    }
    // Use the bounded variant; -getCharacters: has no way to know how large
    // the destination is.
    [src getCharacters:[data mutableBytes] range:NSMakeRange(0, length)];
    buffer = [data bytes];
  }

  // Invariant at the top of every iteration:
  //   goodRun + goodRunLength == buffer + i
  // i.e. [goodRun, goodRun + goodRunLength) is the pending run of code units
  // that will be copied verbatim.
  const UniChar *goodRun = buffer;
  NSUInteger goodRunLength = 0;

  for (NSUInteger i = 0; i < length; ++i) {
    UniChar c = buffer[i];

    // A high surrogate immediately followed by a low surrogate encodes a code
    // point in [#x10000-#x10FFFF], which the XML spec allows. Keep both halves
    // together. Unpaired surrogates fall through and are dropped as invalid.
    if ((c >= 0xD800) && (c <= 0xDBFF) && (i + 1 < length)) {
      UniChar next = buffer[i + 1];
      if ((next >= 0xDC00) && (next <= 0xDFFF)) {
        goodRunLength += 2;
        ++i;  // skip the low surrogate we just consumed
        continue;
      }
    }

    GTMXMLCharMode cMode = XMLModeForUnichar(c);

    // valid chars go as is, and if we aren't doing entities, then
    // everything goes as is.
    if ((cMode == kGTMXMLCharModeValid) ||
        (!escaping && (cMode != kGTMXMLCharModeInvalid))) {
      // goes as is
      goodRunLength += 1;
    } else {
      // it's something we have to encode or something invalid

      // start by adding what we already collected (if anything)
      if (goodRunLength) {
        CFStringAppendCharacters((CFMutableStringRef)finalString,
                                 goodRun,
                                 goodRunLength);
        goodRunLength = 0;
      }

      // if it wasn't invalid, add the encoded version
      if (cMode != kGTMXMLCharModeInvalid) {
        // add this encoded
        [finalString appendString:gXMLEntityList[cMode]];
      }

      // update goodRun to point to the next UniChar
      goodRun = buffer + i + 1;
    }
  }

  // anything left to add?
  if (goodRunLength) {
    CFStringAppendCharacters((CFMutableStringRef)finalString,
                             goodRun,
                             goodRunLength);
  }
  return finalString;
}  // AutoreleasedCloneForXML

@implementation NSString (GTMNSStringXMLAdditions)

// Removes characters that are not legal in XML and replaces " & ' < > with
// their entity references.
- (NSString *)gtm_stringBySanitizingAndEscapingForXML {
  return AutoreleasedCloneForXML(self, YES);
}  // gtm_stringBySanitizingAndEscapingForXML

// Removes characters that are not legal in XML; performs no entity escaping.
- (NSString *)gtm_stringBySanitizingToXMLSpec {
  return AutoreleasedCloneForXML(self, NO);
}  // gtm_stringBySanitizingToXMLSpec

@end