diff options
Diffstat (limited to 'third_party/java/dd_plist/java/com/dd/plist/ASCIIPropertyListParser.java')
-rw-r--r-- | third_party/java/dd_plist/java/com/dd/plist/ASCIIPropertyListParser.java | 645 |
1 files changed, 645 insertions, 0 deletions
diff --git a/third_party/java/dd_plist/java/com/dd/plist/ASCIIPropertyListParser.java b/third_party/java/dd_plist/java/com/dd/plist/ASCIIPropertyListParser.java new file mode 100644 index 0000000000..533fb391ad --- /dev/null +++ b/third_party/java/dd_plist/java/com/dd/plist/ASCIIPropertyListParser.java @@ -0,0 +1,645 @@ +/* + * plist - An open source library to parse and generate property lists + * Copyright (C) 2014 Daniel Dreibrodt + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
*/
package com.dd.plist;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.text.ParseException;
import java.text.StringCharacterIterator;
import java.util.LinkedList;
import java.util.List;

/**
 * Parser for ASCII property lists. Supports Apple OS X/iOS and GnuStep/NeXTSTEP format.
 * This parser is based on the recursive descent paradigm, but the underlying grammar
 * is not explicitly defined.
 * <p>
 * The parser works directly on the raw bytes of the property list and detects the end of
 * the input through {@link ArrayIndexOutOfBoundsException}, which {@link #parse()} converts
 * into a {@link ParseException}.
 * <p>
 * Resources on ASCII property list format:
 * <ul>
 * <li><a href="https://developer.apple.com/library/mac/#documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html">
 * Property List Programming Guide - Old-Style ASCII Property Lists
 * </a></li>
 * <li><a href="http://www.gnustep.org/resources/documentation/Developer/Base/Reference/NSPropertyList.html">
 * GnuStep - NSPropertyListSerialization class documentation
 * </a></li>
 * </ul>
 *
 * @author Daniel Dreibrodt
 */
public class ASCIIPropertyListParser {

    /**
     * Parses an ASCII property list file.
     *
     * @param f The ASCII property list file.
     * @return The root object of the property list. This is usually a NSDictionary but can also be a NSArray.
     * @throws IOException    When the file could not be opened or read.
     * @throws ParseException When the content is not a well-formed ASCII property list.
     */
    public static NSObject parse(File f) throws IOException, ParseException {
        // The stream is closed by parse(InputStream), also when reading fails.
        return parse(new FileInputStream(f));
    }

    /**
     * Parses an ASCII property list from an input stream.
     * The stream is read completely and is always closed by this method.
     *
     * @param in The input stream that points to the property list's data.
     * @return The root object of the property list. This is usually a NSDictionary but can also be a NSArray.
     * @throws ParseException When the content is not a well-formed ASCII property list.
     * @throws IOException    When the stream could not be read or closed.
     */
    public static NSObject parse(InputStream in) throws ParseException, IOException {
        byte[] buf;
        try {
            buf = PropertyListParser.readAll(in);
        } finally {
            // Close even when reading failed, so that parse(File) does not leak its stream.
            in.close();
        }
        return parse(buf);
    }

    /**
     * Parses an ASCII property list from a byte array.
     *
     * @param bytes The ASCII property list data.
     * @return The root object of the property list. This is usually a NSDictionary but can also be a NSArray.
     * @throws ParseException When the content is not a well-formed ASCII property list.
     */
    public static NSObject parse(byte[] bytes) throws ParseException {
        ASCIIPropertyListParser parser = new ASCIIPropertyListParser(bytes);
        return parser.parse();
    }

    public static final char WHITESPACE_SPACE = ' ';
    public static final char WHITESPACE_TAB = '\t';
    public static final char WHITESPACE_NEWLINE = '\n';
    public static final char WHITESPACE_CARRIAGE_RETURN = '\r';

    public static final char ARRAY_BEGIN_TOKEN = '(';
    public static final char ARRAY_END_TOKEN = ')';
    public static final char ARRAY_ITEM_DELIMITER_TOKEN = ',';

    public static final char DICTIONARY_BEGIN_TOKEN = '{';
    public static final char DICTIONARY_END_TOKEN = '}';
    public static final char DICTIONARY_ASSIGN_TOKEN = '=';
    public static final char DICTIONARY_ITEM_DELIMITER_TOKEN = ';';

    public static final char QUOTEDSTRING_BEGIN_TOKEN = '"';
    public static final char QUOTEDSTRING_END_TOKEN = '"';
    public static final char QUOTEDSTRING_ESCAPE_TOKEN = '\\';

    public static final char DATA_BEGIN_TOKEN = '<';
    public static final char DATA_END_TOKEN = '>';

    public static final char DATA_GSOBJECT_BEGIN_TOKEN = '*';
    public static final char DATA_GSDATE_BEGIN_TOKEN = 'D';
    public static final char DATA_GSBOOL_BEGIN_TOKEN = 'B';
    public static final char DATA_GSBOOL_TRUE_TOKEN = 'Y';
    public static final char DATA_GSBOOL_FALSE_TOKEN = 'N';
    public static final char DATA_GSINT_BEGIN_TOKEN = 'I';
    public static final char DATA_GSREAL_BEGIN_TOKEN = 'R';

    public static final char DATE_DATE_FIELD_DELIMITER = '-';
    public static final char DATE_TIME_FIELD_DELIMITER = ':';
    public static final char DATE_GS_DATE_TIME_DELIMITER = ' ';
    public static final char DATE_APPLE_DATE_TIME_DELIMITER = 'T';
    public static final char DATE_APPLE_END_TOKEN = 'Z';

    public static final char COMMENT_BEGIN_TOKEN = '/';
    public static final char MULTILINE_COMMENT_SECOND_TOKEN = '*';
    public static final char SINGLELINE_COMMENT_SECOND_TOKEN = '/';
    public static final char MULTILINE_COMMENT_END_TOKEN = '/';

    /**
     * Property list source data
     */
    private byte[] data;
    /**
     * Current parsing index
     */
    private int index;

    /**
     * Only allow subclasses to change instantiation.
     */
    protected ASCIIPropertyListParser() {

    }

    /**
     * Creates a new parser for the given property list content.
     *
     * @param propertyListContent The content of the property list that is to be parsed.
     */
    private ASCIIPropertyListParser(byte[] propertyListContent) {
        data = propertyListContent;
    }

    /**
     * Creates a {@link ParseException} that keeps the exception which triggered it as its cause.
     *
     * @param message The error description.
     * @param offset  The position in the input at which the error was detected.
     * @param cause   The underlying exception.
     * @return The exception to throw.
     */
    private static ParseException parseError(String message, int offset, Throwable cause) {
        ParseException exception = new ParseException(message, offset);
        exception.initCause(cause);
        return exception;
    }

    /**
     * Checks whether the given sequence of symbols can be accepted.
     *
     * @param sequence The sequence of tokens to look for.
     * @return Whether the given tokens occur at the current parsing position.
     */
    private boolean acceptSequence(char... sequence) {
        for (int i = 0; i < sequence.length; i++) {
            if (data[index + i] != sequence[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether the given symbols can be accepted, that is, if one
     * of the given symbols is found at the current parsing position.
     *
     * @param acceptableSymbols The symbols to check.
     * @return Whether one of the symbols can be accepted or not.
     */
    private boolean accept(char... acceptableSymbols) {
        // Read the current byte once; this also signals the end of the input
        // (ArrayIndexOutOfBoundsException) when no symbols are given.
        byte current = data[index];
        for (char symbol : acceptableSymbols) {
            if (current == symbol) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether the given symbol can be accepted, that is, if
     * the given symbol is found at the current parsing position.
     *
     * @param acceptableSymbol The symbol to check.
     * @return Whether the symbol can be accepted or not.
     */
    private boolean accept(char acceptableSymbol) {
        return data[index] == acceptableSymbol;
    }

    /**
     * Expects the input to have one of the given symbols at the current parsing position.
     *
     * @param expectedSymbols The expected symbols.
     * @throws ParseException If none of the expected symbols could be found.
     */
    private void expect(char... expectedSymbols) throws ParseException {
        if (accept(expectedSymbols)) {
            return;
        }
        StringBuilder message = new StringBuilder("Expected '").append(expectedSymbols[0]).append("'");
        for (int i = 1; i < expectedSymbols.length; i++) {
            message.append(" or '").append(expectedSymbols[i]).append("'");
        }
        message.append(" but found '").append((char) data[index]).append("'");
        throw new ParseException(message.toString(), index);
    }

    /**
     * Expects the input to have the given symbol at the current parsing position.
     *
     * @param expectedSymbol The expected symbol.
     * @throws ParseException If the expected symbol could not be found.
     */
    private void expect(char expectedSymbol) throws ParseException {
        if (!accept(expectedSymbol)) {
            throw new ParseException("Expected '" + expectedSymbol + "' but found '" + (char) data[index] + "'", index);
        }
    }

    /**
     * Reads an expected symbol.
     *
     * @param symbol The symbol to read.
     * @throws ParseException If the expected symbol could not be read.
     */
    private void read(char symbol) throws ParseException {
        expect(symbol);
        index++;
    }

    /**
     * Skips the current symbol.
     */
    private void skip() {
        index++;
    }

    /**
     * Skips several symbols
     *
     * @param numSymbols The amount of symbols to skip.
     */
    private void skip(int numSymbols) {
        index += numSymbols;
    }

    /**
     * Skips all whitespaces and comments from the current parsing position onward.
     */
    private void skipWhitespacesAndComments() {
        boolean commentSkipped;
        do {
            commentSkipped = false;

            //Skip whitespaces
            while (accept(WHITESPACE_CARRIAGE_RETURN, WHITESPACE_NEWLINE, WHITESPACE_SPACE, WHITESPACE_TAB)) {
                skip();
            }

            if (acceptSequence(COMMENT_BEGIN_TOKEN, SINGLELINE_COMMENT_SECOND_TOKEN)) {
                //Skip single line comments "//..."
                skip(2);
                readInputUntil(WHITESPACE_CARRIAGE_RETURN, WHITESPACE_NEWLINE);
                commentSkipped = true;
            } else if (acceptSequence(COMMENT_BEGIN_TOKEN, MULTILINE_COMMENT_SECOND_TOKEN)) {
                //Skip multi line comments "/* ... */"
                skip(2);
                while (!acceptSequence(MULTILINE_COMMENT_SECOND_TOKEN, MULTILINE_COMMENT_END_TOKEN)) {
                    skip();
                }
                skip(2);
                commentSkipped = true;
            }
        }
        while (commentSkipped); //if a comment was skipped more whitespace or another comment can follow, so skip again
    }

    /**
     * Decodes the collected bytes as UTF-8.
     *
     * @param stream The bytes to decode.
     * @return The decoded string.
     */
    private String toUtf8String(ByteArrayOutputStream stream) {
        try {
            return stream.toString("UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads input until one of the given symbols is found.
     *
     * @param symbols The symbols that can occur after the string to read.
     * @return The input until one the given symbols.
     */
    private String readInputUntil(char... symbols) {
        ByteArrayOutputStream stringBytes = new ByteArrayOutputStream();
        while (!accept(symbols)) {
            stringBytes.write(data[index]);
            skip();
        }
        return toUtf8String(stringBytes);
    }

    /**
     * Reads input until the given symbol is found.
     *
     * @param symbol The symbol that can occur after the string to read.
     * @return The input until the given symbol.
     */
    private String readInputUntil(char symbol) {
        ByteArrayOutputStream stringBytes = new ByteArrayOutputStream();
        while (!accept(symbol)) {
            stringBytes.write(data[index]);
            skip();
        }
        return toUtf8String(stringBytes);
    }

    /**
     * Parses the property list from the beginning and returns the root object
     * of the property list.
     *
     * @return The root object of the property list. This can either be a NSDictionary or a NSArray.
     * @throws ParseException When an error occurred during parsing, including input that ends
     *                        prematurely (e.g. empty or whitespace-only input).
     */
    public NSObject parse() throws ParseException {
        index = 0;
        try {
            // The leading whitespace/comment skip and the root token check also run off the
            // end of empty or truncated input, so they belong inside the guarded region.
            skipWhitespacesAndComments();
            expect(DICTIONARY_BEGIN_TOKEN, ARRAY_BEGIN_TOKEN, COMMENT_BEGIN_TOKEN);
            return parseObject();
        } catch (ArrayIndexOutOfBoundsException ex) {
            throw parseError("Reached end of input unexpectedly.", index, ex);
        }
    }

    /**
     * Parses the NSObject found at the current position in the property list
     * data stream.
     *
     * @return The parsed NSObject.
     * @throws ParseException If the object at the current position is malformed.
     * @see ASCIIPropertyListParser#index
     */
    private NSObject parseObject() throws ParseException {
        switch (data[index]) {
            case ARRAY_BEGIN_TOKEN: {
                return parseArray();
            }
            case DICTIONARY_BEGIN_TOKEN: {
                return parseDictionary();
            }
            case DATA_BEGIN_TOKEN: {
                return parseData();
            }
            case QUOTEDSTRING_BEGIN_TOKEN: {
                String quotedString = parseQuotedString();
                //apple dates are quoted strings of length 20 and after the 4 year digits a dash is found
                if (quotedString.length() == 20 && quotedString.charAt(4) == DATE_DATE_FIELD_DELIMITER) {
                    try {
                        return new NSDate(quotedString);
                    } catch (Exception ignored) {
                        //not a date? --> fall through and return it as a string
                    }
                }
                return new NSString(quotedString);
            }
            default: {
                if (data[index] >= '0' && data[index] <= '9') {
                    //starts with a digit: could be a date or just a string
                    return parseDateString();
                }
                //non-numerical -> string or boolean
                return new NSString(parseString());
            }
        }
    }

    /**
     * Parses an array from the current parsing position.
     * The prerequisite for calling this method is, that an array begin token has been read.
     *
     * @return The array found at the parsing position.
     * @throws ParseException If the array or one of its items is malformed.
     */
    private NSArray parseArray() throws ParseException {
        //Skip begin token
        skip();
        skipWhitespacesAndComments();
        List<NSObject> objects = new LinkedList<NSObject>();
        while (!accept(ARRAY_END_TOKEN)) {
            objects.add(parseObject());
            skipWhitespacesAndComments();
            if (!accept(ARRAY_ITEM_DELIMITER_TOKEN)) {
                break; //must have reached end of array
            }
            skip();
            skipWhitespacesAndComments();
        }
        //parse end token
        read(ARRAY_END_TOKEN);
        return new NSArray(objects.toArray(new NSObject[objects.size()]));
    }

    /**
     * Parses a dictionary from the current parsing position.
     * The prerequisite for calling this method is, that a dictionary begin token has been read.
     *
     * @return The dictionary found at the parsing position.
     * @throws ParseException If the dictionary or one of its entries is malformed.
     */
    private NSDictionary parseDictionary() throws ParseException {
        //Skip begin token
        skip();
        skipWhitespacesAndComments();
        NSDictionary dict = new NSDictionary();
        while (!accept(DICTIONARY_END_TOKEN)) {
            //Parse key
            String keyString = accept(QUOTEDSTRING_BEGIN_TOKEN) ? parseQuotedString() : parseString();
            skipWhitespacesAndComments();

            //Parse assign token
            read(DICTIONARY_ASSIGN_TOKEN);
            skipWhitespacesAndComments();

            NSObject object = parseObject();
            dict.put(keyString, object);
            skipWhitespacesAndComments();
            read(DICTIONARY_ITEM_DELIMITER_TOKEN);
            skipWhitespacesAndComments();
        }
        //skip end token
        skip();
        return dict;
    }

    /**
     * Parses a data object from the current parsing position.
     * This can either be a NSData object or a GnuStep NSNumber or NSDate.
     * The prerequisite for calling this method is, that a data begin token has been read.
     *
     * @return The data object found at the parsing position.
     * @throws ParseException If the data object is malformed, e.g. contains non-hexadecimal bytes.
     */
    private NSObject parseData() throws ParseException {
        NSObject obj = null;
        //Skip begin token
        skip();
        if (accept(DATA_GSOBJECT_BEGIN_TOKEN)) {
            skip();
            expect(DATA_GSBOOL_BEGIN_TOKEN, DATA_GSDATE_BEGIN_TOKEN, DATA_GSINT_BEGIN_TOKEN, DATA_GSREAL_BEGIN_TOKEN);
            if (accept(DATA_GSBOOL_BEGIN_TOKEN)) {
                //Boolean
                skip();
                expect(DATA_GSBOOL_TRUE_TOKEN, DATA_GSBOOL_FALSE_TOKEN);
                obj = new NSNumber(accept(DATA_GSBOOL_TRUE_TOKEN));
                //Skip the parsed boolean token
                skip();
            } else if (accept(DATA_GSDATE_BEGIN_TOKEN)) {
                //Date
                skip();
                String dateString = readInputUntil(DATA_END_TOKEN);
                obj = new NSDate(dateString);
            } else if (accept(DATA_GSINT_BEGIN_TOKEN, DATA_GSREAL_BEGIN_TOKEN)) {
                //Number
                skip();
                String numberString = readInputUntil(DATA_END_TOKEN);
                obj = new NSNumber(numberString);
            }
            //parse data end token
            read(DATA_END_TOKEN);
        } else {
            String dataString = readInputUntil(DATA_END_TOKEN).replaceAll("\\s+", "");

            //Two hex digits per byte; a trailing single digit is ignored
            byte[] bytes = new byte[dataString.length() / 2];
            for (int i = 0; i < bytes.length; i++) {
                String byteString = dataString.substring(i * 2, i * 2 + 2);
                try {
                    bytes[i] = (byte) Integer.parseInt(byteString, 16);
                } catch (NumberFormatException ex) {
                    //Report malformed data as a parse error instead of leaking an unchecked exception
                    throw parseError("Invalid hexadecimal byte '" + byteString + "' in data object.", index, ex);
                }
            }
            obj = new NSData(bytes);

            //skip end token
            skip();
        }

        return obj;
    }

    /**
     * Attempts to parse a plain string as a date if possible.
     *
     * @return A NSDate if the string represents such an object. Otherwise a NSString is returned.
     */
    private NSObject parseDateString() {
        String numericalString = parseString();
        if (numericalString.length() > 4 && numericalString.charAt(4) == DATE_DATE_FIELD_DELIMITER) {
            try {
                return new NSDate(numericalString);
            } catch (Exception ignored) {
                //An exception occurs if the string is not a date but just a string
            }
        }
        return new NSString(numericalString);
    }

    /**
     * Parses a plain string from the current parsing position.
     * The string is made up of all characters to the next whitespace, delimiter token or assignment token.
     *
     * @return The string found at the current parsing position.
     */
    private String parseString() {
        return readInputUntil(WHITESPACE_SPACE, WHITESPACE_TAB, WHITESPACE_NEWLINE, WHITESPACE_CARRIAGE_RETURN,
                ARRAY_ITEM_DELIMITER_TOKEN, DICTIONARY_ITEM_DELIMITER_TOKEN, DICTIONARY_ASSIGN_TOKEN, ARRAY_END_TOKEN);
    }

    /**
     * Parses a quoted string from the current parsing position.
     * The prerequisite for calling this method is, that a quoted string begin token has been read.
     *
     * @return The quoted string found at the parsing method with all special characters unescaped.
     * @throws ParseException If an error occurred during parsing.
     */
    private String parseQuotedString() throws ParseException {
        //Skip begin token
        skip();
        ByteArrayOutputStream quotedString = new ByteArrayOutputStream();
        //True while the previous byte was a backslash that itself was not escaped,
        //i.e. the current byte belongs to an escape sequence and cannot end the string.
        boolean escaped = false;
        while (data[index] != QUOTEDSTRING_END_TOKEN || escaped) {
            quotedString.write(data[index]);
            escaped = !escaped && data[index] == QUOTEDSTRING_ESCAPE_TOKEN;
            skip();
        }
        String unescapedString;
        try {
            unescapedString = parseQuotedString(toUtf8String(quotedString));
        } catch (Exception ex) {
            throw parseError("The quoted string could not be parsed.", index, ex);
        }
        //skip end token
        skip();
        return unescapedString;
    }

    /**
     * Parses a string according to the format specified for ASCII property lists.
     * Such strings can contain escape sequences which are unescaped in this method.
     * <p>
     * This method only works on its arguments and local variables, so it needs no locking.
     * The checked exceptions in the signature are kept for source compatibility with callers.
     *
     * @param s The escaped string according to the ASCII property list format, without leading and trailing quotation marks.
     * @return The unescaped string.
     * @throws UnsupportedEncodingException Declared for compatibility with existing callers.
     * @throws CharacterCodingException     Declared for compatibility with existing callers.
     * @throws NumberFormatException        If a unicode or octal escape sequence is malformed.
     */
    public static String parseQuotedString(String s) throws UnsupportedEncodingException, CharacterCodingException {
        StringBuilder parsed = new StringBuilder();
        StringCharacterIterator iterator = new StringCharacterIterator(s);
        for (char c = iterator.current(); iterator.getIndex() < iterator.getEndIndex(); c = iterator.next()) {
            if (c == '\\') {
                //An escaped sequence is following
                parsed.append(parseEscapedSequence(iterator));
            } else {
                parsed.append(c);
            }
        }
        return parsed.toString();
    }

    /**
     * Unescapes an escaped character sequence, e.g. \\u00FC.
     *
     * @param iterator The string character iterator pointing to the backslash that starts the sequence.
     *                 After the call it points to the last character of the sequence.
     * @return The unescaped character
     */
    private static char parseEscapedSequence(StringCharacterIterator iterator) {
        char c = iterator.next();
        if (c == 'b') {
            return '\b';
        } else if (c == 'n') {
            return '\n';
        } else if (c == 'r') {
            return '\r';
        } else if (c == 't') {
            return '\t';
        } else if (c == 'U' || c == 'u') {
            //4 digit hex Unicode value, read as high byte followed by low byte
            String byte1 = "";
            byte1 += iterator.next();
            byte1 += iterator.next();
            String byte2 = "";
            byte2 += iterator.next();
            byte2 += iterator.next();
            return (char) ((Integer.parseInt(byte1, 16) << 8) + Integer.parseInt(byte2, 16));
        } else if ((c >= '0') && (c <= '7')) {
            //3 digit octal ASCII value
            String num = "";
            num += c;
            num += iterator.next();
            num += iterator.next();
            return (char) Integer.parseInt(num, 8);
        } else {
            // Possibly something that needn't be escaped, but we should accept
            // it anyway for consistency with Apple tools.
            return c;
        }
    }

}