1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.json;
18 
19 import android.compat.annotation.UnsupportedAppUsage;
20 
21 // Note: this class was written without inspecting the non-free org.json sourcecode.
22 
23 /**
24  * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
25  * encoded string into the corresponding object. Most clients of
 * this class will need only the {@link #JSONTokener(String) constructor}
27  * and {@link #nextValue} method. Example usage: <pre>
28  * String json = "{"
29  *         + "  \"query\": \"Pizza\", "
30  *         + "  \"locations\": [ 94043, 90210 ] "
31  *         + "}";
32  *
33  * JSONObject object = (JSONObject) new JSONTokener(json).nextValue();
34  * String query = object.getString("query");
35  * JSONArray locations = object.getJSONArray("locations");</pre>
36  *
37  * <p>For best interoperability and performance use JSON that complies with
38  * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons
39  * this parser is lenient, so a successful parse does not indicate that the
40  * input string was valid JSON. All of the following syntax errors will be
41  * ignored:
42  * <ul>
43  *   <li>End of line comments starting with {@code //} or {@code #} and ending
44  *       with a newline character.
45  *   <li>C-style comments starting with {@code /*} and ending with
46  *       {@code *}{@code /}. Such comments may not be nested.
47  *   <li>Strings that are unquoted or {@code 'single quoted'}.
48  *   <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}.
49  *   <li>Octal integers prefixed with {@code 0}.
50  *   <li>Array elements separated by {@code ;}.
51  *   <li>Unnecessary array separators. These are interpreted as if null was the
52  *       omitted value.
53  *   <li>Key-value pairs separated by {@code =} or {@code =>}.
54  *   <li>Key-value pairs separated by {@code ;}.
55  * </ul>
56  *
57  * <p>Each tokener may be used to parse a single JSON string. Instances of this
58  * class are not thread safe. Although this class is nonfinal, it was not
59  * designed for inheritance and should not be subclassed. In particular,
60  * self-use by overrideable methods is not specified. See <i>Effective Java</i>
 * Item 17, "Design and document for inheritance or else prohibit it" for further
62  * information.
63  */
64 public class JSONTokener {
65 
66     /** The input JSON. */
67     @UnsupportedAppUsage
68     private final String in;
69 
70     /**
71      * The index of the next character to be returned by {@link #next}. When
72      * the input is exhausted, this equals the input's length.
73      */
74     @UnsupportedAppUsage
75     private int pos;
76 
77     /**
78      * @param in JSON encoded string. Null is not permitted and will yield a
79      *     tokener that throws {@code NullPointerExceptions} when methods are
80      *     called.
81      */
JSONTokener(String in)82     public JSONTokener(String in) {
83         // consume an optional byte order mark (BOM) if it exists
84         if (in != null && in.startsWith("\ufeff")) {
85             in = in.substring(1);
86         }
87         this.in = in;
88     }
89 
90     /**
91      * Returns the next value from the input.
92      *
93      * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean,
94      *     Integer, Long, Double or {@link JSONObject#NULL}.
95      * @throws JSONException if the input is malformed.
96      */
nextValue()97     public Object nextValue() throws JSONException {
98         int c = nextCleanInternal();
99         switch (c) {
100             case -1:
101                 throw syntaxError("End of input");
102 
103             case '{':
104                 return readObject();
105 
106             case '[':
107                 return readArray();
108 
109             case '\'':
110             case '"':
111                 return nextString((char) c);
112 
113             default:
114                 pos--;
115                 return readLiteral();
116         }
117     }
118 
119     @UnsupportedAppUsage
nextCleanInternal()120     private int nextCleanInternal() throws JSONException {
121         final int inLength = in.length();
122 
123         while (pos < inLength) {
124             int c = in.charAt(pos++);
125             switch (c) {
126                 case '\t':
127                 case ' ':
128                 case '\n':
129                 case '\r':
130                     continue;
131 
132                 case '/':
133                     if (pos == inLength) {
134                         return c;
135                     }
136 
137                     char peek = in.charAt(pos);
138                     switch (peek) {
139                         case '*':
140                             // skip a /* c-style comment */
141                             pos++;
142                             int commentEnd = in.indexOf("*/", pos);
143                             if (commentEnd == -1) {
144                                 throw syntaxError("Unterminated comment");
145                             }
146                             pos = commentEnd + 2;
147                             continue;
148 
149                         case '/':
150                             // skip a // end-of-line comment
151                             pos++;
152                             skipToEndOfLine();
153                             continue;
154 
155                         default:
156                             return c;
157                     }
158 
159                 case '#':
160                     /*
161                      * Skip a # hash end-of-line comment. The JSON RFC doesn't
162                      * specify this behavior, but it's required to parse
163                      * existing documents. See http://b/2571423.
164                      */
165                     skipToEndOfLine();
166                     continue;
167 
168                 default:
169                     return c;
170             }
171         }
172 
173         return -1;
174     }
175 
176     /**
177      * Advances the position until after the next newline character. If the line
178      * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
179      * caller.
180      */
181     @UnsupportedAppUsage
skipToEndOfLine()182     private void skipToEndOfLine() {
183         final int inLength = in.length();
184 
185         for (; pos < inLength; pos++) {
186             char c = in.charAt(pos);
187             if (c == '\r' || c == '\n') {
188                 pos++;
189                 break;
190             }
191         }
192     }
193 
194     /**
195      * Returns the string up to but not including {@code quote}, unescaping any
196      * character escape sequences encountered along the way. The opening quote
197      * should have already been read. This consumes the closing quote, but does
198      * not include it in the returned string.
199      *
200      * @param quote either ' or ".
201      */
nextString(char quote)202     public String nextString(char quote) throws JSONException {
203         /*
204          * For strings that are free of escape sequences, we can just extract
205          * the result as a substring of the input. But if we encounter an escape
206          * sequence, we need to use a StringBuilder to compose the result.
207          */
208         StringBuilder builder = null;
209 
210         /* the index of the first character not yet appended to the builder. */
211         int start = pos;
212 
213         final int inLength = in.length();
214 
215         while (pos < inLength) {
216             int c = in.charAt(pos++);
217             if (c == quote) {
218                 if (builder == null) {
219                     return in.substring(start, pos - 1);
220                 } else {
221                     builder.append(in, start, pos - 1);
222                     return builder.toString();
223                 }
224             }
225 
226             if (c == '\\') {
227                 if (pos == inLength) {
228                     throw syntaxError("Unterminated escape sequence");
229                 }
230                 if (builder == null) {
231                     builder = new StringBuilder();
232                 }
233                 builder.append(in, start, pos - 1);
234                 builder.append(readEscapeCharacter());
235                 start = pos;
236             }
237         }
238 
239         throw syntaxError("Unterminated string");
240     }
241 
242     /**
243      * Unescapes the character identified by the character or characters that
244      * immediately follow a backslash. The backslash '\' should have already
245      * been read. This supports both unicode escapes "u000A" and two-character
246      * escapes "\n".
247      */
248     @UnsupportedAppUsage
readEscapeCharacter()249     private char readEscapeCharacter() throws JSONException {
250         char escaped = in.charAt(pos++);
251         switch (escaped) {
252             case 'u':
253                 if (pos + 4 > in.length()) {
254                     throw syntaxError("Unterminated escape sequence");
255                 }
256                 String hex = in.substring(pos, pos + 4);
257                 pos += 4;
258                 try {
259                     return (char) Integer.parseInt(hex, 16);
260                 } catch (NumberFormatException nfe) {
261                     throw syntaxError("Invalid escape sequence: " + hex);
262                 }
263 
264             case 't':
265                 return '\t';
266 
267             case 'b':
268                 return '\b';
269 
270             case 'n':
271                 return '\n';
272 
273             case 'r':
274                 return '\r';
275 
276             case 'f':
277                 return '\f';
278 
279             case '\'':
280             case '"':
281             case '\\':
282             default:
283                 return escaped;
284         }
285     }
286 
287     /**
288      * Reads a null, boolean, numeric or unquoted string literal value. Numeric
289      * values will be returned as an Integer, Long, or Double, in that order of
290      * preference.
291      */
292     @UnsupportedAppUsage
readLiteral()293     private Object readLiteral() throws JSONException {
294         String literal = nextToInternal("{}[]/\\:,=;# \t\f");
295 
296         if (literal.length() == 0) {
297             throw syntaxError("Expected literal value");
298         } else if ("null".equalsIgnoreCase(literal)) {
299             return JSONObject.NULL;
300         } else if ("true".equalsIgnoreCase(literal)) {
301             return Boolean.TRUE;
302         } else if ("false".equalsIgnoreCase(literal)) {
303             return Boolean.FALSE;
304         }
305 
306         /* try to parse as an integral type... */
307         if (literal.indexOf('.') == -1) {
308             int base = 10;
309             String number = literal;
310             if (number.startsWith("0x") || number.startsWith("0X")) {
311                 number = number.substring(2);
312                 base = 16;
313             } else if (number.startsWith("0") && number.length() > 1) {
314                 number = number.substring(1);
315                 base = 8;
316             }
317             try {
318                 long longValue = Long.parseLong(number, base);
319                 if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) {
320                     return (int) longValue;
321                 } else {
322                     return longValue;
323                 }
324             } catch (NumberFormatException e) {
325                 /*
326                  * This only happens for integral numbers greater than
327                  * Long.MAX_VALUE, numbers in exponential form (5e-10) and
328                  * unquoted strings. Fall through to try floating point.
329                  */
330             }
331         }
332 
333         /* ...next try to parse as a floating point... */
334         try {
335             return Double.valueOf(literal);
336         } catch (NumberFormatException ignored) {
337         }
338 
339         /* ... finally give up. We have an unquoted string */
340         return literal;
341     }
342 
343     /**
344      * Returns the string up to but not including any of the given characters or
345      * a newline character. This does not consume the excluded character.
346      */
347     @UnsupportedAppUsage
nextToInternal(String excluded)348     private String nextToInternal(String excluded) {
349         final int inLength = in.length();
350 
351         int start = pos;
352         for (; pos < inLength; pos++) {
353             char c = in.charAt(pos);
354             if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) {
355                 return in.substring(start, pos);
356             }
357         }
358         return in.substring(start);
359     }
360 
361     /**
362      * Reads a sequence of key/value pairs and the trailing closing brace '}' of
363      * an object. The opening brace '{' should have already been read.
364      */
365     @UnsupportedAppUsage
readObject()366     private JSONObject readObject() throws JSONException {
367         JSONObject result = new JSONObject();
368 
369         /* Peek to see if this is the empty object. */
370         int first = nextCleanInternal();
371         if (first == '}') {
372             return result;
373         } else if (first != -1) {
374             pos--;
375         }
376 
377         final int inLength = in.length();
378 
379         while (true) {
380             Object name = nextValue();
381             if (!(name instanceof String)) {
382                 if (name == null) {
383                     throw syntaxError("Names cannot be null");
384                 } else {
385                     throw syntaxError("Names must be strings, but " + name
386                             + " is of type " + name.getClass().getName());
387                 }
388             }
389 
390             /*
391              * Expect the name/value separator to be either a colon ':', an
392              * equals sign '=', or an arrow "=>". The last two are bogus but we
393              * include them because that's what the original implementation did.
394              */
395             int separator = nextCleanInternal();
396             if (separator != ':' && separator != '=') {
397                 throw syntaxError("Expected ':' after " + name);
398             }
399             if (pos < inLength && in.charAt(pos) == '>') {
400                 pos++;
401             }
402 
403             result.put((String) name, nextValue());
404 
405             switch (nextCleanInternal()) {
406                 case '}':
407                     return result;
408                 case ';':
409                 case ',':
410                     continue;
411                 default:
412                     throw syntaxError("Unterminated object");
413             }
414         }
415     }
416 
417     /**
418      * Reads a sequence of values and the trailing closing brace ']' of an
419      * array. The opening brace '[' should have already been read. Note that
420      * "[]" yields an empty array, but "[,]" returns a two-element array
421      * equivalent to "[null,null]".
422      */
423     @UnsupportedAppUsage
readArray()424     private JSONArray readArray() throws JSONException {
425         JSONArray result = new JSONArray();
426 
427         /* to cover input that ends with ",]". */
428         boolean hasTrailingSeparator = false;
429 
430         while (true) {
431             switch (nextCleanInternal()) {
432                 case -1:
433                     throw syntaxError("Unterminated array");
434                 case ']':
435                     if (hasTrailingSeparator) {
436                         result.put(null);
437                     }
438                     return result;
439                 case ',':
440                 case ';':
441                     /* A separator without a value first means "null". */
442                     result.put(null);
443                     hasTrailingSeparator = true;
444                     continue;
445                 default:
446                     pos--;
447             }
448 
449             result.put(nextValue());
450 
451             switch (nextCleanInternal()) {
452                 case ']':
453                     return result;
454                 case ',':
455                 case ';':
456                     hasTrailingSeparator = true;
457                     continue;
458                 default:
459                     throw syntaxError("Unterminated array");
460             }
461         }
462     }
463 
464     /**
465      * Returns an exception containing the given message plus the current
466      * position and the entire input string.
467      */
syntaxError(String message)468     public JSONException syntaxError(String message) {
469         return new JSONException(message + this);
470     }
471 
472     /**
473      * Returns the current position and the entire input string.
474      */
toString()475     @Override public String toString() {
476         // consistent with the original implementation
477         return " at character " + pos + " of " + in;
478     }
479 
480     /*
481      * Legacy APIs.
482      *
483      * None of the methods below are on the critical path of parsing JSON
484      * documents. They exist only because they were exposed by the original
485      * implementation and may be used by some clients.
486      */
487 
488     /**
489      * Returns true until the input has been exhausted.
490      */
more()491     public boolean more() {
492         return pos < in.length();
493     }
494 
495     /**
496      * Returns the next available character, or the null character '\0' if all
497      * input has been exhausted. The return value of this method is ambiguous
498      * for JSON strings that contain the character '\0'.
499      */
next()500     public char next() {
501         return pos < in.length() ? in.charAt(pos++) : '\0';
502     }
503 
504     /**
505      * Returns the next available character if it equals {@code c}. Otherwise an
506      * exception is thrown.
507      */
next(char c)508     public char next(char c) throws JSONException {
509         char result = next();
510         if (result != c) {
511             throw syntaxError("Expected " + c + " but was " + result);
512         }
513         return result;
514     }
515 
516     /**
517      * Returns the next character that is not whitespace and does not belong to
518      * a comment. If the input is exhausted before such a character can be
519      * found, the null character '\0' is returned. The return value of this
520      * method is ambiguous for JSON strings that contain the character '\0'.
521      */
nextClean()522     public char nextClean() throws JSONException {
523         int nextCleanInt = nextCleanInternal();
524         return nextCleanInt == -1 ? '\0' : (char) nextCleanInt;
525     }
526 
527     /**
528      * Returns the next {@code length} characters of the input.
529      *
530      * @throws JSONException if the remaining input is not long enough to
531      *     satisfy this request.
532      */
next(int length)533     public String next(int length) throws JSONException {
534         if (pos + length > in.length()) {
535             throw syntaxError(length + " is out of bounds");
536         }
537         String result = in.substring(pos, pos + length);
538         pos += length;
539         return result;
540     }
541 
542     /**
543      * Returns the {@link String#trim trimmed} string holding the characters up
544      * to but not including the first of:
545      * <ul>
546      *   <li>any character in {@code excluded}
547      *   <li>a newline character '\n'
548      *   <li>a carriage return '\r'
549      * </ul>
550      *
551      * @return a possibly-empty string
552      */
nextTo(String excluded)553     public String nextTo(String excluded) {
554         if (excluded == null) {
555             throw new NullPointerException("excluded == null");
556         }
557         return nextToInternal(excluded).trim();
558     }
559 
560     /**
561      * Equivalent to {@code nextTo(String.valueOf(excluded))}.
562      */
nextTo(char excluded)563     public String nextTo(char excluded) {
564         return nextToInternal(String.valueOf(excluded)).trim();
565     }
566 
567     /**
568      * Advances past all input up to and including the next occurrence of
569      * {@code thru}. If the remaining input doesn't contain {@code thru}, the
570      * input is exhausted.
571      */
skipPast(String thru)572     public void skipPast(String thru) {
573         int thruStart = in.indexOf(thru, pos);
574         pos = thruStart == -1 ? in.length() : (thruStart + thru.length());
575     }
576 
577     /**
578      * Advances past all input up to but not including the next occurrence of
579      * {@code to}. If the remaining input doesn't contain {@code to}, the input
580      * is unchanged.
581      */
skipTo(char to)582     public char skipTo(char to) {
583         int index = in.indexOf(to, pos);
584         if (index != -1) {
585             pos = index;
586             return to;
587         } else {
588             return '\0';
589         }
590     }
591 
592     /**
593      * Unreads the most recent character of input. If no input characters have
594      * been read, the input is unchanged.
595      */
back()596     public void back() {
597         if (--pos == -1) {
598             pos = 0;
599         }
600     }
601 
602     /**
603      * Returns the integer [0..15] value for the given hex character, or -1
604      * for non-hex input.
605      *
606      * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other
607      *     character will yield a -1 result.
608      */
dehexchar(char hex)609     public static int dehexchar(char hex) {
610         if (hex >= '0' && hex <= '9') {
611             return hex - '0';
612         } else if (hex >= 'A' && hex <= 'F') {
613             return hex - 'A' + 10;
614         } else if (hex >= 'a' && hex <= 'f') {
615             return hex - 'a' + 10;
616         } else {
617             return -1;
618         }
619     }
620 }
621