1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.tradefed.util;
17 
18 import com.android.tradefed.error.HarnessRuntimeException;
19 import com.android.tradefed.log.LogUtil.CLog;
20 import com.android.tradefed.result.error.InfraErrorIdentifier;
21 
22 import java.util.ArrayList;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25 
26 public class QuotationAwareTokenizer {
27 
28     /**
29      * Tokenizes the string, splitting on specified delimiter. Does not split between consecutive,
30      * unquoted double-quote marks.
31      *
32      * <p>How the tokenizer works:
33      *
34      * <ol>
35      *   <li> Split the string into "characters" where each "character" is either an escaped
36      *       character like \" (that is, "\\\"") or a single real character like f (just "f").
37      *   <li> For each "character"
38      *       <ol>
39      *         <li> If it's a space, finish a token unless we're being quoted
40      *         <li> If it's a quotation mark, flip the "we're being quoted" bit
41      *         <li> Otherwise, add it to the token being built
42      *       </ol>
43      *
44      *   <li> At EOL, we typically haven't added the final token to the (tokens) {@link ArrayList}
45      *       <ol>
46      *         <li> If the last "character" is an escape character, throw an exception; that's not
47      *             valid
48      *         <li> If we're in the middle of a quotation, throw an exception; that's not valid
49      *         <li> Otherwise, add the final token to (tokens)
50      *       </ol>
51      *
52      *   <li> Return a String[] version of (tokens)
53      * </ol>
54      *
55      * @param line A {@link String} to be tokenized
56      * @param delim the delimiter to split on
57      * @param logging whether or not to log operations
58      * @return A tokenized version of the string
59      * @throws IllegalArgumentException if the line cannot be parsed
60      */
tokenizeLine(String line, String delim, boolean logging)61     public static String[] tokenizeLine(String line, String delim, boolean logging)
62             throws IllegalArgumentException {
63         if (line == null) {
64             throw new IllegalArgumentException("line is null");
65         }
66 
67         ArrayList<String> tokens = new ArrayList<String>();
68         StringBuilder token = new StringBuilder();
69         // This pattern matches an escaped character or a character.  Escaped char takes precedence
70         final Pattern charPattern = Pattern.compile("\\\\.|.");
71         final Matcher charMatcher = charPattern.matcher(line);
72         String aChar = "";
73         boolean quotation = false;
74 
75         log(String.format("Trying to tokenize the line '%s'", line), logging);
76         while (charMatcher.find()) {
77             aChar = charMatcher.group();
78 
79             if (delim.equals(aChar)) {
80                 if (quotation) {
81                     // inside a quotation; treat spaces as part of the token
82                     token.append(aChar);
83                 } else {
84                     if (token.length() > 0) {
85                         // this is the end of a non-empty token; dump it in our list of tokens,
86                         // clear our temp storage, and keep rolling
87                         log(String.format("Finished token '%s'", token.toString()), logging);
88                         // Handle escaped empty string by '' to restore and empty string
89                         if (token.toString().equals("''")) {
90                             tokens.add("");
91                         } else {
92                             tokens.add(token.toString());
93                         }
94                         token.delete(0, token.length());
95                     }
96                     // otherwise, this is the non-first in a sequence of spaces; ignore.
97                 }
98             } else if ("\"".equals(aChar)) {
99                 // unescaped quotation mark; flip quotation state
100                 log("Flipped quotation state", logging);
101                 quotation ^= true;
102             } else {
103                 // default case: add the character to the token being built
104                 token.append(aChar);
105             }
106         }
107 
108         if (quotation || "\\".equals(aChar)) {
109             // We ended in a quotation or with an escape character; this is not valid
110             throw new HarnessRuntimeException(
111                     "Unexpected EOL in a quotation or after an escape " + "character",
112                     InfraErrorIdentifier.OPTION_CONFIGURATION_ERROR);
113         }
114 
115         // Add the final token to the tokens array.
116         if (token.length() > 0) {
117             log(String.format("Finished final token '%s'", token.toString()), logging);
118             // Handle escaped empty string by '' to restore and empty string
119             if (token.toString().equals("''")) {
120                 tokens.add("");
121             } else {
122                 tokens.add(token.toString());
123             }
124             token.delete(0, token.length());
125         }
126 
127         String[] tokensArray = new String[tokens.size()];
128         return tokens.toArray(tokensArray);
129     }
130 
131     /**
132      * Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted
133      * double-quote marks.
134      *
135      * <p>See also {@link #tokenizeLine(String, String)}
136      */
tokenizeLine(String line)137     public static String[] tokenizeLine(String line) {
138         return tokenizeLine(line, " ", true);
139     }
140 
tokenizeLine(String line, String delim)141     public static String[] tokenizeLine(String line, String delim) {
142         return tokenizeLine(line, delim, true);
143     }
144 
145     /**
146      * Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted
147      * double-quote marks.
148      *
149      * <p>See also {@link #tokenizeLine(String, String)}
150      */
tokenizeLine(String line, boolean logging)151     public static String[] tokenizeLine(String line, boolean logging) {
152         return tokenizeLine(line, " ", logging);
153     }
154 
155     /**
156      * Perform the reverse of {@link #tokenizeLine(String)}. <br/>
157      * Given array of tokens, combine them into a single line.
158      *
159      * @param tokens
160      * @return A {@link String} created from all the tokens.
161      */
combineTokens(String... tokens)162     public static String combineTokens(String... tokens) {
163         final Pattern wsPattern = Pattern.compile("\\s");
164         StringBuilder sb = new StringBuilder();
165         for (int i=0; i < tokens.length; i++) {
166             final String token = tokens[i];
167             final Matcher wsMatcher = wsPattern.matcher(token);
168             if (wsMatcher.find()) {
169                 sb.append('"');
170                 sb.append(token);
171                 sb.append('"');
172             } else {
173                 sb.append(token);
174             }
175             if (i < (tokens.length - 1)) {
176                 // don't output space after last token
177                 sb.append(' ');
178             }
179         }
180         return sb.toString();
181     }
182 
log(String message, boolean display)183     private static void log(String message, boolean display) {
184         if (display) {
185             CLog.v(message);
186         }
187     }
188 }
189