1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.android.tradefed.util; 17 18 import com.android.tradefed.error.HarnessRuntimeException; 19 import com.android.tradefed.log.LogUtil.CLog; 20 import com.android.tradefed.result.error.InfraErrorIdentifier; 21 22 import java.util.ArrayList; 23 import java.util.regex.Matcher; 24 import java.util.regex.Pattern; 25 26 public class QuotationAwareTokenizer { 27 28 /** 29 * Tokenizes the string, splitting on specified delimiter. Does not split between consecutive, 30 * unquoted double-quote marks. 31 * 32 * <p>How the tokenizer works: 33 * 34 * <ol> 35 * <li> Split the string into "characters" where each "character" is either an escaped 36 * character like \" (that is, "\\\"") or a single real character like f (just "f"). 37 * <li> For each "character" 38 * <ol> 39 * <li> If it's a space, finish a token unless we're being quoted 40 * <li> If it's a quotation mark, flip the "we're being quoted" bit 41 * <li> Otherwise, add it to the token being built 42 * </ol> 43 * 44 * <li> At EOL, we typically haven't added the final token to the (tokens) {@link ArrayList} 45 * <ol> 46 * <li> If the last "character" is an escape character, throw an exception; that's not 47 * valid 48 * <li> If we're in the middle of a quotation, throw an exception; that's not valid 49 * <li> Otherwise, add the final token to (tokens) 50 * </ol> 51 * 52 * <li> Return a String[] version of (tokens) 53 * </ol> 54 * 55 * @param line A {@link String} to be tokenized 56 * @param delim the delimiter to split on 57 * @param logging whether or not to log operations 58 * @return A tokenized version of the string 59 * @throws IllegalArgumentException if the line cannot be parsed 60 */ tokenizeLine(String line, String delim, boolean logging)61 public static String[] tokenizeLine(String line, String delim, boolean logging) 62 throws IllegalArgumentException { 63 if (line == null) { 64 throw new IllegalArgumentException("line is null"); 65 } 66 67 ArrayList<String> tokens = new ArrayList<String>(); 68 StringBuilder token = new StringBuilder(); 69 // This pattern matches an escaped character or a character. Escaped char takes precedence 70 final Pattern charPattern = Pattern.compile("\\\\.|."); 71 final Matcher charMatcher = charPattern.matcher(line); 72 String aChar = ""; 73 boolean quotation = false; 74 75 log(String.format("Trying to tokenize the line '%s'", line), logging); 76 while (charMatcher.find()) { 77 aChar = charMatcher.group(); 78 79 if (delim.equals(aChar)) { 80 if (quotation) { 81 // inside a quotation; treat spaces as part of the token 82 token.append(aChar); 83 } else { 84 if (token.length() > 0) { 85 // this is the end of a non-empty token; dump it in our list of tokens, 86 // clear our temp storage, and keep rolling 87 log(String.format("Finished token '%s'", token.toString()), logging); 88 // Handle escaped empty string by '' to restore and empty string 89 if (token.toString().equals("''")) { 90 tokens.add(""); 91 } else { 92 tokens.add(token.toString()); 93 } 94 token.delete(0, token.length()); 95 } 96 // otherwise, this is the non-first in a sequence of spaces; ignore. 97 } 98 } else if ("\"".equals(aChar)) { 99 // unescaped quotation mark; flip quotation state 100 log("Flipped quotation state", logging); 101 quotation ^= true; 102 } else { 103 // default case: add the character to the token being built 104 token.append(aChar); 105 } 106 } 107 108 if (quotation || "\\".equals(aChar)) { 109 // We ended in a quotation or with an escape character; this is not valid 110 throw new HarnessRuntimeException( 111 "Unexpected EOL in a quotation or after an escape " + "character", 112 InfraErrorIdentifier.OPTION_CONFIGURATION_ERROR); 113 } 114 115 // Add the final token to the tokens array. 116 if (token.length() > 0) { 117 log(String.format("Finished final token '%s'", token.toString()), logging); 118 // Handle escaped empty string by '' to restore and empty string 119 if (token.toString().equals("''")) { 120 tokens.add(""); 121 } else { 122 tokens.add(token.toString()); 123 } 124 token.delete(0, token.length()); 125 } 126 127 String[] tokensArray = new String[tokens.size()]; 128 return tokens.toArray(tokensArray); 129 } 130 131 /** 132 * Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted 133 * double-quote marks. 134 * 135 * <p>See also {@link #tokenizeLine(String, String)} 136 */ tokenizeLine(String line)137 public static String[] tokenizeLine(String line) { 138 return tokenizeLine(line, " ", true); 139 } 140 tokenizeLine(String line, String delim)141 public static String[] tokenizeLine(String line, String delim) { 142 return tokenizeLine(line, delim, true); 143 } 144 145 /** 146 * Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted 147 * double-quote marks. 148 * 149 * <p>See also {@link #tokenizeLine(String, String)} 150 */ tokenizeLine(String line, boolean logging)151 public static String[] tokenizeLine(String line, boolean logging) { 152 return tokenizeLine(line, " ", logging); 153 } 154 155 /** 156 * Perform the reverse of {@link #tokenizeLine(String)}. <br/> 157 * Given array of tokens, combine them into a single line. 158 * 159 * @param tokens 160 * @return A {@link String} created from all the tokens. 161 */ combineTokens(String... tokens)162 public static String combineTokens(String... tokens) { 163 final Pattern wsPattern = Pattern.compile("\\s"); 164 StringBuilder sb = new StringBuilder(); 165 for (int i=0; i < tokens.length; i++) { 166 final String token = tokens[i]; 167 final Matcher wsMatcher = wsPattern.matcher(token); 168 if (wsMatcher.find()) { 169 sb.append('"'); 170 sb.append(token); 171 sb.append('"'); 172 } else { 173 sb.append(token); 174 } 175 if (i < (tokens.length - 1)) { 176 // don't output space after last token 177 sb.append(' '); 178 } 179 } 180 return sb.toString(); 181 } 182 log(String message, boolean display)183 private static void log(String message, boolean display) { 184 if (display) { 185 CLog.v(message); 186 } 187 } 188 } 189