/* * Copyright (C) 2006 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package android.webkit; import android.annotation.NonNull; import android.annotation.Nullable; import android.compat.Compatibility; import android.compat.annotation.ChangeId; import android.compat.annotation.EnabledSince; import android.compat.annotation.UnsupportedAppUsage; import android.net.ParseException; import android.net.Uri; import android.net.WebAddress; import android.os.Build; import android.util.Log; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.nio.charset.Charset; import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; public final class URLUtil { /** * This feature enables parsing of Content-Disposition headers that conform to RFC 6266. In * particular, this enables parsing of {@code filename*} values which can use a different * character encoding. * * @hide */ @ChangeId @EnabledSince(targetSdkVersion = Build.VERSION_CODES.VANILLA_ICE_CREAM) static final long PARSE_CONTENT_DISPOSITION_USING_RFC_6266 = 319400769L; private static final String LOGTAG = "webkit"; private static final boolean TRACE = false; // to refer to bar.png under your package's asset/foo/ directory, use // "file:///android_asset/foo/bar.png". static final String ASSET_BASE = "file:///android_asset/"; // to refer to bar.png under your package's res/drawable/ directory, use // "file:///android_res/drawable/bar.png". Use "drawable" to refer to // "drawable-hdpi" directory as well. static final String RESOURCE_BASE = "file:///android_res/"; static final String FILE_BASE = "file:"; static final String PROXY_BASE = "file:///cookieless_proxy/"; static final String CONTENT_BASE = "content:"; /** Cleans up (if possible) user-entered web addresses */ public static String guessUrl(String inUrl) { String retVal = inUrl; WebAddress webAddress; if (TRACE) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl); if (inUrl.length() == 0) return inUrl; if (inUrl.startsWith("about:")) return inUrl; // Do not try to interpret data scheme URLs if (inUrl.startsWith("data:")) return inUrl; // Do not try to interpret file scheme URLs if (inUrl.startsWith("file:")) return inUrl; // Do not try to interpret javascript scheme URLs if (inUrl.startsWith("javascript:")) return inUrl; // bug 762454: strip period off end of url if (inUrl.endsWith(".") == true) { inUrl = inUrl.substring(0, inUrl.length() - 1); } try { webAddress = new WebAddress(inUrl); } catch (ParseException ex) { if (TRACE) { Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl); } return retVal; } // Check host if (webAddress.getHost().indexOf('.') == -1) { // no dot: user probably entered a bare domain. try .com webAddress.setHost("www." + webAddress.getHost() + ".com"); } return webAddress.toString(); } /** * Inserts the {@code inQuery} in the {@code template} after URL-encoding it. The encoded query * will replace the {@code queryPlaceHolder}. */ public static String composeSearchUrl( String inQuery, String template, String queryPlaceHolder) { int placeHolderIndex = template.indexOf(queryPlaceHolder); if (placeHolderIndex < 0) { return null; } String query; StringBuilder buffer = new StringBuilder(); buffer.append(template.substring(0, placeHolderIndex)); try { query = java.net.URLEncoder.encode(inQuery, "utf-8"); buffer.append(query); } catch (UnsupportedEncodingException ex) { return null; } buffer.append(template.substring(placeHolderIndex + queryPlaceHolder.length())); return buffer.toString(); } public static byte[] decode(byte[] url) throws IllegalArgumentException { if (url.length == 0) { return new byte[0]; } // Create a new byte array with the same length to ensure capacity byte[] tempData = new byte[url.length]; int tempCount = 0; for (int i = 0; i < url.length; i++) { byte b = url[i]; if (b == '%') { if (url.length - i > 2) { b = (byte) (parseHex(url[i + 1]) * 16 + parseHex(url[i + 2])); i += 2; } else { throw new IllegalArgumentException("Invalid format"); } } tempData[tempCount++] = b; } byte[] retData = new byte[tempCount]; System.arraycopy(tempData, 0, retData, 0, tempCount); return retData; } /** * @return {@code true} if the url is correctly URL encoded */ @UnsupportedAppUsage static boolean verifyURLEncoding(String url) { int count = url.length(); if (count == 0) { return false; } int index = url.indexOf('%'); while (index >= 0 && index < count) { if (index < count - 2) { try { parseHex((byte) url.charAt(++index)); parseHex((byte) url.charAt(++index)); } catch (IllegalArgumentException e) { return false; } } else { return false; } index = url.indexOf('%', index + 1); } return true; } private static int parseHex(byte b) { if (b >= '0' && b <= '9') return (b - '0'); if (b >= 'A' && b <= 'F') return (b - 'A' + 10); if (b >= 'a' && b <= 'f') return (b - 'a' + 10); throw new IllegalArgumentException("Invalid hex char '" + b + "'"); } /** * @return {@code true} if the url is an asset file. */ public static boolean isAssetUrl(String url) { return (null != url) && url.startsWith(ASSET_BASE); } /** * @return {@code true} if the url is a resource file. * @hide */ @UnsupportedAppUsage public static boolean isResourceUrl(String url) { return (null != url) && url.startsWith(RESOURCE_BASE); } /** * @return {@code true} if the url is a proxy url to allow cookieless network requests from a * file url. * @deprecated Cookieless proxy is no longer supported. */ @Deprecated public static boolean isCookielessProxyUrl(String url) { return (null != url) && url.startsWith(PROXY_BASE); } /** * @return {@code true} if the url is a local file. */ public static boolean isFileUrl(String url) { return (null != url) && (url.startsWith(FILE_BASE) && !url.startsWith(ASSET_BASE) && !url.startsWith(PROXY_BASE)); } /** * @return {@code true} if the url is an about: url. */ public static boolean isAboutUrl(String url) { return (null != url) && url.startsWith("about:"); } /** * @return {@code true} if the url is a data: url. */ public static boolean isDataUrl(String url) { return (null != url) && url.startsWith("data:"); } /** * @return {@code true} if the url is a javascript: url. */ public static boolean isJavaScriptUrl(String url) { return (null != url) && url.startsWith("javascript:"); } /** * @return {@code true} if the url is an http: url. */ public static boolean isHttpUrl(String url) { return (null != url) && (url.length() > 6) && url.substring(0, 7).equalsIgnoreCase("http://"); } /** * @return {@code true} if the url is an https: url. */ public static boolean isHttpsUrl(String url) { return (null != url) && (url.length() > 7) && url.substring(0, 8).equalsIgnoreCase("https://"); } /** * @return {@code true} if the url is a network url. */ public static boolean isNetworkUrl(String url) { if (url == null || url.length() == 0) { return false; } return isHttpUrl(url) || isHttpsUrl(url); } /** * @return {@code true} if the url is a content: url. */ public static boolean isContentUrl(String url) { return (null != url) && url.startsWith(CONTENT_BASE); } /** * @return {@code true} if the url is valid. */ public static boolean isValidUrl(String url) { if (url == null || url.length() == 0) { return false; } return (isAssetUrl(url) || isResourceUrl(url) || isFileUrl(url) || isAboutUrl(url) || isHttpUrl(url) || isHttpsUrl(url) || isJavaScriptUrl(url) || isContentUrl(url)); } /** Strips the url of the anchor. */ public static String stripAnchor(String url) { int anchorIndex = url.indexOf('#'); if (anchorIndex != -1) { return url.substring(0, anchorIndex); } return url; } /** * Guesses canonical filename that a download would have, using the URL and contentDisposition. * *
File extension, if not defined, is added based on the mimetype. * *
The {@code contentDisposition} argument will be treated differently depending on * targetSdkVersion. * *
The function also changed in the following ways in {@code VANILLA_ICE_CREAM}: * *
Behavior depends on targetSdkVersion. * *
The pattern will attempt to parse the value as either single-, double-, or unquoted. For * the single- and double-quoted options, the pattern allows escaped quotes as part of the * value, as per rfc2616 * section-2.2 */ @SuppressWarnings("RegExpRepeatedSpace") // Spaces are only for readability. private static final Pattern DISPOSITION_PATTERN = Pattern.compile( """ \\s*(\\S+?) # Group 1: parameter name \\s*=\\s* # Match equals sign (?: # non-capturing group of options '( (?: [^'\\\\] | \\\\. )* )' # Group 2: single-quoted | "( (?: [^"\\\\] | \\\\. )* )" # Group 3: double-quoted | ( [^'"][^;\\s]* ) # Group 4: un-quoted parameter )\\s*;? # Optional end semicolon""", Pattern.COMMENTS); /** * Extract filename from a {@code Content-Disposition} header value. * *
This method implements the parsing defined in RFC 6266, supporting both the {@code * filename} and {@code filename*} disposition parameters. If the passed header value has the * {@code "inline"} disposition type, this method will return {@code null} to indicate that a * download was not intended. * *
If both {@code filename*} and {@code filename} is present, the former will be returned, as * per the RFC. Invalid encoded values will be ignored. * * @param contentDisposition Value of {@code Content-Disposition} header. * @return The filename suggested by the header or {@code null} if no filename could be parsed * from the header value. */ @Nullable private static String getFilenameFromContentDispositionRfc6266( @NonNull String contentDisposition) { String[] parts = contentDisposition.trim().split(";", 2); if (parts.length < 2) { // Need at least 2 parts, the `disposition-type` and at least one `disposition-parm`. return null; } String dispositionType = parts[0].trim(); if ("inline".equalsIgnoreCase(dispositionType)) { // "inline" should not result in a download. // Unknown disposition types should be handles as "attachment" // https://datatracker.ietf.org/doc/html/rfc6266#section-4.2 return null; } String dispositionParameters = parts[1]; Matcher matcher = DISPOSITION_PATTERN.matcher(dispositionParameters); String filename = null; String filenameExt = null; while (matcher.find()) { String parameter = matcher.group(1); String value; if (matcher.group(2) != null) { value = removeSlashEscapes(matcher.group(2)); // Value was single-quoted } else if (matcher.group(3) != null) { value = removeSlashEscapes(matcher.group(3)); // Value was double-quoted } else { value = matcher.group(4); // Value was un-quoted } if (parameter == null || value == null) { continue; } if ("filename*".equalsIgnoreCase(parameter)) { filenameExt = parseExtValueString(value); } else if ("filename".equalsIgnoreCase(parameter)) { filename = value; } } // RFC 6266 dictates the filenameExt should be preferred if present. if (filenameExt != null) { return filenameExt; } return filename; } /** Replace escapes of the \X form with X. */ private static String removeSlashEscapes(String raw) { if (raw == null) { return null; } return raw.replaceAll("\\\\(.)", "$1"); } /** * Parse an extended value string which can be percent-encoded. Return {@code} null if unable to * parse the string. */ private static String parseExtValueString(String raw) { String[] parts = raw.split("'", 3); if (parts.length < 3) { return null; } String encoding = parts[0]; // Intentionally ignore parts[1] (language). String valueChars = parts[2]; try { // The URLDecoder force-decodes + as " " // so preemptively replace all values with the encoded value to preserve them. Charset charset = Charset.forName(encoding); String valueWithEncodedPlus = encodePlusCharacters(valueChars, charset); return URLDecoder.decode(valueWithEncodedPlus, charset); } catch (RuntimeException ignored) { return null; // Ignoring an un-parsable value is within spec. } } /** * Replace all instances of {@code "+"} with the percent-encoded equivalent for the given {@code * charset}. */ @NonNull private static String encodePlusCharacters(@NonNull String valueChars, Charset charset) { StringBuilder sb = new StringBuilder(); for (byte b : charset.encode("+").array()) { // Formatting a byte is not possible with TextUtils.formatSimple sb.append(String.format("%02x", b)); } return valueChars.replaceAll("\\+", sb.toString()); } }