1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *   http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 package javax.xml.parsers;
19 
/**
 * Converts a file-system path into a {@code file://} URI string by
 * normalizing the separator to {@code '/'} and percent-escaping characters
 * that are not written literally.
 */
class FilePathToURI {

    // which ASCII characters need to be escaped
    private static final boolean[] gNeedEscaping = new boolean[128];
    // hex digits used to build the %HH escape sequences
    private static final char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
                                           '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    // mark the ASCII characters that must be written as %HH
    static {
        // control characters 0x00-0x1F and DEL
        for (int i = 0; i <= 0x1f; i++) {
            gNeedEscaping[i] = true;
        }
        gNeedEscaping[0x7f] = true;
        // printable ASCII characters that are escaped as well
        char[] escChs = {' ', '<', '>', '#', '%', '"', '{', '}',
                         '|', '\\', '^', '~', '[', ']', '`'};
        for (int i = 0; i < escChs.length; i++) {
            gNeedEscaping[escChs[i]] = true;
        }
    }

    /**
     * Escapes a file path to a {@code file://} URI string.
     *
     * <p>Every occurrence of {@link java.io.File#separatorChar} is first
     * replaced by {@code '/'}. The result is then appended after the literal
     * prefix {@code "file://"}; an additional {@code '/'} is inserted only
     * when the path starts with an ASCII drive letter followed by
     * {@code ':'} (so {@code C:/blah} becomes {@code file:///C:/blah}).
     * No other leading slash is added.</p>
     *
     * <p>The following are written as {@code %HH} (upper-case hex):
     * the ASCII control characters 0x00-0x1F and 0x7F, the characters
     * {@code ' '}, {@code '<'}, {@code '>'}, {@code '#'}, {@code '%'},
     * {@code '"'}, {@code '{'}, {@code '}'}, {@code '|'}, {@code '\'},
     * {@code '^'}, {@code '~'}, {@code '['}, {@code ']'} and {@code '`'},
     * and every byte of the UTF-8 encoding of non-ASCII characters
     * (value &gt;= 128).</p>
     *
     * @param path the file path to convert; may be {@code null}
     * @return the escaped URI string, or {@code null} if {@code path} is
     *         {@code null}
     */
    public static String filepath2URI(String path) {
        // return null if path is null.
        if (path == null) {
            return null;
        }

        path = path.replace(java.io.File.separatorChar, '/');

        int len = path.length();
        // worst case is three output characters per input character, plus the prefix
        StringBuilder buffer = new StringBuilder(len * 3 + 8);
        buffer.append("file://");
        // change C:/blah to /C:/blah
        // Only genuine ASCII letters count as drive letters; a case-mapping
        // test would also accept non-ASCII characters such as U+0131 or
        // U+017F, whose upper-case forms are 'I' and 'S'.
        if (len >= 2 && path.charAt(1) == ':' && isAsciiLetter(path.charAt(0))) {
            buffer.append('/');
        }

        // copy the leading run of ASCII characters, escaping where needed
        int i = 0;
        for (; i < len; i++) {
            char ch = path.charAt(i);
            // if it's not an ASCII character, break here, and use UTF-8 encoding
            if (ch >= 128) {
                break;
            }
            appendOctet(buffer, ch);
        }

        // we saw some non-ascii character
        if (i < len) {
            // get UTF-8 bytes for the remaining sub-string
            byte[] bytes;
            try {
                bytes = path.substring(i).getBytes("UTF-8");
            } catch (java.io.UnsupportedEncodingException e) {
                // should never happen; fall back to the separator-normalized path
                return path;
            }

            // for each byte: mask to 0..255 so bytes >= 0x80 are seen as positive
            for (int j = 0; j < bytes.length; j++) {
                appendOctet(buffer, bytes[j] & 0xff);
            }
        }

        return buffer.toString();
    }

    // Returns true if c is one of 'A'-'Z' or 'a'-'z'.
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    // Appends one octet (0..255) to buffer, as %HH if it is non-ASCII or
    // flagged in gNeedEscaping, otherwise as the literal ASCII character.
    private static void appendOctet(StringBuilder buffer, int octet) {
        if (octet >= 128 || gNeedEscaping[octet]) {
            buffer.append('%');
            buffer.append(gHexChs[octet >> 4]);
            buffer.append(gHexChs[octet & 0xf]);
        }
        else {
            buffer.append((char) octet);
        }
    }

}//FilePathToURI
130