/*
 * Copyright (c) 1999, 2000, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.util.regex;


/**
 * Utility class that implements the standard C ctype functionality.
/* Terminates the header commentary that precedes this class. */

/**
 * Utility class that implements the standard C ctype functionality for the
 * 7-bit ASCII range.
 *
 * <p>Every ASCII code point has one entry in a 128-element lookup table. The
 * upper bits of an entry are the classification flags declared below
 * ({@link #UPPER}, {@link #LOWER}, ...); the low six bits hold the numeric
 * value of the character when it is read as a digit (0-9 for {@code '0'-'9'},
 * 10-35 for {@code 'A'-'Z'} and {@code 'a'-'z'}, 0 for everything else).
 * Code points outside {@code 0x00-0x7F} have no classification at all.
 *
 * @author Hong Zhang
 */
final class ASCII {

    // ---- Single-bit classification flags stored in the lookup table ----

    static final int UPPER   = 0x00000100;

    static final int LOWER   = 0x00000200;

    static final int DIGIT   = 0x00000400;

    static final int SPACE   = 0x00000800;

    static final int PUNCT   = 0x00001000;

    static final int CNTRL   = 0x00002000;

    static final int BLANK   = 0x00004000;

    static final int HEX     = 0x00008000;

    static final int UNDER   = 0x00010000;

    // ---- Composite masks built from the flags above ----

    /** Mask covering the flag bits {@code 0x0100-0x8000} (does not include {@link #UNDER}). */
    static final int ASCII   = 0x0000FF00;

    static final int ALPHA   = (UPPER|LOWER);

    static final int ALNUM   = (UPPER|LOWER|DIGIT);

    static final int GRAPH   = (PUNCT|UPPER|LOWER|DIGIT);

    static final int WORD    = (UPPER|LOWER|UNDER|DIGIT);

    static final int XDIGIT  = (HEX);

    /** Mask selecting the digit value kept in the low bits of a table entry. */
    private static final int DIGIT_VALUE_MASK = 0x3F;

    /** Mask that is zero exactly for code points in {@code 0x00-0x7F}. */
    private static final int NON_ASCII_MASK = 0xFFFFFF80;

    /**
     * Classification table indexed by ASCII code point. Flags and the digit
     * value occupy disjoint bits, so they are combined with {@code |}.
     */
    private static final int[] ctype = new int[] {
        CNTRL,                  /* 00 (NUL) */
        CNTRL,                  /* 01 (SOH) */
        CNTRL,                  /* 02 (STX) */
        CNTRL,                  /* 03 (ETX) */
        CNTRL,                  /* 04 (EOT) */
        CNTRL,                  /* 05 (ENQ) */
        CNTRL,                  /* 06 (ACK) */
        CNTRL,                  /* 07 (BEL) */
        CNTRL,                  /* 08 (BS)  */
        SPACE|CNTRL|BLANK,      /* 09 (HT)  */
        SPACE|CNTRL,            /* 0A (LF)  */
        SPACE|CNTRL,            /* 0B (VT)  */
        SPACE|CNTRL,            /* 0C (FF)  */
        SPACE|CNTRL,            /* 0D (CR)  */
        CNTRL,                  /* 0E (SO)  */
        CNTRL,                  /* 0F (SI)  */
        CNTRL,                  /* 10 (DLE) */
        CNTRL,                  /* 11 (DC1) */
        CNTRL,                  /* 12 (DC2) */
        CNTRL,                  /* 13 (DC3) */
        CNTRL,                  /* 14 (DC4) */
        CNTRL,                  /* 15 (NAK) */
        CNTRL,                  /* 16 (SYN) */
        CNTRL,                  /* 17 (ETB) */
        CNTRL,                  /* 18 (CAN) */
        CNTRL,                  /* 19 (EM)  */
        CNTRL,                  /* 1A (SUB) */
        CNTRL,                  /* 1B (ESC) */
        CNTRL,                  /* 1C (FS)  */
        CNTRL,                  /* 1D (GS)  */
        CNTRL,                  /* 1E (RS)  */
        CNTRL,                  /* 1F (US)  */
        SPACE|BLANK,            /* 20 SPACE */
        PUNCT,                  /* 21 !     */
        PUNCT,                  /* 22 "     */
        PUNCT,                  /* 23 #     */
        PUNCT,                  /* 24 $     */
        PUNCT,                  /* 25 %     */
        PUNCT,                  /* 26 &     */
        PUNCT,                  /* 27 '     */
        PUNCT,                  /* 28 (     */
        PUNCT,                  /* 29 )     */
        PUNCT,                  /* 2A *     */
        PUNCT,                  /* 2B +     */
        PUNCT,                  /* 2C ,     */
        PUNCT,                  /* 2D -     */
        PUNCT,                  /* 2E .     */
        PUNCT,                  /* 2F /     */
        DIGIT|HEX|0,            /* 30 0     */
        DIGIT|HEX|1,            /* 31 1     */
        DIGIT|HEX|2,            /* 32 2     */
        DIGIT|HEX|3,            /* 33 3     */
        DIGIT|HEX|4,            /* 34 4     */
        DIGIT|HEX|5,            /* 35 5     */
        DIGIT|HEX|6,            /* 36 6     */
        DIGIT|HEX|7,            /* 37 7     */
        DIGIT|HEX|8,            /* 38 8     */
        DIGIT|HEX|9,            /* 39 9     */
        PUNCT,                  /* 3A :     */
        PUNCT,                  /* 3B ;     */
        PUNCT,                  /* 3C <     */
        PUNCT,                  /* 3D =     */
        PUNCT,                  /* 3E >     */
        PUNCT,                  /* 3F ?     */
        PUNCT,                  /* 40 @     */
        UPPER|HEX|10,           /* 41 A     */
        UPPER|HEX|11,           /* 42 B     */
        UPPER|HEX|12,           /* 43 C     */
        UPPER|HEX|13,           /* 44 D     */
        UPPER|HEX|14,           /* 45 E     */
        UPPER|HEX|15,           /* 46 F     */
        UPPER|16,               /* 47 G     */
        UPPER|17,               /* 48 H     */
        UPPER|18,               /* 49 I     */
        UPPER|19,               /* 4A J     */
        UPPER|20,               /* 4B K     */
        UPPER|21,               /* 4C L     */
        UPPER|22,               /* 4D M     */
        UPPER|23,               /* 4E N     */
        UPPER|24,               /* 4F O     */
        UPPER|25,               /* 50 P     */
        UPPER|26,               /* 51 Q     */
        UPPER|27,               /* 52 R     */
        UPPER|28,               /* 53 S     */
        UPPER|29,               /* 54 T     */
        UPPER|30,               /* 55 U     */
        UPPER|31,               /* 56 V     */
        UPPER|32,               /* 57 W     */
        UPPER|33,               /* 58 X     */
        UPPER|34,               /* 59 Y     */
        UPPER|35,               /* 5A Z     */
        PUNCT,                  /* 5B [     */
        PUNCT,                  /* 5C \     */
        PUNCT,                  /* 5D ]     */
        PUNCT,                  /* 5E ^     */
        PUNCT|UNDER,            /* 5F _     */
        PUNCT,                  /* 60 `     */
        LOWER|HEX|10,           /* 61 a     */
        LOWER|HEX|11,           /* 62 b     */
        LOWER|HEX|12,           /* 63 c     */
        LOWER|HEX|13,           /* 64 d     */
        LOWER|HEX|14,           /* 65 e     */
        LOWER|HEX|15,           /* 66 f     */
        LOWER|16,               /* 67 g     */
        LOWER|17,               /* 68 h     */
        LOWER|18,               /* 69 i     */
        LOWER|19,               /* 6A j     */
        LOWER|20,               /* 6B k     */
        LOWER|21,               /* 6C l     */
        LOWER|22,               /* 6D m     */
        LOWER|23,               /* 6E n     */
        LOWER|24,               /* 6F o     */
        LOWER|25,               /* 70 p     */
        LOWER|26,               /* 71 q     */
        LOWER|27,               /* 72 r     */
        LOWER|28,               /* 73 s     */
        LOWER|29,               /* 74 t     */
        LOWER|30,               /* 75 u     */
        LOWER|31,               /* 76 v     */
        LOWER|32,               /* 77 w     */
        LOWER|33,               /* 78 x     */
        LOWER|34,               /* 79 y     */
        LOWER|35,               /* 7A z     */
        PUNCT,                  /* 7B {     */
        PUNCT,                  /* 7C |     */
        PUNCT,                  /* 7D }     */
        PUNCT,                  /* 7E ~     */
        CNTRL,                  /* 7F (DEL) */
    };

    /** Static utility holder; never instantiated. */
    private ASCII() {
    }

    /**
     * Branch-free test for {@code lo <= ch <= hi}.
     *
     * <p>Both differences are non-negative exactly when {@code ch} lies in the
     * range, and OR-ing them keeps the sign bit set if either one is negative.
     * For the small non-negative bounds used in this class the result is also
     * correct when a subtraction wraps around.
     */
    private static boolean inRange(int ch, int lo, int hi) {
        return ((ch - lo) | (hi - ch)) >= 0;
    }

    /**
     * Returns the raw table entry (flags plus digit value) for {@code ch},
     * or {@code 0} when {@code ch} is not an ASCII code point.
     */
    static int getType(int ch) {
        return isAscii(ch) ? ctype[ch] : 0;
    }

    /**
     * Returns {@code true} if the table entry for {@code ch} shares at least
     * one bit with {@code type}. Always {@code false} for non-ASCII input.
     */
    static boolean isType(int ch, int type) {
        return (getType(ch) & type) != 0;
    }

    /** Returns {@code true} if {@code ch} is in {@code 0x00-0x7F}. */
    static boolean isAscii(int ch) {
        return (ch & NON_ASCII_MASK) == 0;
    }

    /** Returns {@code true} for {@code 'A'-'Z'} and {@code 'a'-'z'}. */
    static boolean isAlpha(int ch) {
        return isType(ch, ALPHA);
    }

    /** Returns {@code true} for {@code '0'-'9'}. */
    static boolean isDigit(int ch) {
        return inRange(ch, '0', '9');
    }

    /** Returns {@code true} for ASCII letters and decimal digits. */
    static boolean isAlnum(int ch) {
        return isType(ch, ALNUM);
    }

    /** Returns {@code true} for ASCII letters, decimal digits and punctuation. */
    static boolean isGraph(int ch) {
        return isType(ch, GRAPH);
    }

    /** Returns {@code true} for {@code 0x20} (space) through {@code 0x7E} ('~'). */
    static boolean isPrint(int ch) {
        return inRange(ch, 0x20, 0x7E);
    }

    /** Returns {@code true} if {@code ch} carries the {@link #PUNCT} flag. */
    static boolean isPunct(int ch) {
        return isType(ch, PUNCT);
    }

    /** Returns {@code true} for HT, LF, VT, FF, CR and the space character. */
    static boolean isSpace(int ch) {
        return isType(ch, SPACE);
    }

    /** Returns {@code true} for {@code '0'-'9'}, {@code 'A'-'F'} and {@code 'a'-'f'}. */
    static boolean isHexDigit(int ch) {
        return isType(ch, HEX);
    }

    /** Returns {@code true} for {@code '0'-'7'}. */
    static boolean isOctDigit(int ch) {
        return inRange(ch, '0', '7');
    }

    /** Returns {@code true} for {@code 0x00-0x1F} and {@code 0x7F} (DEL). */
    static boolean isCntrl(int ch) {
        return isType(ch, CNTRL);
    }

    /** Returns {@code true} for {@code 'a'-'z'}. */
    static boolean isLower(int ch) {
        return inRange(ch, 'a', 'z');
    }

    /** Returns {@code true} for {@code 'A'-'Z'}. */
    static boolean isUpper(int ch) {
        return inRange(ch, 'A', 'Z');
    }

    /** Returns {@code true} for ASCII letters, decimal digits and {@code '_'}. */
    static boolean isWord(int ch) {
        return isType(ch, WORD);
    }

    /**
     * Returns the numeric value of {@code ch} read as a digit: 0-9 for
     * {@code '0'-'9'}, 10-35 for {@code 'A'-'Z'} / {@code 'a'-'z'}, and
     * {@code 0} for every other code point.
     *
     * <p>The lookup goes through {@link #getType(int)} so that a non-ASCII
     * code point yields {@code 0} instead of being folded onto the ASCII
     * character that happens to share its low seven bits. Because non-digits
     * also map to {@code 0}, callers that need to tell them apart from
     * {@code '0'} should check {@link #isDigit(int)} or
     * {@link #isHexDigit(int)} first.
     */
    static int toDigit(int ch) {
        return getType(ch) & DIGIT_VALUE_MASK;
    }

    /** Maps {@code 'A'-'Z'} to {@code 'a'-'z'}; any other value is returned unchanged. */
    static int toLower(int ch) {
        return isUpper(ch) ? (ch + 0x20) : ch;
    }

    /** Maps {@code 'a'-'z'} to {@code 'A'-'Z'}; any other value is returned unchanged. */
    static int toUpper(int ch) {
        return isLower(ch) ? (ch - 0x20) : ch;
    }

}