1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "java_lang_StringFactory.h"
18
19 #include "common_throws.h"
20 #include "handle_scope-inl.h"
21 #include "jni/jni_internal.h"
22 #include "mirror/object-inl.h"
23 #include "mirror/string-alloc-inl.h"
24 #include "native_util.h"
25 #include "nativehelper/jni_macros.h"
26 #include "nativehelper/scoped_local_ref.h"
27 #include "nativehelper/scoped_primitive_array.h"
28 #include "scoped_fast_native_object_access-inl.h"
29 #include "scoped_thread_state_change-inl.h"
30
31 namespace art HIDDEN {
32
StringFactory_newStringFromBytes(JNIEnv * env,jclass,jbyteArray java_data,jint high,jint offset,jint byte_count)33 static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data,
34 jint high, jint offset, jint byte_count) {
35 ScopedFastNativeObjectAccess soa(env);
36 if (UNLIKELY(java_data == nullptr)) {
37 ThrowNullPointerException("data == null");
38 return nullptr;
39 }
40 StackHandleScope<1> hs(soa.Self());
41 Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
42 int32_t data_size = byte_array->GetLength();
43 if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
44 soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
45 "length=%d; regionStart=%d; regionLength=%d", data_size,
46 offset, byte_count);
47 return nullptr;
48 }
49 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
50 ObjPtr<mirror::String> result = mirror::String::AllocFromByteArray(soa.Self(),
51 byte_count,
52 byte_array,
53 offset,
54 high,
55 allocator_type);
56 return soa.AddLocalReference<jstring>(result);
57 }
58
StringFactory_newStringFromUtf16Bytes(JNIEnv * env,jclass,jbyteArray java_data,jint offset,jint char_count)59 static jstring StringFactory_newStringFromUtf16Bytes(
60 JNIEnv* env, jclass, jbyteArray java_data, jint offset, jint char_count) {
61 ScopedFastNativeObjectAccess soa(env);
62 if (UNLIKELY(java_data == nullptr)) {
63 ThrowNullPointerException("data == null");
64 return nullptr;
65 }
66 StackHandleScope<1> hs(soa.Self());
67 Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
68 int32_t data_size = byte_array->GetLength();
69 DCHECK_GE(data_size, 0);
70 if (offset < 0 ||
71 offset > data_size ||
72 static_cast<uint32_t>(char_count) > (static_cast<uint32_t>(data_size - offset) >> 1)) {
73 soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
74 "length=%d; regionStart=%d; bytePairLength=%d",
75 data_size,
76 offset,
77 char_count);
78 return nullptr;
79 }
80 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
81 ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16ByteArray(soa.Self(),
82 char_count,
83 byte_array,
84 offset,
85 allocator_type);
86 return soa.AddLocalReference<jstring>(result);
87 }
88
89 // The char array passed as `java_data` must not be a null reference.
StringFactory_newStringFromChars(JNIEnv * env,jclass,jint offset,jint char_count,jcharArray java_data)90 static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset,
91 jint char_count, jcharArray java_data) {
92 DCHECK(java_data != nullptr);
93 ScopedFastNativeObjectAccess soa(env);
94 StackHandleScope<1> hs(soa.Self());
95 Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray>(java_data)));
96 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
97 ObjPtr<mirror::String> result = mirror::String::AllocFromCharArray(soa.Self(),
98 char_count,
99 char_array,
100 offset,
101 allocator_type);
102 return soa.AddLocalReference<jstring>(result);
103 }
104
StringFactory_newStringFromString(JNIEnv * env,jclass,jstring to_copy)105 static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) {
106 ScopedFastNativeObjectAccess soa(env);
107 if (UNLIKELY(to_copy == nullptr)) {
108 ThrowNullPointerException("toCopy == null");
109 return nullptr;
110 }
111 StackHandleScope<1> hs(soa.Self());
112 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(to_copy)));
113 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
114 ObjPtr<mirror::String> result = mirror::String::AllocFromString(soa.Self(),
115 string->GetLength(),
116 string,
117 /*offset=*/ 0,
118 allocator_type);
119 return soa.AddLocalReference<jstring>(result);
120 }
121
StringFactory_newStringFromUtf8Bytes(JNIEnv * env,jclass,jbyteArray java_data,jint offset,jint byte_count)122 static jstring StringFactory_newStringFromUtf8Bytes(JNIEnv* env, jclass, jbyteArray java_data,
123 jint offset, jint byte_count) {
124 // Local Define in here
125 static const jchar kReplacementChar = 0xfffd;
126 static const int kDefaultBufferSize = 256;
127 static const int kTableUtf8Needed[] = {
128 // 0 1 2 3 4 5 6 7 8 9 a b c d e f
129 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0 - 0xcf
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0 - 0xdf
131 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 - 0xef
132 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff
133 };
134
135 ScopedFastNativeObjectAccess soa(env);
136 if (UNLIKELY(java_data == nullptr)) {
137 ThrowNullPointerException("data == null");
138 return nullptr;
139 }
140
141 StackHandleScope<1> hs(soa.Self());
142 Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
143 int32_t data_size = byte_array->GetLength();
144 if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
145 soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
146 "length=%d; regionStart=%d; regionLength=%d", data_size,
147 offset, byte_count);
148 return nullptr;
149 }
150
151 /*
152 * This code converts a UTF-8 byte sequence to a Java String (UTF-16).
153 * It implements the W3C recommended UTF-8 decoder.
154 * https://www.w3.org/TR/encoding/#utf-8-decoder
155 *
156 * Unicode 3.2 Well-Formed UTF-8 Byte Sequences
157 * Code Points First Second Third Fourth
158 * U+0000..U+007F 00..7F
159 * U+0080..U+07FF C2..DF 80..BF
160 * U+0800..U+0FFF E0 A0..BF 80..BF
161 * U+1000..U+CFFF E1..EC 80..BF 80..BF
162 * U+D000..U+D7FF ED 80..9F 80..BF
163 * U+E000..U+FFFF EE..EF 80..BF 80..BF
164 * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
165 * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
166 * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
167 *
168 * Please refer to Unicode as the authority.
169 * p.126 Table 3-7 in http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
170 *
171 * Handling Malformed Input
172 * The maximal subpart should be replaced by a single U+FFFD. Maximal subpart is
173 * the longest code unit subsequence starting at an unconvertible offset that is either
174 * 1) the initial subsequence of a well-formed code unit sequence, or
175 * 2) a subsequence of length one:
176 * One U+FFFD should be emitted for every sequence of bytes that is an incomplete prefix
177 * of a valid sequence, and with the conversion to restart after the incomplete sequence.
178 *
179 * For example, in byte sequence "41 C0 AF 41 F4 80 80 41", the maximal subparts are
180 * "C0", "AF", and "F4 80 80". "F4 80 80" can be the initial subsequence of "F4 80 80 80",
181 * but "C0" can't be the initial subsequence of any well-formed code unit sequence.
182 * Thus, the output should be "A\ufffd\ufffdA\ufffdA".
183 *
184 * Please refer to section "Best Practices for Using U+FFFD." in
185 * http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
186 */
187
188 // Initial value
189 jchar stack_buffer[kDefaultBufferSize];
190 std::unique_ptr<jchar[]> allocated_buffer;
191 jchar* v;
192 if (byte_count <= kDefaultBufferSize) {
193 v = stack_buffer;
194 } else {
195 allocated_buffer.reset(new jchar[byte_count]);
196 v = allocated_buffer.get();
197 }
198
199 jbyte* d = byte_array->GetData();
200 DCHECK(d != nullptr);
201
202 int idx = offset;
203 int last = offset + byte_count;
204 int s = 0;
205
206 int code_point = 0;
207 int utf8_bytes_seen = 0;
208 int utf8_bytes_needed = 0;
209 int lower_bound = 0x80;
210 int upper_bound = 0xbf;
211 while (idx < last) {
212 int b = d[idx++] & 0xff;
213 if (utf8_bytes_needed == 0) {
214 if ((b & 0x80) == 0) { // ASCII char. 0xxxxxxx
215 v[s++] = (jchar) b;
216 continue;
217 }
218
219 if ((b & 0x40) == 0) { // 10xxxxxx is illegal as first byte
220 v[s++] = kReplacementChar;
221 continue;
222 }
223
224 // 11xxxxxx
225 int tableLookupIndex = b & 0x3f;
226 utf8_bytes_needed = kTableUtf8Needed[tableLookupIndex];
227 if (utf8_bytes_needed == 0) {
228 v[s++] = kReplacementChar;
229 continue;
230 }
231
232 // utf8_bytes_needed
233 // 1: b & 0x1f
234 // 2: b & 0x0f
235 // 3: b & 0x07
236 code_point = b & (0x3f >> utf8_bytes_needed);
237 if (b == 0xe0) {
238 lower_bound = 0xa0;
239 } else if (b == 0xed) {
240 upper_bound = 0x9f;
241 } else if (b == 0xf0) {
242 lower_bound = 0x90;
243 } else if (b == 0xf4) {
244 upper_bound = 0x8f;
245 }
246 } else {
247 if (b < lower_bound || b > upper_bound) {
248 // The bytes seen are ill-formed. Substitute them with U+FFFD
249 v[s++] = kReplacementChar;
250 code_point = 0;
251 utf8_bytes_needed = 0;
252 utf8_bytes_seen = 0;
253 lower_bound = 0x80;
254 upper_bound = 0xbf;
255 /*
256 * According to the Unicode Standard,
257 * "a UTF-8 conversion process is required to never consume well-formed
258 * subsequences as part of its error handling for ill-formed subsequences"
259 * The current byte could be part of well-formed subsequences. Reduce the
260 * index by 1 to parse it in next loop.
261 */
262 idx--;
263 continue;
264 }
265
266 lower_bound = 0x80;
267 upper_bound = 0xbf;
268 code_point = (code_point << 6) | (b & 0x3f);
269 utf8_bytes_seen++;
270 if (utf8_bytes_needed != utf8_bytes_seen) {
271 continue;
272 }
273
274 // Encode chars from U+10000 up as surrogate pairs
275 if (code_point < 0x10000) {
276 v[s++] = (jchar) code_point;
277 } else {
278 v[s++] = (jchar) ((code_point >> 10) + 0xd7c0);
279 v[s++] = (jchar) ((code_point & 0x3ff) + 0xdc00);
280 }
281
282 utf8_bytes_seen = 0;
283 utf8_bytes_needed = 0;
284 code_point = 0;
285 }
286 }
287
288 // The bytes seen are ill-formed. Substitute them by U+FFFD
289 if (utf8_bytes_needed != 0) {
290 v[s++] = kReplacementChar;
291 }
292
293 ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16(soa.Self(), s, v);
294 return soa.AddLocalReference<jstring>(result);
295 }
296
297 static JNINativeMethod gMethods[] = {
298 FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"),
299 FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"),
300 FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"),
301 FAST_NATIVE_METHOD(StringFactory, newStringFromUtf8Bytes, "([BII)Ljava/lang/String;"),
302 FAST_NATIVE_METHOD(StringFactory, newStringFromUtf16Bytes, "([BII)Ljava/lang/String;"),
303 };
304
register_java_lang_StringFactory(JNIEnv * env)305 void register_java_lang_StringFactory(JNIEnv* env) {
306 REGISTER_NATIVE_METHODS("java/lang/StringFactory");
307 }
308
309 } // namespace art
310