1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "java_lang_StringFactory.h"
18 
19 #include "common_throws.h"
20 #include "handle_scope-inl.h"
21 #include "jni/jni_internal.h"
22 #include "mirror/object-inl.h"
23 #include "mirror/string-alloc-inl.h"
24 #include "native_util.h"
25 #include "nativehelper/jni_macros.h"
26 #include "nativehelper/scoped_local_ref.h"
27 #include "nativehelper/scoped_primitive_array.h"
28 #include "scoped_fast_native_object_access-inl.h"
29 #include "scoped_thread_state_change-inl.h"
30 
31 namespace art HIDDEN {
32 
StringFactory_newStringFromBytes(JNIEnv * env,jclass,jbyteArray java_data,jint high,jint offset,jint byte_count)33 static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data,
34                                                 jint high, jint offset, jint byte_count) {
35   ScopedFastNativeObjectAccess soa(env);
36   if (UNLIKELY(java_data == nullptr)) {
37     ThrowNullPointerException("data == null");
38     return nullptr;
39   }
40   StackHandleScope<1> hs(soa.Self());
41   Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
42   int32_t data_size = byte_array->GetLength();
43   if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
44     soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
45                                    "length=%d; regionStart=%d; regionLength=%d", data_size,
46                                    offset, byte_count);
47     return nullptr;
48   }
49   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
50   ObjPtr<mirror::String> result = mirror::String::AllocFromByteArray(soa.Self(),
51                                                                      byte_count,
52                                                                      byte_array,
53                                                                      offset,
54                                                                      high,
55                                                                      allocator_type);
56   return soa.AddLocalReference<jstring>(result);
57 }
58 
StringFactory_newStringFromUtf16Bytes(JNIEnv * env,jclass,jbyteArray java_data,jint offset,jint char_count)59 static jstring StringFactory_newStringFromUtf16Bytes(
60     JNIEnv* env, jclass, jbyteArray java_data, jint offset, jint char_count) {
61   ScopedFastNativeObjectAccess soa(env);
62   if (UNLIKELY(java_data == nullptr)) {
63     ThrowNullPointerException("data == null");
64     return nullptr;
65   }
66   StackHandleScope<1> hs(soa.Self());
67   Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
68   int32_t data_size = byte_array->GetLength();
69   DCHECK_GE(data_size, 0);
70   if (offset < 0 ||
71       offset > data_size ||
72       static_cast<uint32_t>(char_count) > (static_cast<uint32_t>(data_size - offset) >> 1)) {
73     soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
74                                    "length=%d; regionStart=%d; bytePairLength=%d",
75                                    data_size,
76                                    offset,
77                                    char_count);
78     return nullptr;
79   }
80   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
81   ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16ByteArray(soa.Self(),
82                                                                           char_count,
83                                                                           byte_array,
84                                                                           offset,
85                                                                           allocator_type);
86   return soa.AddLocalReference<jstring>(result);
87 }
88 
89 // The char array passed as `java_data` must not be a null reference.
StringFactory_newStringFromChars(JNIEnv * env,jclass,jint offset,jint char_count,jcharArray java_data)90 static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset,
91                                                 jint char_count, jcharArray java_data) {
92   DCHECK(java_data != nullptr);
93   ScopedFastNativeObjectAccess soa(env);
94   StackHandleScope<1> hs(soa.Self());
95   Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray>(java_data)));
96   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
97   ObjPtr<mirror::String> result = mirror::String::AllocFromCharArray(soa.Self(),
98                                                                      char_count,
99                                                                      char_array,
100                                                                      offset,
101                                                                      allocator_type);
102   return soa.AddLocalReference<jstring>(result);
103 }
104 
StringFactory_newStringFromString(JNIEnv * env,jclass,jstring to_copy)105 static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) {
106   ScopedFastNativeObjectAccess soa(env);
107   if (UNLIKELY(to_copy == nullptr)) {
108     ThrowNullPointerException("toCopy == null");
109     return nullptr;
110   }
111   StackHandleScope<1> hs(soa.Self());
112   Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(to_copy)));
113   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
114   ObjPtr<mirror::String> result = mirror::String::AllocFromString(soa.Self(),
115                                                                   string->GetLength(),
116                                                                   string,
117                                                                   /*offset=*/ 0,
118                                                                   allocator_type);
119   return soa.AddLocalReference<jstring>(result);
120 }
121 
StringFactory_newStringFromUtf8Bytes(JNIEnv * env,jclass,jbyteArray java_data,jint offset,jint byte_count)122 static jstring StringFactory_newStringFromUtf8Bytes(JNIEnv* env, jclass, jbyteArray java_data,
123                                                     jint offset, jint byte_count) {
124   // Local Define in here
125   static const jchar kReplacementChar = 0xfffd;
126   static const int kDefaultBufferSize = 256;
127   static const int kTableUtf8Needed[] = {
128     //      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f
129     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xc0 - 0xcf
130     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xd0 - 0xdf
131     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // 0xe0 - 0xef
132     3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
133   };
134 
135   ScopedFastNativeObjectAccess soa(env);
136   if (UNLIKELY(java_data == nullptr)) {
137     ThrowNullPointerException("data == null");
138     return nullptr;
139   }
140 
141   StackHandleScope<1> hs(soa.Self());
142   Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
143   int32_t data_size = byte_array->GetLength();
144   if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
145     soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
146         "length=%d; regionStart=%d; regionLength=%d", data_size,
147         offset, byte_count);
148     return nullptr;
149   }
150 
151   /*
152    * This code converts a UTF-8 byte sequence to a Java String (UTF-16).
153    * It implements the W3C recommended UTF-8 decoder.
154    * https://www.w3.org/TR/encoding/#utf-8-decoder
155    *
156    * Unicode 3.2 Well-Formed UTF-8 Byte Sequences
157    * Code Points        First  Second Third Fourth
158    * U+0000..U+007F     00..7F
159    * U+0080..U+07FF     C2..DF 80..BF
160    * U+0800..U+0FFF     E0     A0..BF 80..BF
161    * U+1000..U+CFFF     E1..EC 80..BF 80..BF
162    * U+D000..U+D7FF     ED     80..9F 80..BF
163    * U+E000..U+FFFF     EE..EF 80..BF 80..BF
164    * U+10000..U+3FFFF   F0     90..BF 80..BF 80..BF
165    * U+40000..U+FFFFF   F1..F3 80..BF 80..BF 80..BF
166    * U+100000..U+10FFFF F4     80..8F 80..BF 80..BF
167    *
168    * Please refer to Unicode as the authority.
169    * p.126 Table 3-7 in http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
170    *
171    * Handling Malformed Input
172    * The maximal subpart should be replaced by a single U+FFFD. Maximal subpart is
173    * the longest code unit subsequence starting at an unconvertible offset that is either
174    * 1) the initial subsequence of a well-formed code unit sequence, or
175    * 2) a subsequence of length one:
176    * One U+FFFD should be emitted for every sequence of bytes that is an incomplete prefix
177    * of a valid sequence, and with the conversion to restart after the incomplete sequence.
178    *
179    * For example, in byte sequence "41 C0 AF 41 F4 80 80 41", the maximal subparts are
180    * "C0", "AF", and "F4 80 80". "F4 80 80" can be the initial subsequence of "F4 80 80 80",
181    * but "C0" can't be the initial subsequence of any well-formed code unit sequence.
182    * Thus, the output should be "A\ufffd\ufffdA\ufffdA".
183    *
184    * Please refer to section "Best Practices for Using U+FFFD." in
185    * http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
186    */
187 
188   // Initial value
189   jchar stack_buffer[kDefaultBufferSize];
190   std::unique_ptr<jchar[]> allocated_buffer;
191   jchar* v;
192   if (byte_count <= kDefaultBufferSize) {
193     v = stack_buffer;
194   } else {
195     allocated_buffer.reset(new jchar[byte_count]);
196     v = allocated_buffer.get();
197   }
198 
199   jbyte* d = byte_array->GetData();
200   DCHECK(d != nullptr);
201 
202   int idx = offset;
203   int last = offset + byte_count;
204   int s = 0;
205 
206   int code_point = 0;
207   int utf8_bytes_seen = 0;
208   int utf8_bytes_needed = 0;
209   int lower_bound = 0x80;
210   int upper_bound = 0xbf;
211   while (idx < last) {
212     int b = d[idx++] & 0xff;
213     if (utf8_bytes_needed == 0) {
214       if ((b & 0x80) == 0) {  // ASCII char. 0xxxxxxx
215         v[s++] = (jchar) b;
216         continue;
217       }
218 
219       if ((b & 0x40) == 0) {  // 10xxxxxx is illegal as first byte
220         v[s++] = kReplacementChar;
221         continue;
222       }
223 
224       // 11xxxxxx
225       int tableLookupIndex = b & 0x3f;
226       utf8_bytes_needed = kTableUtf8Needed[tableLookupIndex];
227       if (utf8_bytes_needed == 0) {
228         v[s++] = kReplacementChar;
229         continue;
230       }
231 
232       // utf8_bytes_needed
233       // 1: b & 0x1f
234       // 2: b & 0x0f
235       // 3: b & 0x07
236       code_point = b & (0x3f >> utf8_bytes_needed);
237       if (b == 0xe0) {
238         lower_bound = 0xa0;
239       } else if (b == 0xed) {
240         upper_bound = 0x9f;
241       } else if (b == 0xf0) {
242         lower_bound = 0x90;
243       } else if (b == 0xf4) {
244         upper_bound = 0x8f;
245       }
246     } else {
247       if (b < lower_bound || b > upper_bound) {
248         // The bytes seen are ill-formed. Substitute them with U+FFFD
249         v[s++] = kReplacementChar;
250         code_point = 0;
251         utf8_bytes_needed = 0;
252         utf8_bytes_seen = 0;
253         lower_bound = 0x80;
254         upper_bound = 0xbf;
255         /*
256          * According to the Unicode Standard,
257          * "a UTF-8 conversion process is required to never consume well-formed
258          * subsequences as part of its error handling for ill-formed subsequences"
259          * The current byte could be part of well-formed subsequences. Reduce the
260          * index by 1 to parse it in next loop.
261          */
262         idx--;
263         continue;
264       }
265 
266       lower_bound = 0x80;
267       upper_bound = 0xbf;
268       code_point = (code_point << 6) | (b & 0x3f);
269       utf8_bytes_seen++;
270       if (utf8_bytes_needed != utf8_bytes_seen) {
271         continue;
272       }
273 
274       // Encode chars from U+10000 up as surrogate pairs
275       if (code_point < 0x10000) {
276         v[s++] = (jchar) code_point;
277       } else {
278         v[s++] = (jchar) ((code_point >> 10) + 0xd7c0);
279         v[s++] = (jchar) ((code_point & 0x3ff) + 0xdc00);
280       }
281 
282       utf8_bytes_seen = 0;
283       utf8_bytes_needed = 0;
284       code_point = 0;
285     }
286   }
287 
288   // The bytes seen are ill-formed. Substitute them by U+FFFD
289   if (utf8_bytes_needed != 0) {
290     v[s++] = kReplacementChar;
291   }
292 
293   ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16(soa.Self(), s, v);
294   return soa.AddLocalReference<jstring>(result);
295 }
296 
297 static JNINativeMethod gMethods[] = {
298   FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"),
299   FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"),
300   FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"),
301   FAST_NATIVE_METHOD(StringFactory, newStringFromUtf8Bytes, "([BII)Ljava/lang/String;"),
302   FAST_NATIVE_METHOD(StringFactory, newStringFromUtf16Bytes, "([BII)Ljava/lang/String;"),
303 };
304 
register_java_lang_StringFactory(JNIEnv * env)305 void register_java_lang_StringFactory(JNIEnv* env) {
306   REGISTER_NATIVE_METHODS("java/lang/StringFactory");
307 }
308 
309 }  // namespace art
310