1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef MEDIAPROVIDER_PDF_JNI_PDFCLIENT_LIBS_UTF_H
18 #define MEDIAPROVIDER_PDF_JNI_PDFCLIENT_LIBS_UTF_H
19
20 #include "core.h"
21
22 namespace pdfClient {
23 namespace unchecked {
// Encodes the code point `cp` as UTF-8 and writes the resulting octets through
// `result`. Returns the iterator positioned just after the last octet written.
//
// The octet count is chosen purely from the magnitude of `cp` (1 octet below
// 0x80, 2 below 0x800, 3 below 0x10000, otherwise 4); `cp` is not validated.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result) {
    // Continuation octet: 0x80 marker carrying the low six bits of `bits`.
    const auto continuation = [](uint32_t bits) {
        return static_cast<uint8_t>((bits & 0x3f) | 0x80);
    };

    if (cp < 0x80) {
        // Single octet: the value is stored unchanged.
        *(result++) = static_cast<uint8_t>(cp);
        return result;
    }

    if (cp < 0x800) {
        // Two octets: 0xc0 lead marker + one continuation octet.
        *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
        *(result++) = continuation(cp);
        return result;
    }

    if (cp < 0x10000) {
        // Three octets: 0xe0 lead marker + two continuation octets.
        *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
        *(result++) = continuation(cp >> 6);
        *(result++) = continuation(cp);
        return result;
    }

    // Four octets: 0xf0 lead marker + three continuation octets.
    *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
    *(result++) = continuation(cp >> 12);
    *(result++) = continuation(cp >> 6);
    *(result++) = continuation(cp);
    return result;
}
43
44 template <typename octet_iterator>
next(octet_iterator & it)45 uint32_t next(octet_iterator& it) {
46 uint32_t cp = utf8::mask8(*it);
47 typename std::iterator_traits<octet_iterator>::difference_type length =
48 utf8::sequence_length(it);
49 switch (length) {
50 case 1:
51 break;
52 case 2:
53 it++;
54 cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
55 break;
56 case 3:
57 ++it;
58 cp = ((cp << 12) & 0xffff) + ((utf8::mask8(*it) << 6) & 0xfff);
59 ++it;
60 cp += (*it) & 0x3f;
61 break;
62 case 4:
63 ++it;
64 cp = ((cp << 18) & 0x1fffff) + ((utf8::mask8(*it) << 12) & 0x3ffff);
65 ++it;
66 cp += (utf8::mask8(*it) << 6) & 0xfff;
67 ++it;
68 cp += (*it) & 0x3f;
69 break;
70 }
71 ++it;
72 return cp;
73 }
74
75 template <typename u16bit_iterator, typename octet_iterator>
utf16to8(u16bit_iterator start,u16bit_iterator end,octet_iterator result)76 octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result) {
77 while (start != end) {
78 uint32_t cp = utf8::mask16(*start++);
79 // Take care of surrogate pairs first
80 if (utf8::is_lead_surrogate(cp)) {
81 uint32_t trail_surrogate = utf8::mask16(*start++);
82 cp = (cp << 10) + trail_surrogate + utf8::SURROGATE_OFFSET;
83 }
84 result = unchecked::append(cp, result);
85 }
86 return result;
87 }
88
89 template <typename u16bit_iterator, typename octet_iterator>
utf8to16(octet_iterator start,octet_iterator end,u16bit_iterator result)90 u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result) {
91 while (start < end) {
92 uint32_t cp = next(start);
93 if (cp > 0xffff) { // make a surrogate pair
94 *result++ = static_cast<uint16_t>((cp >> 10) + utf8::LEAD_OFFSET);
95 *result++ = static_cast<uint16_t>((cp & 0x3ff) + utf8::TRAIL_SURROGATE_MIN);
96 } else
97 *result++ = static_cast<uint16_t>(cp);
98 }
99 return result;
100 }
101
102 template <typename octet_iterator, typename u32bit_iterator>
utf8to32(octet_iterator start,octet_iterator end,u32bit_iterator result)103 u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result) {
104 while (start < end) (*result++) = next(start);
105
106 return result;
107 }
108
109 } // namespace unchecked
110 } // namespace pdfClient
111
112 #endif // header guard