1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MEDIAPROVIDER_PDF_JNI_PDFCLIENT_LIBS_UTF_H
18 #define MEDIAPROVIDER_PDF_JNI_PDFCLIENT_LIBS_UTF_H
19 
20 #include "core.h"
21 
22 namespace pdfClient {
23 namespace unchecked {
// Encodes the code point `cp` as UTF-8 and writes the octets through `result`.
//
// Values below 0x80 produce one octet, below 0x800 two, below 0x10000 three,
// and anything else four. No validation is performed on `cp`.
//
// Returns the output iterator advanced past the last octet written.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result) {
    if (cp < 0x80) {
        // One octet: the value is stored as-is.
        *result++ = static_cast<uint8_t>(cp);
        return result;
    }

    if (cp < 0x800) {
        // Two octets: 110xxxxx 10xxxxxx.
        *result++ = static_cast<uint8_t>(0xc0 | (cp >> 6));
        *result++ = static_cast<uint8_t>(0x80 | (cp & 0x3f));
        return result;
    }

    if (cp < 0x10000) {
        // Three octets: 1110xxxx 10xxxxxx 10xxxxxx.
        *result++ = static_cast<uint8_t>(0xe0 | (cp >> 12));
        *result++ = static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3f));
        *result++ = static_cast<uint8_t>(0x80 | (cp & 0x3f));
        return result;
    }

    // Four octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    *result++ = static_cast<uint8_t>(0xf0 | (cp >> 18));
    *result++ = static_cast<uint8_t>(0x80 | ((cp >> 12) & 0x3f));
    *result++ = static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3f));
    *result++ = static_cast<uint8_t>(0x80 | (cp & 0x3f));
    return result;
}
43 
44 template <typename octet_iterator>
next(octet_iterator & it)45 uint32_t next(octet_iterator& it) {
46     uint32_t cp = utf8::mask8(*it);
47     typename std::iterator_traits<octet_iterator>::difference_type length =
48             utf8::sequence_length(it);
49     switch (length) {
50         case 1:
51             break;
52         case 2:
53             it++;
54             cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
55             break;
56         case 3:
57             ++it;
58             cp = ((cp << 12) & 0xffff) + ((utf8::mask8(*it) << 6) & 0xfff);
59             ++it;
60             cp += (*it) & 0x3f;
61             break;
62         case 4:
63             ++it;
64             cp = ((cp << 18) & 0x1fffff) + ((utf8::mask8(*it) << 12) & 0x3ffff);
65             ++it;
66             cp += (utf8::mask8(*it) << 6) & 0xfff;
67             ++it;
68             cp += (*it) & 0x3f;
69             break;
70     }
71     ++it;
72     return cp;
73 }
74 
75 template <typename u16bit_iterator, typename octet_iterator>
utf16to8(u16bit_iterator start,u16bit_iterator end,octet_iterator result)76 octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result) {
77     while (start != end) {
78         uint32_t cp = utf8::mask16(*start++);
79         // Take care of surrogate pairs first
80         if (utf8::is_lead_surrogate(cp)) {
81             uint32_t trail_surrogate = utf8::mask16(*start++);
82             cp = (cp << 10) + trail_surrogate + utf8::SURROGATE_OFFSET;
83         }
84         result = unchecked::append(cp, result);
85     }
86     return result;
87 }
88 
89 template <typename u16bit_iterator, typename octet_iterator>
utf8to16(octet_iterator start,octet_iterator end,u16bit_iterator result)90 u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result) {
91     while (start < end) {
92         uint32_t cp = next(start);
93         if (cp > 0xffff) {  // make a surrogate pair
94             *result++ = static_cast<uint16_t>((cp >> 10) + utf8::LEAD_OFFSET);
95             *result++ = static_cast<uint16_t>((cp & 0x3ff) + utf8::TRAIL_SURROGATE_MIN);
96         } else
97             *result++ = static_cast<uint16_t>(cp);
98     }
99     return result;
100 }
101 
// Converts the UTF-8 octets in [start, end) to code points, writing each value
// returned by next() through `result`.
//
// The loop condition is `start < end`, so octet_iterator must support `<`.
// The input is not validated.
//
// NOTE(review): next() is called unqualified; for iterator types that live in
// namespace std, argument-dependent lookup may also consider std::next -
// confirm against the callers' iterator types.
//
// Returns the output iterator advanced past the last code point written.
template <typename octet_iterator, typename u32bit_iterator>
u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result) {
    while (start < end) {
        *result = next(start);
        ++result;
    }
    return result;
}
108 
109 }  // namespace unchecked
110 }  // namespace pdfClient
111 
112 #endif  // header guard