1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <cutils/log.h>
18 #include <unicode/utf.h>
19 #include <unicode/utf8.h>
20 
21 #include <cstdlib>
22 #include <sstream>
23 #include <string>
24 #include <vector>
25 
26 #include "minikin/U16StringPiece.h"
27 
28 namespace minikin {
29 
30 // src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null.
31 // Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
ParseUnicode(uint16_t * buf,size_t buf_size,const char * src,size_t * result_size,size_t * offset)32 void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
33                   size_t* offset) {
34     size_t input_ix = 0;
35     size_t output_ix = 0;
36     bool seen_offset = false;
37 
38     while (src[input_ix] != 0) {
39         switch (src[input_ix]) {
40             case '\'':
41                 // single ASCII char
42                 LOG_ALWAYS_FATAL_IF(static_cast<uint8_t>(src[input_ix]) >= 0x80);
43                 input_ix++;
44                 LOG_ALWAYS_FATAL_IF(src[input_ix] == 0);
45                 LOG_ALWAYS_FATAL_IF(output_ix >= buf_size);
46                 buf[output_ix++] = (uint16_t)src[input_ix++];
47                 LOG_ALWAYS_FATAL_IF(src[input_ix] != '\'');
48                 input_ix++;
49                 break;
50             case 'u':
51             case 'U': {
52                 // Unicode codepoint in hex syntax
53                 input_ix++;
54                 LOG_ALWAYS_FATAL_IF(src[input_ix] != '+');
55                 input_ix++;
56                 char* endptr = (char*)src + input_ix;
57                 unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
58                 size_t num_hex_digits = endptr - (src + input_ix);
59 
60                 // also triggers on invalid number syntax, digits = 0
61                 LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u);
62                 LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u);
63                 LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu);
64                 input_ix += num_hex_digits;
65                 if (U16_LENGTH(codepoint) == 1) {
66                     LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size);
67                     buf[output_ix++] = codepoint;
68                 } else {
69                     // UTF-16 encoding
70                     LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size);
71                     buf[output_ix++] = U16_LEAD(codepoint);
72                     buf[output_ix++] = U16_TRAIL(codepoint);
73                 }
74                 break;
75             }
76             case ' ':
77                 input_ix++;
78                 break;
79             case '|':
80                 LOG_ALWAYS_FATAL_IF(seen_offset);
81                 LOG_ALWAYS_FATAL_IF(offset == nullptr);
82                 *offset = output_ix;
83                 seen_offset = true;
84                 input_ix++;
85                 break;
86             default:
87                 LOG_ALWAYS_FATAL("Unexpected Character");
88         }
89     }
90     LOG_ALWAYS_FATAL_IF(result_size == nullptr);
91     *result_size = output_ix;
92     LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr);
93 }
94 
parseUnicodeStringWithOffset(const std::string & in,size_t * offset)95 std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset) {
96     std::unique_ptr<uint16_t[]> buffer(new uint16_t[in.size()]);
97     size_t result_size = 0;
98     ParseUnicode(buffer.get(), in.size(), in.c_str(), &result_size, offset);
99     return std::vector<uint16_t>(buffer.get(), buffer.get() + result_size);
100 }
101 
parseUnicodeString(const std::string & in)102 std::vector<uint16_t> parseUnicodeString(const std::string& in) {
103     return parseUnicodeStringWithOffset(in, nullptr);
104 }
105 
utf8ToUtf16(const std::string & text)106 std::vector<uint16_t> utf8ToUtf16(const std::string& text) {
107     std::vector<uint16_t> result;
108     int32_t i = 0;
109     const int32_t textLength = static_cast<int32_t>(text.size());
110     uint32_t c = 0;
111     while (i < textLength) {
112         U8_NEXT(text.c_str(), i, textLength, c);
113         if (U16_LENGTH(c) == 1) {
114             result.push_back(c);
115         } else {
116             result.push_back(U16_LEAD(c));
117             result.push_back(U16_TRAIL(c));
118         }
119     }
120     return result;
121 }
122 
utf16ToUtf8(const U16StringPiece & u16String)123 std::string utf16ToUtf8(const U16StringPiece& u16String) {
124     const uint32_t textLength = u16String.size();
125     uint32_t i = 0;
126     uint32_t c = 0;
127 
128     std::string out;
129     out.reserve(textLength * 4);
130 
131     while (i < textLength) {
132         U16_NEXT(u16String.data(), i, textLength, c);
133 
134         char buf[U8_MAX_LENGTH] = {};
135         uint32_t outIndex = 0;
136         U8_APPEND_UNSAFE(buf, outIndex, c);
137         out.append(buf, outIndex);
138     }
139     return out;
140 }
141 
// Returns |text| concatenated with itself |count| times.
//
// A |count| of zero or less, or an empty |text|, yields an empty string.
std::string repeat(const std::string& text, int count) {
    std::string result;
    if (count <= 0 || text.empty()) {
        return result;
    }
    // Size the result once and append in place, rather than growing a stringstream and then
    // copying its contents out.
    result.reserve(text.size() * static_cast<size_t>(count));
    for (int i = 0; i < count; ++i) {
        result += text;
    }
    return result;
}
149 
150 }  // namespace minikin
151