1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINIKIN_LOCALE_LIST_H 18 #define MINIKIN_LOCALE_LIST_H 19 20 #include <hb.h> 21 22 #include <string> 23 #include <vector> 24 25 #include "StringPiece.h" 26 #include "minikin/LineBreakStyle.h" 27 28 namespace minikin { 29 30 // Due to the limits in font fallback score calculation, we can't use anything more than 12 locales. 31 const size_t FONT_LOCALE_LIMIT = 12; 32 33 // The language or region code is encoded to 15 bits. 34 constexpr uint16_t NO_LANGUAGE = 0x7fff; 35 constexpr uint16_t NO_REGION = 0x7fff; 36 // The script code is encoded to 20 bits. 37 constexpr uint32_t NO_SCRIPT = 0xfffff; 38 39 class LocaleList; 40 41 // Enum for making sub-locale from FontLangauge. 42 enum class SubtagBits : uint8_t { 43 EMPTY = 0b00000000, 44 LANGUAGE = 0b00000001, 45 SCRIPT = 0b00000010, 46 REGION = 0b00000100, 47 VARIANT = 0b00001000, 48 EMOJI = 0b00010000, 49 ALL = 0b00011111, 50 }; 51 52 inline constexpr SubtagBits operator&(SubtagBits l, SubtagBits r) { 53 return static_cast<SubtagBits>(static_cast<uint8_t>(l) & static_cast<uint8_t>(r)); 54 } 55 inline constexpr SubtagBits operator|(SubtagBits l, SubtagBits r) { 56 return static_cast<SubtagBits>(static_cast<uint8_t>(l) | static_cast<uint8_t>(r)); 57 } 58 59 // Enum for emoji style. 60 enum class EmojiStyle : uint8_t { 61 EMPTY = 0, // No emoji style is specified. 62 DEFAULT = 1, // Default emoji style is specified. 63 EMOJI = 2, // Emoji (color) emoji style is specified. 64 TEXT = 3, // Text (black/white) emoji style is specified. 65 }; 66 67 // Locale is a compact representation of a BCP 47 language tag. 68 // It does not capture all possible information, only what directly affects text layout: 69 // font rendering, hyphenation, word breaking, etc. 70 struct Locale { 71 public: 72 enum class Variant : uint16_t { 73 NO_VARIANT = 0x0000, 74 GERMAN_1901_ORTHOGRAPHY = 0x0001, 75 GERMAN_1996_ORTHOGRAPHY = 0x0002, 76 }; 77 78 // Default constructor creates the unsupported locale. LocaleLocale79 Locale() 80 : mScript(NO_SCRIPT), 81 mLanguage(NO_LANGUAGE), 82 mRegion(NO_REGION), 83 mSubScriptBits(0ul), 84 mVariant(Variant::NO_VARIANT), 85 mEmojiStyle(EmojiStyle::EMPTY) {} 86 87 // Parse from string 88 Locale(const StringPiece& buf); 89 90 // Parse from identifier. See getIdentifier() for the identifier format. LocaleLocale91 explicit Locale(uint64_t identifier) 92 : mScript(extractBits(identifier, 29, 20)), 93 mLanguage(extractBits(identifier, 49, 15)), 94 mRegion(extractBits(identifier, 14, 15)), 95 mSubScriptBits(scriptToSubScriptBits(mScript)), 96 mVariant(static_cast<Variant>(extractBits(identifier, 0, 2))), 97 mEmojiStyle(static_cast<EmojiStyle>(extractBits(identifier, 12, 2))) {} 98 99 bool operator==(const Locale& other) const { 100 return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage && 101 mRegion == other.mRegion && mVariant == other.mVariant && 102 mEmojiStyle == other.mEmojiStyle; 103 } 104 105 bool operator!=(const Locale other) const { return !(*this == other); } 106 hasLanguageLocale107 inline bool hasLanguage() const { return mLanguage != NO_LANGUAGE; } hasScriptLocale108 inline bool hasScript() const { return mScript != NO_SCRIPT; } hasRegionLocale109 inline bool hasRegion() const { return mRegion != NO_REGION; } hasVariantLocale110 inline bool hasVariant() const { return mVariant != Variant::NO_VARIANT; } hasEmojiStyleLocale111 inline bool hasEmojiStyle() const { return mEmojiStyle != EmojiStyle::EMPTY; } 112 isSupportedLocale113 inline bool isSupported() const { 114 return hasLanguage() || hasScript() || hasRegion() || hasVariant() || hasEmojiStyle(); 115 } 116 isUnsupportedLocale117 inline bool isUnsupported() const { return !isSupported(); } 118 getEmojiStyleLocale119 EmojiStyle getEmojiStyle() const { return mEmojiStyle; } 120 121 bool isEqualScript(const Locale& other) const; 122 123 // Returns true if this script supports the given script. For example, ja-Jpan supports Hira, 124 // ja-Hira doesn't support Jpan. 125 bool supportsScript(uint32_t script) const; 126 bool supportsScript(char c1, char c2, char c3, char c4) const; 127 128 std::string getString() const; 129 130 std::string getStringWithLineBreakOption(LineBreakStyle lbStyle, 131 LineBreakWordStyle lbWordStyle) const; 132 133 // Calculates a matching score. This score represents how well the input locales cover this 134 // locale. The maximum score in the locale list is returned. 135 // 0 = no match, 1 = script match, 2 = script and primary language match. 136 int calcScoreFor(const LocaleList& supported) const; 137 138 // Identifier pattern: 139 // |-------|-------|-------|-------|-------|-------|-------|-------| 140 // lllllllllllllll Language Code (15 bits) 141 // ssssssssssssssssssss Script Code (20 bits) 142 // rrrrrrrrrrrrrrr Region Code (15 bits) 143 // ee Emoji Style (2 bits) 144 // XXXXXXXXXX Free (10 bits) 145 // vv German Variant (2 bits) getIdentifierLocale146 uint64_t getIdentifier() const { 147 return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 29) | ((uint64_t)mRegion << 14) | 148 ((uint64_t)mEmojiStyle << 12) | (uint64_t)mVariant; 149 } 150 151 Locale getPartialLocale(SubtagBits bits) const; 152 153 private: 154 friend class LocaleList; // for LocaleList constructor 155 156 // ISO 15924 compliant script code. The 4 chars script code are packed into a 20 bit integer. 157 // If not specified, this is kInvalidScript. 158 uint32_t mScript; 159 160 // ISO 639-1 or ISO 639-2 compliant language code. 161 // The two- or three-letter language code is packed into a 15 bit integer. 162 // mLanguage = 0 means the Locale is unsupported. 163 uint16_t mLanguage; 164 165 // ISO 3166-1 or UN M.49 compliant region code. The two-letter or three-digit region code is 166 // packed into a 15 bit integer. 167 uint16_t mRegion; 168 169 // For faster comparing, use 7 bits for specific scripts. 170 static const uint8_t kBopomofoFlag = 1u; 171 static const uint8_t kHanFlag = 1u << 1; 172 static const uint8_t kHangulFlag = 1u << 2; 173 static const uint8_t kHiraganaFlag = 1u << 3; 174 static const uint8_t kKatakanaFlag = 1u << 4; 175 static const uint8_t kSimplifiedChineseFlag = 1u << 5; 176 static const uint8_t kTraditionalChineseFlag = 1u << 6; 177 uint8_t mSubScriptBits; 178 179 Variant mVariant; 180 181 EmojiStyle mEmojiStyle; 182 183 void resolveUnicodeExtension(const char* buf, size_t length); 184 extractBitsLocale185 inline static uint64_t extractBits(uint64_t value, uint8_t shift, uint8_t nBits) { 186 return (value >> shift) & ((1 << nBits) - 1); 187 } 188 189 int buildLocaleString(char* buf) const; 190 191 static uint8_t scriptToSubScriptBits(uint32_t rawScript); 192 193 static EmojiStyle resolveEmojiStyle(const char* buf, size_t length); 194 static EmojiStyle scriptToEmojiStyle(uint32_t script); 195 196 // Returns true if the provide subscript bits has the requested subscript bits. 197 // Note that this function returns false if the requested subscript bits are empty. 198 static bool supportsScript(uint8_t providedBits, uint8_t requestedBits); 199 }; 200 201 // An immutable list of locale. 202 class LocaleList { 203 public: 204 explicit LocaleList(std::vector<Locale>&& locales); LocaleList()205 LocaleList() 206 : mUnionOfSubScriptBits(0), 207 mIsAllTheSameLocale(false), 208 mEmojiStyle(EmojiStyle::EMPTY) {} 209 LocaleList(LocaleList&&) = default; 210 size()211 size_t size() const { return mLocales.size(); } empty()212 bool empty() const { return mLocales.empty(); } 213 const Locale& operator[](size_t n) const { return mLocales[n]; } 214 getHbLanguage(size_t n)215 hb_language_t getHbLanguage(size_t n) const { return mHbLangs[n]; } 216 217 // Returns an effective emoji style of this locale list. 218 // The effective means the first non empty emoji style in the list. getEmojiStyle()219 EmojiStyle getEmojiStyle() const { return mEmojiStyle; } 220 221 bool atLeastOneScriptMatch(const LocaleList& list) const; 222 hasJapanese()223 bool hasJapanese() const { return hasScript('J', 'p', 'a', 'n'); } hasKorean()224 bool hasKorean() const { return hasScript('K', 'o', 'r', 'e'); } 225 226 private: 227 friend struct Locale; // for calcScoreFor 228 229 std::vector<Locale> mLocales; 230 231 // The languages to be passed to HarfBuzz shaper. 232 std::vector<hb_language_t> mHbLangs; 233 uint8_t mUnionOfSubScriptBits; 234 bool mIsAllTheSameLocale; 235 EmojiStyle mEmojiStyle; 236 getUnionOfSubScriptBits()237 uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; } isAllTheSameLocale()238 bool isAllTheSameLocale() const { return mIsAllTheSameLocale; } 239 240 bool hasScript(char c1, char c2, char c3, char c4) const; 241 242 // Do not copy and assign. 243 LocaleList(const LocaleList&) = delete; 244 void operator=(const LocaleList&) = delete; 245 }; 246 247 } // namespace minikin 248 249 #endif // MINIKIN_LOCALE_LIST_H 250