1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "HyphenatorMap.h"
18 
19 #include <gtest/gtest.h>
20 
21 #include "LocaleListCache.h"
22 #include "MinikinInternal.h"
23 
24 namespace minikin {
25 namespace {
26 
27 // Constants used for testing. The address does not need a valid one.
28 const Hyphenator* FAKE_ADDRESS = reinterpret_cast<const Hyphenator*>(1);
29 const Hyphenator* AS_HYPHENATOR = FAKE_ADDRESS++;
30 const Hyphenator* BG_HYPHENATOR = FAKE_ADDRESS++;
31 const Hyphenator* BN_HYPHENATOR = FAKE_ADDRESS++;
32 const Hyphenator* CU_HYPHENATOR = FAKE_ADDRESS++;
33 const Hyphenator* CY_HYPHENATOR = FAKE_ADDRESS++;
34 const Hyphenator* DA_HYPHENATOR = FAKE_ADDRESS++;
35 const Hyphenator* DE_1901_HYPHENATOR = FAKE_ADDRESS++;
36 const Hyphenator* DE_1996_HYPHENATOR = FAKE_ADDRESS++;
37 const Hyphenator* DE_CH_1901_HYPHENATOR = FAKE_ADDRESS++;
38 const Hyphenator* EN_GB_HYPHENATOR = FAKE_ADDRESS++;
39 const Hyphenator* EN_US_HYPHENATOR = FAKE_ADDRESS++;
40 const Hyphenator* ES_HYPHENATOR = FAKE_ADDRESS++;
41 const Hyphenator* ET_HYPHENATOR = FAKE_ADDRESS++;
42 const Hyphenator* EU_HYPHENATOR = FAKE_ADDRESS++;
43 const Hyphenator* FR_HYPHENATOR = FAKE_ADDRESS++;
44 const Hyphenator* GA_HYPHENATOR = FAKE_ADDRESS++;
45 const Hyphenator* GU_HYPHENATOR = FAKE_ADDRESS++;
46 const Hyphenator* HI_HYPHENATOR = FAKE_ADDRESS++;
47 const Hyphenator* HR_HYPHENATOR = FAKE_ADDRESS++;
48 const Hyphenator* HU_HYPHENATOR = FAKE_ADDRESS++;
49 const Hyphenator* HY_HYPHENATOR = FAKE_ADDRESS++;
50 const Hyphenator* KN_HYPHENATOR = FAKE_ADDRESS++;
51 const Hyphenator* ML_HYPHENATOR = FAKE_ADDRESS++;
52 const Hyphenator* MN_CYRL_HYPHENATOR = FAKE_ADDRESS++;
53 const Hyphenator* MR_HYPHENATOR = FAKE_ADDRESS++;
54 const Hyphenator* NB_HYPHENATOR = FAKE_ADDRESS++;
55 const Hyphenator* NN_HYPHENATOR = FAKE_ADDRESS++;
56 const Hyphenator* OR_HYPHENATOR = FAKE_ADDRESS++;
57 const Hyphenator* PA_HYPHENATOR = FAKE_ADDRESS++;
58 const Hyphenator* PT_HYPHENATOR = FAKE_ADDRESS++;
59 const Hyphenator* SL_HYPHENATOR = FAKE_ADDRESS++;
60 const Hyphenator* TA_HYPHENATOR = FAKE_ADDRESS++;
61 const Hyphenator* TE_HYPHENATOR = FAKE_ADDRESS++;
62 const Hyphenator* TK_HYPHENATOR = FAKE_ADDRESS++;
63 const Hyphenator* UND_ETHI_HYPHENATOR = FAKE_ADDRESS++;
64 
65 class TestableHyphenatorMap : public HyphenatorMap {
66 public:
TestableHyphenatorMap()67     TestableHyphenatorMap() : HyphenatorMap() {}
68 
69     using HyphenatorMap::addAliasInternal;
70     using HyphenatorMap::addInternal;
71     using HyphenatorMap::lookupInternal;
72 };
73 
74 class HyphenatorMapTest : public testing::Test {
75 protected:
SetUp()76     virtual void SetUp() override {
77         // Following settings are copied from Hyphenator.java.
78         mMap.addInternal("as", AS_HYPHENATOR);
79         mMap.addInternal("bg", BG_HYPHENATOR);
80         mMap.addInternal("bn", BN_HYPHENATOR);
81         mMap.addInternal("cu", CU_HYPHENATOR);
82         mMap.addInternal("cy", CY_HYPHENATOR);
83         mMap.addInternal("da", DA_HYPHENATOR);
84         mMap.addInternal("de-1901", DE_1901_HYPHENATOR);
85         mMap.addInternal("de-1996", DE_1996_HYPHENATOR);
86         mMap.addInternal("de-CH-1901", DE_CH_1901_HYPHENATOR);
87         mMap.addInternal("en-GB", EN_GB_HYPHENATOR);
88         mMap.addInternal("en-US", EN_US_HYPHENATOR);
89         mMap.addInternal("es", ES_HYPHENATOR);
90         mMap.addInternal("et", ET_HYPHENATOR);
91         mMap.addInternal("eu", EU_HYPHENATOR);
92         mMap.addInternal("fr", FR_HYPHENATOR);
93         mMap.addInternal("ga", GA_HYPHENATOR);
94         mMap.addInternal("gu", GU_HYPHENATOR);
95         mMap.addInternal("hi", HI_HYPHENATOR);
96         mMap.addInternal("hr", HR_HYPHENATOR);
97         mMap.addInternal("hu", HU_HYPHENATOR);
98         mMap.addInternal("hy", HY_HYPHENATOR);
99         mMap.addInternal("kn", KN_HYPHENATOR);
100         mMap.addInternal("ml", ML_HYPHENATOR);
101         mMap.addInternal("mn-Cyrl", MN_CYRL_HYPHENATOR);
102         mMap.addInternal("mr", MR_HYPHENATOR);
103         mMap.addInternal("nb", NB_HYPHENATOR);
104         mMap.addInternal("nn", NN_HYPHENATOR);
105         mMap.addInternal("or", OR_HYPHENATOR);
106         mMap.addInternal("pa", PA_HYPHENATOR);
107         mMap.addInternal("pt", PT_HYPHENATOR);
108         mMap.addInternal("sl", SL_HYPHENATOR);
109         mMap.addInternal("ta", TA_HYPHENATOR);
110         mMap.addInternal("te", TE_HYPHENATOR);
111         mMap.addInternal("tk", TK_HYPHENATOR);
112         mMap.addInternal("und-Ethi", UND_ETHI_HYPHENATOR);
113 
114         mMap.addAliasInternal("en", "en-GB");
115         mMap.addAliasInternal("en-AS", "en-US");
116         mMap.addAliasInternal("en-GU", "en-US");
117         mMap.addAliasInternal("en-MH", "en-US");
118         mMap.addAliasInternal("en-MP", "en-US");
119         mMap.addAliasInternal("en-PR", "en-US");
120         mMap.addAliasInternal("en-UM", "en-US");
121         mMap.addAliasInternal("en-VI", "en-US");
122         mMap.addAliasInternal("de-LI-1901", "de-CH-1901");
123         mMap.addAliasInternal("de", "de-1996");
124         mMap.addAliasInternal("no", "nb");
125         mMap.addAliasInternal("mn", "mn-Cyrl");
126         // am for und-Ethi is removed for testing purposes.
127         mMap.addAliasInternal("byn", "und-Ethi");
128         mMap.addAliasInternal("gez", "und-Ethi");
129         mMap.addAliasInternal("ti", "und-Ethi");
130         mMap.addAliasInternal("wal", "und-Ethi");
131     }
132 
getLocale(const std::string & localeStr)133     const Locale& getLocale(const std::string& localeStr) {
134         // In production, we reconstruct the LocaleList from the locale list ID.
135         // So, do it here too.
136         const uint32_t id = LocaleListCache::getId(localeStr);
137         const LocaleList& locales = LocaleListCache::getById(id);
138         MINIKIN_ASSERT(locales.size() == 1, "The input must be a single locale");
139         return locales[0];
140     }
141 
lookup(const std::string & localeStr)142     const Hyphenator* lookup(const std::string& localeStr) {
143         return mMap.lookupInternal(getLocale(localeStr));
144     }
145 
146 private:
147     TestableHyphenatorMap mMap;
148 };
149 
// Every locale that the fixture registers directly must resolve to its own hyphenator.
TEST_F(HyphenatorMapTest, exactMatch) {
    struct Case {
        const Hyphenator* expected;
        const char* locale;
    };
    const Case kCases[] = {
        {AS_HYPHENATOR, "as"},
        {BG_HYPHENATOR, "bg"},
        {BN_HYPHENATOR, "bn"},
        {CU_HYPHENATOR, "cu"},
        {CY_HYPHENATOR, "cy"},
        {DA_HYPHENATOR, "da"},
        {DE_1901_HYPHENATOR, "de-1901"},
        {DE_1996_HYPHENATOR, "de-1996"},
        {DE_CH_1901_HYPHENATOR, "de-CH-1901"},
        {EN_GB_HYPHENATOR, "en-GB"},
        {EN_US_HYPHENATOR, "en-US"},
        {ES_HYPHENATOR, "es"},
        {ET_HYPHENATOR, "et"},
        {EU_HYPHENATOR, "eu"},
        {FR_HYPHENATOR, "fr"},
        {GA_HYPHENATOR, "ga"},
        {GU_HYPHENATOR, "gu"},
        {HI_HYPHENATOR, "hi"},
        {HR_HYPHENATOR, "hr"},
        {HU_HYPHENATOR, "hu"},
        {HY_HYPHENATOR, "hy"},
        {KN_HYPHENATOR, "kn"},
        {ML_HYPHENATOR, "ml"},
        {MN_CYRL_HYPHENATOR, "mn-Cyrl"},
        {MR_HYPHENATOR, "mr"},
        {NB_HYPHENATOR, "nb"},
        {NN_HYPHENATOR, "nn"},
        {OR_HYPHENATOR, "or"},
        {PA_HYPHENATOR, "pa"},
        {PT_HYPHENATOR, "pt"},
        {SL_HYPHENATOR, "sl"},
        {TA_HYPHENATOR, "ta"},
        {TE_HYPHENATOR, "te"},
        {TK_HYPHENATOR, "tk"},
        {UND_ETHI_HYPHENATOR, "und-Ethi"},
    };

    for (const Case& testCase : kCases) {
        // Stream the locale so a failure still identifies which entry broke.
        EXPECT_EQ(testCase.expected, lookup(testCase.locale)) << testCase.locale;
    }
}
187 
// Locales registered only as aliases must resolve to the hyphenator of their alias target.
TEST_F(HyphenatorMapTest, aliasMatch) {
    struct Case {
        const Hyphenator* expected;
        const char* locale;
    };
    const Case kCases[] = {
        {EN_US_HYPHENATOR, "en-AS"},
        {EN_US_HYPHENATOR, "en-GU"},
        {EN_US_HYPHENATOR, "en-MH"},
        {EN_US_HYPHENATOR, "en-MP"},
        {EN_US_HYPHENATOR, "en-PR"},
        {EN_US_HYPHENATOR, "en-UM"},
        {EN_US_HYPHENATOR, "en-VI"},
        {DE_1996_HYPHENATOR, "de"},
        {DE_CH_1901_HYPHENATOR, "de-LI-1901"},
        {NB_HYPHENATOR, "no"},
        {MN_CYRL_HYPHENATOR, "mn"},
        {UND_ETHI_HYPHENATOR, "byn"},
        {UND_ETHI_HYPHENATOR, "gez"},
        {UND_ETHI_HYPHENATOR, "ti"},
        {UND_ETHI_HYPHENATOR, "wal"},
        // Amharic is tested in fallbackTest_ScriptFallback.
    };

    for (const Case& testCase : kCases) {
        // Stream the locale so a failure still identifies which alias broke.
        EXPECT_EQ(testCase.expected, lookup(testCase.locale)) << testCase.locale;
    }
}
206 
// Script should be ignored until the final script-only matching rule, so an explicit script
// subtag must not change which English hyphenator is selected.
TEST_F(HyphenatorMapTest, IgnoreScript) {
    struct Case {
        const Hyphenator* expected;
        const char* locale;
    };
    const Case kCases[] = {
        // US English with assorted script subtags.
        {EN_US_HYPHENATOR, "en-Latn-US"},
        {EN_US_HYPHENATOR, "en-Zsye-US"},
        {EN_US_HYPHENATOR, "en-Zsym-US"},
        {EN_US_HYPHENATOR, "en-Jpan-US"},
        {EN_US_HYPHENATOR, "en-Hans-US"},
        {EN_US_HYPHENATOR, "en-Ethi-US"},

        // Regions expected to end up on the British English hyphenator.
        {EN_GB_HYPHENATOR, "en-Zsye-AU"},
        {EN_GB_HYPHENATOR, "en-Zsye-GB"},
    };

    for (const Case& testCase : kCases) {
        EXPECT_EQ(testCase.expected, lookup(testCase.locale)) << testCase.locale;
    }
}
219 
// English regions without their own hyphenator are expected to use the British English one,
// while a bare "en" is expected to end up on US English.
TEST_F(HyphenatorMapTest, languageFallback) {
    const char* const kBritishFallbackLocales[] = {"en-AU", "en-NZ"};
    for (const char* locale : kBritishFallbackLocales) {
        EXPECT_EQ(EN_GB_HYPHENATOR, lookup(locale)) << locale;
    }

    // "en" is expanded to en-Latn-US. So this is equivalent to "en-Latn-US" test case.
    // This expansion also happens in production.
    const Hyphenator* const bareEnglish = lookup("en");
    EXPECT_EQ(EN_US_HYPHENATOR, bareEnglish);
}
228 
// German lookups across regions, orthography variants (1901 / 1996), script subtags and
// Unicode extensions. Each row lists the hyphenator expected for one locale string.
TEST_F(HyphenatorMapTest, GermanFallback) {
    struct Case {
        const Hyphenator* expected;
        const char* locale;
    };
    const Case kCases[] = {
        // German in general
        {DE_1996_HYPHENATOR, "de"},
        {DE_1901_HYPHENATOR, "de-1901"},
        {DE_1996_HYPHENATOR, "de-1996"},

        // German in Germany
        {DE_1996_HYPHENATOR, "de-DE"},

        {DE_1901_HYPHENATOR, "de-DE-1901"},
        {DE_1901_HYPHENATOR, "de-Latn-DE-1901"},
        {DE_1901_HYPHENATOR, "de-Latn-DE-1901-u-em-emoji"},

        {DE_1996_HYPHENATOR, "de-DE-1996"},
        {DE_1996_HYPHENATOR, "de-Latn-DE-1996"},
        {DE_1996_HYPHENATOR, "de-Latn-DE-1996-u-em-emoji"},

        // German in Austria
        {DE_1996_HYPHENATOR, "de-AT"},

        {DE_1901_HYPHENATOR, "de-AT-1901"},
        {DE_1901_HYPHENATOR, "de-Latn-AT-1901"},
        {DE_1901_HYPHENATOR, "de-Latn-AT-1901-u-em-emoji"},

        {DE_1996_HYPHENATOR, "de-AT-1996"},
        {DE_1996_HYPHENATOR, "de-Latn-AT-1996"},
        {DE_1996_HYPHENATOR, "de-Latn-AT-1996-u-em-emoji"},

        // German in Switzerland
        {DE_1996_HYPHENATOR, "de-CH"},

        {DE_CH_1901_HYPHENATOR, "de-CH-1901"},
        {DE_CH_1901_HYPHENATOR, "de-Latn-CH-1901"},
        {DE_CH_1901_HYPHENATOR, "de-Latn-CH-1901-u-em-emoji"},

        {DE_1996_HYPHENATOR, "de-CH-1996"},
        {DE_1996_HYPHENATOR, "de-Latn-CH-1996"},
        {DE_1996_HYPHENATOR, "de-Latn-CH-1996-u-em-emoji"},

        // German in Liechtenstein
        {DE_1996_HYPHENATOR, "de-LI"},

        {DE_CH_1901_HYPHENATOR, "de-LI-1901"},
        {DE_CH_1901_HYPHENATOR, "de-Latn-LI-1901"},
        {DE_CH_1901_HYPHENATOR, "de-Latn-LI-1901-u-em-emoji"},

        {DE_1996_HYPHENATOR, "de-LI-1996"},
        {DE_1996_HYPHENATOR, "de-Latn-LI-1996"},
        {DE_1996_HYPHENATOR, "de-Latn-LI-1996-u-em-emoji"},
    };

    for (const Case& testCase : kCases) {
        // Stream the locale so a failure still identifies which row broke.
        EXPECT_EQ(testCase.expected, lookup(testCase.locale)) << testCase.locale;
    }
}
279 
// Spanish is registered only as the bare language "es"; any region must still resolve to it.
TEST_F(HyphenatorMapTest, fallbackTest_LanguageFallback) {
    const char* const kSpanishLocales[] = {
        "es-ES",
        "es-AR",
        "es-BO",
        "es-CL",
        // Spanish in Great Britain
        "es-GB",
    };

    for (const char* locale : kSpanishLocales) {
        EXPECT_EQ(ES_HYPHENATOR, lookup(locale)) << locale;
    }
}
289 
// The fixture deliberately registers no "am" alias, yet Amharic is still expected to resolve to
// the und-Ethi hyphenator.
TEST_F(HyphenatorMapTest, fallbackTest_ScriptFallback) {
    const Hyphenator* const amharic = lookup("am");
    EXPECT_EQ(UND_ETHI_HYPHENATOR, amharic);
}
293 
// Locales the fixture never registers must still produce a non-null hyphenator.
TEST_F(HyphenatorMapTest, neverReturnNullptrTest) {
    const char* const kUnregisteredLocales[] = {"und", "ja", "ja-JP"};

    for (const char* locale : kUnregisteredLocales) {
        EXPECT_NE(nullptr, lookup(locale)) << locale;
    }
}
299 
// mn-Cyrl should not match with ru-Cyrl and und-Cyrl: sharing the Cyrillic script alone must not
// select the Mongolian hyphenator.
TEST_F(HyphenatorMapTest, CyrlScriptFallback) {
    const char* const kOtherCyrillicLocales[] = {"ru-Cyrl", "und-Cyrl"};

    for (const char* locale : kOtherCyrillicLocales) {
        EXPECT_NE(MN_CYRL_HYPHENATOR, lookup(locale)) << locale;
    }
}
305 
306 }  // namespace
307 }  // namespace minikin
308