1 /******************************************************************************
2  *
3  * Copyright (C) 2022 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  */
19 #include <fuzzer/FuzzedDataProvider.h>
20 #include <minikin/Hyphenator.h>
21 
22 #include <iostream>
23 #include <string>
24 
25 #include "HyphenatorMap.h"
26 #include "Locale.h"
27 #include "LocaleListCache.h"
28 #include "MinikinInternal.h"
29 #include "UnicodeUtils.h"
30 #include "minikin/LocaleList.h"
31 #include "minikin/U16StringPiece.h"
32 
33 using namespace minikin;
34 
35 const EndHyphenEdit EndHyphenEdits[] = {
36         EndHyphenEdit::NO_EDIT,
37         EndHyphenEdit::REPLACE_WITH_HYPHEN,
38         EndHyphenEdit::INSERT_HYPHEN,
39         EndHyphenEdit::INSERT_ARMENIAN_HYPHEN,
40         EndHyphenEdit::INSERT_MAQAF,
41         EndHyphenEdit::INSERT_UCAS_HYPHEN,
42         EndHyphenEdit::INSERT_ZWJ_AND_HYPHEN,
43 };
44 
45 const StartHyphenEdit StartHyphenEdits[] = {
46         StartHyphenEdit::NO_EDIT,
47         StartHyphenEdit::INSERT_HYPHEN,
48         StartHyphenEdit::INSERT_ZWJ,
49 };
50 
51 const HyphenationType HyphenationTypes[] = {
52         HyphenationType::DONT_BREAK,
53         HyphenationType::BREAK_AND_INSERT_HYPHEN,
54         HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN,
55         HyphenationType::BREAK_AND_INSERT_MAQAF,
56         HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN,
57         HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN,
58         HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN,
59         HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE,
60         HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ,
61 };
62 
// UTF-16 code units that generateStringPiece() mixes into the generated text. The table is
// read-only lookup data, so it is declared constexpr rather than left as a mutable global.
// The order is significant: the fuzzer input selects entries by index.
constexpr uint16_t specialChars[] = {
        0x000A,  // LINE FEED
        0x000D,  // CARRIAGE RETURN
        0x0009,  // CHARACTER TABULATION
        0x002D,  // HYPHEN-MINUS
        0x00A0,  // NO-BREAK SPACE
        0x00AD,  // SOFT HYPHEN
        0x00B7,  // MIDDLE DOT
        0x058A,  // ARMENIAN HYPHEN
        0x05BE,  // HEBREW PUNCTUATION MAQAF
        0x1400,  // CANADIAN SYLLABICS HYPHEN
        0x200D,  // ZERO WIDTH JOINER
        0x2010,  // HYPHEN
};
67 
// Upper bound handed to ConsumeRandomLengthString() for every fuzz-derived string in this
// harness (locale names and locale lists).
constexpr uint16_t MAX_STR_LEN = 256;
69 
70 // Function to generate StringPiece from a vector by pushing random valued elements using fdp
generateStringPiece(FuzzedDataProvider * fdp)71 U16StringPiece generateStringPiece(FuzzedDataProvider* fdp) {
72     uint16_t size = fdp->ConsumeIntegralInRange<uint16_t>(0, (fdp->remaining_bytes() / 3));
73 
74     std::vector<uint16_t> v;
75     for (uint16_t i = 0; i < size; ++i) {
76         // To randomize the insertion of special characters
77         if (fdp->ConsumeBool()) {
78             v.push_back(fdp->PickValueInArray(specialChars));
79         } else {
80             v.push_back(fdp->ConsumeIntegral<uint16_t>());
81         }
82     }
83 
84     return U16StringPiece(v);
85 }
86 
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)87 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
88     FuzzedDataProvider fdp(data, size);
89 
90     uint8_t minPrefix = fdp.ConsumeIntegral<size_t>();
91     uint8_t minSuffix = fdp.ConsumeIntegral<size_t>();
92     std::string locale = fdp.ConsumeRandomLengthString(MAX_STR_LEN);
93     std::vector<uint8_t> patternData(fdp.ConsumeIntegralInRange<uint32_t>(0, 256));
94 
95     Hyphenator* hyphenator = Hyphenator::loadBinary(&patternData[0], patternData.size(), minPrefix,
96                                                     minSuffix, locale);
97 
98     // To randomize the API calls
99     while (fdp.remaining_bytes() > 0) {
100         auto func = fdp.PickValueInArray<const std::function<void()>>({
101                 [&]() { addHyphenator(locale, hyphenator); },
102                 [&]() {
103                     auto fromLocaleString = fdp.ConsumeRandomLengthString(MAX_STR_LEN);
104                     auto toLocaleString = fdp.ConsumeRandomLengthString(MAX_STR_LEN);
105                     addHyphenatorAlias(fromLocaleString, toLocaleString);
106                 },
107                 [&]() {
108                     packHyphenEdit(fdp.PickValueInArray(StartHyphenEdits),
109                                    fdp.PickValueInArray(EndHyphenEdits));
110                 },
111                 [&]() {
112                     auto textBuf = generateStringPiece(&fdp);
113                     std::vector<HyphenationType> result;
114                     result.push_back(fdp.PickValueInArray(HyphenationTypes));
115                     hyphenator->hyphenate(textBuf, &result);
116                 },
117                 // Get the list of locales and invoke the API for each one of them
118                 [&]() {
119                     uint32_t id = registerLocaleList(fdp.ConsumeRandomLengthString(MAX_STR_LEN));
120                     const LocaleList& locales = LocaleListCache::getById(id);
121                     for (size_t i = 0; i < locales.size(); ++i) {
122                         HyphenatorMap::lookup(locales[i]);
123                     }
124                 },
125                 [&]() { getHyphenString(endHyphenEdit(fdp.ConsumeIntegral<uint8_t>())); },
126                 [&]() { getHyphenString(startHyphenEdit(fdp.ConsumeIntegral<uint8_t>())); },
127                 [&]() { isInsertion(endHyphenEdit(fdp.ConsumeIntegral<uint8_t>())); },
128                 [&]() { isInsertion(startHyphenEdit(fdp.ConsumeIntegral<uint8_t>())); },
129                 [&]() { editForThisLine(fdp.PickValueInArray(HyphenationTypes)); },
130                 [&]() { editForNextLine(fdp.PickValueInArray(HyphenationTypes)); },
131                 [&]() { isReplacement(endHyphenEdit(fdp.ConsumeIntegral<uint8_t>())); },
132         });
133 
134         func();
135     }
136 
137     return 0;
138 }
139