1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "text/Unicode.h"
18 
19 #include <algorithm>
20 #include <array>
21 
22 #include "text/Utf8Iterator.h"
23 
24 using ::android::StringPiece;
25 
26 namespace aapt {
27 namespace text {
28 
29 namespace {
30 
// One row of the character property table: an inclusive range of code points
// [first_char, last_char] together with the property bits shared by every
// code point in that range.
struct CharacterProperties {
  // Bit flags stored in `properties`. They may be OR-ed together.
  enum : uint32_t {
    kXidStart = 0x1,     // Code point may begin an identifier.
    kXidContinue = 0x2,  // Code point may appear after the first position.
  };

  // First code point covered by this entry (inclusive).
  char32_t first_char;
  // Last code point covered by this entry (inclusive).
  char32_t last_char;
  // Bitwise combination of the flags declared above.
  uint32_t properties;
};
41 
// Include the generated data table.
43 #include "text/Unicode_data.cpp"
44 
CompareCharacterProperties(const CharacterProperties & a,char32_t codepoint)45 bool CompareCharacterProperties(const CharacterProperties& a, char32_t codepoint) {
46   return a.last_char < codepoint;
47 }
48 
FindCharacterProperties(char32_t codepoint)49 uint32_t FindCharacterProperties(char32_t codepoint) {
50   const auto iter_end = sCharacterProperties.end();
51   const auto iter = std::lower_bound(sCharacterProperties.begin(), iter_end, codepoint,
52                                      CompareCharacterProperties);
53   if (iter != iter_end && codepoint >= iter->first_char) {
54     return iter->properties;
55   }
56   return 0u;
57 }
58 
59 }  // namespace
60 
IsXidStart(char32_t codepoint)61 bool IsXidStart(char32_t codepoint) {
62   return FindCharacterProperties(codepoint) & CharacterProperties::kXidStart;
63 }
64 
IsXidContinue(char32_t codepoint)65 bool IsXidContinue(char32_t codepoint) {
66   return FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue;
67 }
68 
// Returns true if `codepoint` has the Unicode White_Space property.
//
// The set is hardcoded because it is small and the external/icu project does
// not list it among the data files to parse.
// Sourced from http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
bool IsWhitespace(char32_t codepoint) {
  // The two contiguous ranges are checked first.
  if (codepoint >= 0x0009 && codepoint <= 0x000d) {
    return true;  // U+0009..U+000D: tab through carriage return.
  }
  if (codepoint >= 0x2000 && codepoint <= 0x200a) {
    return true;  // U+2000..U+200A: en quad through hair space.
  }

  // Remaining isolated code points.
  switch (codepoint) {
    case 0x0020:  // space
    case 0x0085:  // next line
    case 0x00a0:  // no-break space
    case 0x1680:  // ogham space mark
    case 0x2028:  // line separator
    case 0x2029:  // paragraph separator
    case 0x202f:  // narrow no-break space
    case 0x205f:  // medium mathematical space
    case 0x3000:  // ideographic space
      return true;
    default:
      return false;
  }
}
79 
IsJavaIdentifier(StringPiece str)80 bool IsJavaIdentifier(StringPiece str) {
81   Utf8Iterator iter(str);
82 
83   // Check the first character.
84   if (!iter.HasNext()) {
85     return false;
86   }
87 
88   const char32_t first_codepoint = iter.Next();
89   if (!IsXidStart(first_codepoint) && first_codepoint != U'_' && first_codepoint != U'$') {
90     return false;
91   }
92 
93   while (iter.HasNext()) {
94     const char32_t codepoint = iter.Next();
95     if (!IsXidContinue(codepoint) && codepoint != U'$') {
96       return false;
97     }
98   }
99   return true;
100 }
101 
IsValidResourceEntryName(StringPiece str)102 bool IsValidResourceEntryName(StringPiece str) {
103   Utf8Iterator iter(str);
104 
105   // Check the first character.
106   if (!iter.HasNext()) {
107     return false;
108   }
109 
110   // Resources are allowed to start with '_'
111   const char32_t first_codepoint = iter.Next();
112   if (!IsXidStart(first_codepoint) && first_codepoint != U'_') {
113     return false;
114   }
115 
116   while (iter.HasNext()) {
117     const char32_t codepoint = iter.Next();
118     if (!IsXidContinue(codepoint) && codepoint != U'.' && codepoint != U'-') {
119       return false;
120     }
121   }
122   return true;
123 }
124 
125 }  // namespace text
126 }  // namespace aapt
127