1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.tools.metalava.model.text
18 
19 import com.android.tools.metalava.reporter.FileLocation
20 import java.nio.file.Path
21 
22 /**
23  * Extracts tokens from a sequence of characters.
24  *
 * The tokens are not the usual sort of tokens created by a tokenizer: some tokens contain
 * whitespace and even whole string literals. For example, an annotation, including its parameters
 * if present, can be returned as a single token if requested (by calling [requireToken] with
 * `parenIsSep=false`).
29  */
30 internal class Tokenizer(private val path: Path, private val buffer: CharArray) :
31     FileLocationTracker {
32     private var position = 0
33     private var line = 1
34 
fileLocationnull35     override fun fileLocation(): FileLocation {
36         return FileLocation.createLocation(path, line)
37     }
38 
eatWhitespacenull39     private fun eatWhitespace(): Boolean {
40         var ate = false
41         while (position < buffer.size && isSpace(buffer[position])) {
42             if (buffer[position] == '\n') {
43                 line++
44             }
45             position++
46             ate = true
47         }
48         return ate
49     }
50 
eatCommentnull51     private fun eatComment(): Boolean {
52         if (position + 1 < buffer.size) {
53             if (buffer[position] == '/' && buffer[position + 1] == '/') {
54                 position += 2
55                 while (position < buffer.size && !isNewline(buffer[position])) {
56                     position++
57                 }
58                 return true
59             }
60         }
61         return false
62     }
63 
eatWhitespaceAndCommentsnull64     private fun eatWhitespaceAndComments() {
65         while (eatWhitespace() || eatComment()) {
66             // intentionally consume whitespace and comments
67         }
68     }
69 
requireTokennull70     fun requireToken(parenIsSep: Boolean = true, eatWhitespace: Boolean = true): String {
71         val token = getToken(parenIsSep, eatWhitespace)
72         return token ?: throw ApiParseException("Unexpected end of file", this)
73     }
74 
offsetnull75     fun offset(): Int {
76         return position
77     }
78 
getStringFromOffsetnull79     fun getStringFromOffset(offset: Int): String {
80         return String(buffer, offset, position - offset)
81     }
82 
    /**
     * The token most recently returned by [getToken].
     *
     * It is assigned every time [getToken] produces a non-null token and is left untouched when
     * [getToken] returns `null`. Being `lateinit`, it must not be read before the first token has
     * been produced.
     */
    lateinit var current: String
84 
getTokennull85     fun getToken(parenIsSep: Boolean = true, eatWhitespace: Boolean = true): String? {
86         if (eatWhitespace) {
87             eatWhitespaceAndComments()
88         }
89         if (position >= buffer.size) {
90             return null
91         }
92         val line = line
93         val c = buffer[position]
94         val start = position
95         position++
96         if (c == '"') {
97             val STATE_BEGIN = 0
98             val STATE_ESCAPE = 1
99             var state = STATE_BEGIN
100             while (true) {
101                 if (position >= buffer.size) {
102                     throw ApiParseException("Unexpected end of file for \" starting at $line", this)
103                 }
104                 val k = buffer[position]
105                 if (k == '\n' || k == '\r') {
106                     throw ApiParseException(
107                         "Unexpected newline for \" starting at $line in $path",
108                         this
109                     )
110                 }
111                 position++
112                 when (state) {
113                     STATE_BEGIN ->
114                         when (k) {
115                             '\\' -> state = STATE_ESCAPE
116                             '"' -> {
117                                 current = String(buffer, start, position - start)
118                                 return current
119                             }
120                         }
121                     STATE_ESCAPE -> state = STATE_BEGIN
122                 }
123             }
124         } else if (isSeparator(c, parenIsSep)) {
125             current = c.toString()
126             return current
127         } else {
128             var genericDepth = 0
129             do {
130                 while (position < buffer.size) {
131                     val d = buffer[position]
132                     if (isSpace(d) || isSeparator(d, parenIsSep)) {
133                         break
134                     } else if (d == '"') {
135                         // String literal in token: skip the full thing
136                         position++
137                         while (position < buffer.size) {
138                             if (buffer[position] == '"') {
139                                 position++
140                                 break
141                             } else if (buffer[position] == '\\') {
142                                 position++
143                             }
144                             position++
145                         }
146                         continue
147                     }
148                     position++
149                 }
150                 if (position < buffer.size) {
151                     if (buffer[position] == '<') {
152                         genericDepth++
153                         position++
154                     } else if (genericDepth != 0) {
155                         if (buffer[position] == '>') {
156                             genericDepth--
157                         }
158                         position++
159                     }
160                 }
161             } while (
162                 position < buffer.size &&
163                     (!isSpace(buffer[position]) && !isSeparator(buffer[position], parenIsSep) ||
164                         genericDepth != 0)
165             )
166             if (position >= buffer.size) {
167                 throw ApiParseException("Unexpected end of file for \" starting at $line", this)
168             }
169             current = String(buffer, start, position - start)
170             return current
171         }
172     }
173 
assertIdentnull174     internal fun assertIdent(token: String) {
175         if (!isIdent(token[0])) {
176             throw ApiParseException("Expected identifier: $token", this)
177         }
178     }
179 
180     companion object {
isSpacenull181         private fun isSpace(c: Char): Boolean {
182             return c == ' ' || c == '\t' || c == '\n' || c == '\r'
183         }
184 
isNewlinenull185         private fun isNewline(c: Char): Boolean {
186             return c == '\n' || c == '\r'
187         }
188 
isSeparatornull189         private fun isSeparator(c: Char, parenIsSep: Boolean): Boolean {
190             if (parenIsSep) {
191                 if (c == '(' || c == ')') {
192                     return true
193                 }
194             }
195             return c == '{' || c == '}' || c == ',' || c == ';' || c == '<' || c == '>'
196         }
197 
isIdentnull198         private fun isIdent(c: Char): Boolean {
199             return c != '"' && !isSeparator(c, true)
200         }
201 
isIdentnull202         internal fun isIdent(token: String): Boolean {
203             return isIdent(token[0])
204         }
205     }
206 }
207 
/**
 * Interface implemented by [Tokenizer] which keeps track of the [FileLocation] for the current
 * token.
 *
 * This is provided to avoid passing [Tokenizer] to code that might need access to the current
 * [FileLocation] but does not consume tokens. That makes both that code and the [Tokenizer] state
 * easier to reason about.
 */
internal interface FileLocationTracker {
    /**
     * Returns the current [FileLocation].
     *
     * [Tokenizer] builds it from the path of the file being read and its current line number.
     */
    fun fileLocation(): FileLocation
}
220