1# Copyright (C) 2023 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""
15This module provides a function for validating starlark data against a schema.
16See validate() for more information.
17"""
18
19_schema_schema = {
20    "type": "dict",
21    "optional_keys": {
22        "or": {
23            "type": "list",
24            "length": ">=2",
25        },
26        "noneable": {"type": "bool"},
27        "type": {
28            "type": "string",
29            "choices": [
30                "NoneType",
31                "bool",
32                "int",
33                "float",
34                "string",
35                "bytes",
36                "list",
37                "tuple",
38                "dict",
39                "struct",
40            ],
41        },
42        "choices": {
43            "type": "list",
44            "of": {
45                "or": [
46                    {"type": "string"},
47                    {"type": "int"},
48                    {"type": "float"},
49                ],
50            },
51        },
52        "value": {
53            "or": [
54                {"type": "string"},
55                {"type": "int"},
56                {"type": "float"},
57            ],
58        },
59        "of": {},  # to be filled in later
60        "unique": {"type": "bool"},
61        "length": {"or": [
62            {"type": "string"},
63            {"type": "int"},
64        ]},
65        "required_keys": {
66            "type": "dict",
67            "values": {},  # to be filled in later
68        },
69        "optional_keys": {
70            "type": "dict",
71            "values": {},  # to be filled in later
72        },
73        "keys": {},  # to be filled in later
74        "values": {},  # to be filled in later
75        "required_fields": {
76            "type": "dict",
77            "keys": {"type": "string"},
78            "values": {},  # to be filled in later
79        },
80        "optional_fields": {
81            "type": "dict",
82            "keys": {"type": "string"},
83            "values": {},  # to be filled in later
84        },
85    },
86}
87
88_schema_schema["optional_keys"]["of"] = _schema_schema
89_schema_schema["optional_keys"]["required_keys"]["values"] = _schema_schema
90_schema_schema["optional_keys"]["optional_keys"]["values"] = _schema_schema
91_schema_schema["optional_keys"]["keys"] = _schema_schema
92_schema_schema["optional_keys"]["values"] = _schema_schema
93_schema_schema["optional_keys"]["required_fields"]["values"] = _schema_schema
94_schema_schema["optional_keys"]["optional_fields"]["values"] = _schema_schema
95
96def _check_len(obj, length):
97    if type(length) == "int":
98        return len(obj) == length
99    if length.startswith("<="):
100        return len(obj) <= int(length[2:])
101    if length.startswith(">="):
102        return len(obj) >= int(length[2:])
103    ln = int(length[1:])
104    if length[0] == "=":
105        return len(obj) == ln
106    if length[0] == "<":
107        return len(obj) < ln
108    if length[0] == ">":
109        return len(obj) > ln
110    fail("Unexpected length format")
111
112def _validate_impl(obj, schema):
113    stack = []
114
115    def newStackFrame(obj, schema):
116        stack.append({
117            "obj": obj,
118            "schema": schema,
119            "state": "start",
120        })
121
122    newStackFrame(obj, schema)
123    ret = ""
124
125    # Because bazel doesn't allow infinite loops/recursion, just make a loop
126    # with an arbitrarily large number of iterations.
127    for _ in range(100000):
128        if not stack:
129            break
130        frame = stack[-1]
131        obj = frame["obj"]
132        schema = frame["schema"]
133        state = frame["state"]
134
135        if state == "start":
136            if len(schema) == 0:
137                ret = ""
138                stack.pop()
139                continue
140            if "or" in schema:
141                if len(schema) != 1:
142                    fail("an 'or' schema must not be accompanied by any other keys")
143                frame["i"] = 0
144                frame["state"] = "or_loop"
145                frame["failures"] = []
146                newStackFrame(obj, schema["or"][0])
147                continue
148            if "type" not in schema:
149                fail("a non-empty/non-or schema must have a 'type' key: " + str(schema))
150            if schema.get("noneable", False):
151                if obj == None:
152                    ret = ""
153                    stack.pop()
154                    continue
155            ty = schema["type"]
156            if type(obj) != ty:
157                ret = "Expected %s, got %s" % (ty, type(obj))
158                stack.pop()
159                continue
160            if "length" in schema:
161                if ty not in ["string", "bytes", "list", "tuple"]:
162                    fail("'len' is only valid for string, bytes, lists, or tuples, got: " + ty)
163                if not _check_len(obj, schema["length"]):
164                    ret = "Expected length %s, got %d" % (schema["length"], len(obj))
165                    stack.pop()
166                    continue
167            if "choices" in schema:
168                if ty not in ["string", "int", "float"]:
169                    fail("'choices' is only valid for string, int, or float, got: " + ty)
170                if obj not in schema["choices"]:
171                    ret = "Expected one of %s, got %s" % (schema["choices"], obj)
172                    stack.pop()
173                    continue
174            if "value" in schema:
175                if ty not in ["string", "int", "float"]:
176                    fail("'value' is only valid for string, int, or float, got: " + ty)
177                if obj != schema["value"]:
178                    ret = "Expected %s, got %s" % (schema["value"], obj)
179                    stack.pop()
180                    continue
181            if schema.get("unique", False):
182                if ty != "list" and ty != "tuple":
183                    fail("'unique' is only valid for lists or tuples, got: " + ty)
184                sorted_list = sorted(obj)
185                done = False
186                for i in range(len(sorted_list) - 1):
187                    if type(sorted_list[i]) not in ["string", "int", "float", "bool", "NoneType", "bytes"]:
188                        ret = "'unique' only works on lists/tuples of scalar types, got: " + type(sorted_list[i])
189                        stack.pop()
190                        done = True
191                        break
192                    if sorted_list[i] == sorted_list[i + 1]:
193                        ret = "Expected all elements to be unique, but saw '%s' twice" % str(sorted_list[i])
194                        stack.pop()
195                        done = True
196                        break
197                if done:
198                    continue
199            if "of" in schema:
200                if ty != "list" and ty != "tuple":
201                    fail("'of' is only valid for lists or tuples, got: " + ty)
202                if obj:
203                    frame["i"] = 0
204                    frame["state"] = "of_loop"
205                    newStackFrame(obj[0], schema["of"])
206                    continue
207            if ty == "dict":
208                if "required_fields" in schema or "optional_fields" in schema:
209                    fail("a dict schema can't contain required_fields/optional_fields")
210                schema_names_keys = bool(schema.get("required_keys", {})) or bool(schema.get("optional_keys", {}))
211                schema_enforces_generic_keys = bool(schema.get("keys", {})) or bool(schema.get("values", {}))
212                if schema_names_keys and schema_enforces_generic_keys:
213                    fail("Only required_keys/optional_keys or keys/values may be used, but not both")
214                if schema_names_keys:
215                    all_keys = {}
216                    done = False
217                    for key, subSchema in schema.get("required_keys", {}).items():
218                        if key not in obj:
219                            ret = "required key '" + key + "' not found"
220                            stack.pop()
221                            done = True
222                            break
223                        all_keys[key] = subSchema
224                    if done:
225                        continue
226                    for key, subSchema in schema.get("optional_keys", {}).items():
227                        if key in all_keys:
228                            fail("A key cannot be both required and optional: " + key)
229                        if key in obj:
230                            all_keys[key] = subSchema
231                    extra_keys = [
232                        key
233                        for key in obj.keys()
234                        if key not in all_keys
235                    ]
236                    if extra_keys:
237                        ret = "keys " + str(extra_keys) + " not allowed, valid keys: " + str(all_keys.keys())
238                        stack.pop()
239                        continue
240                    if all_keys:
241                        frame["all_keys"] = all_keys.items()
242                        frame["i"] = 0
243                        frame["state"] = "dict_individual_keys_loop"
244                        k, v = frame["all_keys"][0]
245                        newStackFrame(obj[k], v)
246                        continue
247                elif schema_enforces_generic_keys:
248                    frame["items"] = obj.items()
249                    if frame["items"]:
250                        frame["i"] = 0
251                        frame["state"] = "dict_generic_keys_loop"
252                        frame["checking_key"] = True
253                        continue
254            if ty == "struct":
255                if "required_keys" in schema or "optional_keys" in schema or "keys" in schema or "values" in schema:
256                    fail("a struct schema can't contain required_keys/optional_keys/keys/values")
257                all_fields = {}
258                original_fields = {f: True for f in dir(obj)}
259                done = False
260                for field, subSchema in schema.get("required_fields", {}).items():
261                    if field not in original_fields:
262                        ret = "required field '" + field + "' not found"
263                        stack.pop()
264                        done = True
265                        break
266                    all_fields[field] = subSchema
267                if done:
268                    continue
269                for field, subSchema in schema.get("optional_fields", {}).items():
270                    if field in all_fields:
271                        fail("A field cannot be both required and optional: " + key)
272                    if field in original_fields:
273                        all_fields[field] = subSchema
274                for field in all_fields:
275                    if field == "to_json" or field == "to_proto":
276                        fail("don't use deprecated fields to_json or to_proto")
277                extra_fields = [
278                    field
279                    for field in original_fields.keys()
280                    if field not in all_fields and field != "to_json" and field != "to_proto"
281                ]
282                if extra_fields:
283                    ret = "fields " + str(extra_fields) + " not allowed, valid keys: " + str(all_fields.keys())
284                    stack.pop()
285                    continue
286                if all_fields:
287                    frame["all_fields"] = all_fields.items()
288                    frame["i"] = 0
289                    frame["state"] = "struct_individual_fields_loop"
290                    k, v = frame["all_fields"][0]
291                    newStackFrame(getattr(obj, k), v)
292                    continue
293        elif state == "or_loop":
294            if ret != "":
295                frame["failures"].append("  " + ret)
296                frame["i"] += 1
297                if frame["i"] >= len(schema["or"]):
298                    ret = "did not match any schemas in 'or' list, errors:\n" + "\n".join(frame["failures"])
299                    stack.pop()
300                    continue
301                else:
302                    newStackFrame(obj, schema["or"][frame["i"]])
303                    continue
304        elif state == "of_loop":
305            frame["i"] += 1
306            if ret != "" or frame["i"] >= len(obj):
307                stack.pop()
308                continue
309            newStackFrame(obj[frame["i"]], schema["of"])
310            continue
311        elif state == "dict_individual_keys_loop":
312            frame["i"] += 1
313            if ret != "" or frame["i"] >= len(frame["all_keys"]):
314                stack.pop()
315                continue
316            k, v = frame["all_keys"][frame["i"]]
317            newStackFrame(obj[k], v)
318            continue
319        elif state == "dict_generic_keys_loop":
320            if ret != "" or frame["i"] >= len(frame["items"]):
321                stack.pop()
322                continue
323            k, v = frame["items"][frame["i"]]
324            if frame["checking_key"]:
325                frame["checking_key"] = False
326                newStackFrame(k, schema.get("keys", {}))
327                continue
328            else:
329                frame["checking_key"] = True
330                frame["i"] += 1
331                newStackFrame(v, schema.get("values", {}))
332                continue
333        elif state == "struct_individual_fields_loop":
334            frame["i"] += 1
335            if ret != "" or frame["i"] >= len(frame["all_fields"]):
336                stack.pop()
337                continue
338            k, v = frame["all_fields"][frame["i"]]
339            newStackFrame(getattr(obj, k), v)
340            continue
341
342        # by default return success
343        ret = ""
344        stack.pop()
345    if stack:
346        fail("Schema validation took too many iterations")
347    return ret
348
349def validate(obj, schema, *, validate_schema = True, fail_on_error = True):
350    """Validates the given starlark object against a schema.
351
352    A schema is a dictionary that describes the format of obj. Currently,
353    recursive objects cannot be validated because there's no cycle detection.
354
355    An empty dictionary describes "any object".
356
357    A dictionary with an "or" key must not have any other keys, and its
358    value is a list of other schema objects. If any of those schema objects
359    match, the "or" schema is considered a success.
360
361    Any schemas that are not empty or "or" schemas must have a "type" key.
362    This type must match the result of type(obj).
363
364    The "noneable" key can be set to true to act as an alias for:
365    `{"or": [{"type": "NoneType"}, ...the rest of the schema...]}`
366
367    The "value" key contains a value that must match the object exactly.
368    Only applies to strings, ints, and floats.
369
370    The "choices" key is a list of values that the object could match.
371    If the object is equal to any one of them then validation succeeds.
372
373    The "length" key applies to strings, bytes, lists, or tuples.
374    Its value can either be an integer length that the object must have,
375    or a string in that starts with <, >, <=, >=, or =, followed by a number.
376
377    The "of" key is a schema to match against the elements of a list/tuple.
378
379    Dictionaries and structs have "required_keys"/"required_fields" and
380    "optional_keys"/"optional_fields". (keys for dictionaries, fields for
381    structs). The value of each of these fields is a dictionary mapping from
382    the key/field value to a schema object to validate the value of the
383    key/field. Any keys/fields that are not listed in the schema will cause
384    the validation to fail. Any keys/fields in the required_ schemas must
385    be present in the input object.
386
387    Dictionaries have two additional fields over structs, "keys" and "values".
388    These fields cannot be mixed with required_keys/optional_keys. They provide
389    a single schema object each to apply to all the keys/values in the dictionary.
390
391    Args:
392        obj: The object to be validated against the schema
393        schema: The schema. (See above)
394        validate_schema: Also check that the schema itself is valid. This
395            can be disabled for performance. However, some of the checks
396            about the schema are hardcoded and cannot be disabled.
397        fail_on_error: If this function should fail() when the object doesn't
398            conform to the schema. Note that if the schema itself is invalid,
399            validate() fails regardless of the value of this argument.
400    Returns:
401        If fail_on_error is True, validate() doesn't return anything.
402        If fail_on_error is False, validate() returns a string that describes
403        the reason why the object doesn't match the schema, or an empty string
404        if it does match.
405    """
406    if validate_schema:
407        schema_validation_results = _validate_impl(schema, _schema_schema)
408        if schema_validation_results:
409            fail("Schema is invalid: " + schema_validation_results)
410    result = _validate_impl(obj, schema)
411    if not fail_on_error:
412        return result
413    if result:
414        fail(result)
415    return None
416