1# Copyright (C) 2023 The Android Open Source Project 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14""" 15This module provides a function for validating starlark data against a schema. 16See validate() for more information. 17""" 18 19_schema_schema = { 20 "type": "dict", 21 "optional_keys": { 22 "or": { 23 "type": "list", 24 "length": ">=2", 25 }, 26 "noneable": {"type": "bool"}, 27 "type": { 28 "type": "string", 29 "choices": [ 30 "NoneType", 31 "bool", 32 "int", 33 "float", 34 "string", 35 "bytes", 36 "list", 37 "tuple", 38 "dict", 39 "struct", 40 ], 41 }, 42 "choices": { 43 "type": "list", 44 "of": { 45 "or": [ 46 {"type": "string"}, 47 {"type": "int"}, 48 {"type": "float"}, 49 ], 50 }, 51 }, 52 "value": { 53 "or": [ 54 {"type": "string"}, 55 {"type": "int"}, 56 {"type": "float"}, 57 ], 58 }, 59 "of": {}, # to be filled in later 60 "unique": {"type": "bool"}, 61 "length": {"or": [ 62 {"type": "string"}, 63 {"type": "int"}, 64 ]}, 65 "required_keys": { 66 "type": "dict", 67 "values": {}, # to be filled in later 68 }, 69 "optional_keys": { 70 "type": "dict", 71 "values": {}, # to be filled in later 72 }, 73 "keys": {}, # to be filled in later 74 "values": {}, # to be filled in later 75 "required_fields": { 76 "type": "dict", 77 "keys": {"type": "string"}, 78 "values": {}, # to be filled in later 79 }, 80 "optional_fields": { 81 "type": "dict", 82 "keys": {"type": "string"}, 83 "values": {}, # to be filled in later 84 }, 85 }, 86} 87 88_schema_schema["optional_keys"]["of"] = _schema_schema 89_schema_schema["optional_keys"]["required_keys"]["values"] = _schema_schema 90_schema_schema["optional_keys"]["optional_keys"]["values"] = _schema_schema 91_schema_schema["optional_keys"]["keys"] = _schema_schema 92_schema_schema["optional_keys"]["values"] = _schema_schema 93_schema_schema["optional_keys"]["required_fields"]["values"] = _schema_schema 94_schema_schema["optional_keys"]["optional_fields"]["values"] = _schema_schema 95 96def _check_len(obj, length): 97 if type(length) == "int": 98 return len(obj) == length 99 if length.startswith("<="): 100 return len(obj) <= int(length[2:]) 101 if length.startswith(">="): 102 return len(obj) >= int(length[2:]) 103 ln = int(length[1:]) 104 if length[0] == "=": 105 return len(obj) == ln 106 if length[0] == "<": 107 return len(obj) < ln 108 if length[0] == ">": 109 return len(obj) > ln 110 fail("Unexpected length format") 111 112def _validate_impl(obj, schema): 113 stack = [] 114 115 def newStackFrame(obj, schema): 116 stack.append({ 117 "obj": obj, 118 "schema": schema, 119 "state": "start", 120 }) 121 122 newStackFrame(obj, schema) 123 ret = "" 124 125 # Because bazel doesn't allow infinite loops/recursion, just make a loop 126 # with an arbitrarily large number of iterations. 127 for _ in range(100000): 128 if not stack: 129 break 130 frame = stack[-1] 131 obj = frame["obj"] 132 schema = frame["schema"] 133 state = frame["state"] 134 135 if state == "start": 136 if len(schema) == 0: 137 ret = "" 138 stack.pop() 139 continue 140 if "or" in schema: 141 if len(schema) != 1: 142 fail("an 'or' schema must not be accompanied by any other keys") 143 frame["i"] = 0 144 frame["state"] = "or_loop" 145 frame["failures"] = [] 146 newStackFrame(obj, schema["or"][0]) 147 continue 148 if "type" not in schema: 149 fail("a non-empty/non-or schema must have a 'type' key: " + str(schema)) 150 if schema.get("noneable", False): 151 if obj == None: 152 ret = "" 153 stack.pop() 154 continue 155 ty = schema["type"] 156 if type(obj) != ty: 157 ret = "Expected %s, got %s" % (ty, type(obj)) 158 stack.pop() 159 continue 160 if "length" in schema: 161 if ty not in ["string", "bytes", "list", "tuple"]: 162 fail("'len' is only valid for string, bytes, lists, or tuples, got: " + ty) 163 if not _check_len(obj, schema["length"]): 164 ret = "Expected length %s, got %d" % (schema["length"], len(obj)) 165 stack.pop() 166 continue 167 if "choices" in schema: 168 if ty not in ["string", "int", "float"]: 169 fail("'choices' is only valid for string, int, or float, got: " + ty) 170 if obj not in schema["choices"]: 171 ret = "Expected one of %s, got %s" % (schema["choices"], obj) 172 stack.pop() 173 continue 174 if "value" in schema: 175 if ty not in ["string", "int", "float"]: 176 fail("'value' is only valid for string, int, or float, got: " + ty) 177 if obj != schema["value"]: 178 ret = "Expected %s, got %s" % (schema["value"], obj) 179 stack.pop() 180 continue 181 if schema.get("unique", False): 182 if ty != "list" and ty != "tuple": 183 fail("'unique' is only valid for lists or tuples, got: " + ty) 184 sorted_list = sorted(obj) 185 done = False 186 for i in range(len(sorted_list) - 1): 187 if type(sorted_list[i]) not in ["string", "int", "float", "bool", "NoneType", "bytes"]: 188 ret = "'unique' only works on lists/tuples of scalar types, got: " + type(sorted_list[i]) 189 stack.pop() 190 done = True 191 break 192 if sorted_list[i] == sorted_list[i + 1]: 193 ret = "Expected all elements to be unique, but saw '%s' twice" % str(sorted_list[i]) 194 stack.pop() 195 done = True 196 break 197 if done: 198 continue 199 if "of" in schema: 200 if ty != "list" and ty != "tuple": 201 fail("'of' is only valid for lists or tuples, got: " + ty) 202 if obj: 203 frame["i"] = 0 204 frame["state"] = "of_loop" 205 newStackFrame(obj[0], schema["of"]) 206 continue 207 if ty == "dict": 208 if "required_fields" in schema or "optional_fields" in schema: 209 fail("a dict schema can't contain required_fields/optional_fields") 210 schema_names_keys = bool(schema.get("required_keys", {})) or bool(schema.get("optional_keys", {})) 211 schema_enforces_generic_keys = bool(schema.get("keys", {})) or bool(schema.get("values", {})) 212 if schema_names_keys and schema_enforces_generic_keys: 213 fail("Only required_keys/optional_keys or keys/values may be used, but not both") 214 if schema_names_keys: 215 all_keys = {} 216 done = False 217 for key, subSchema in schema.get("required_keys", {}).items(): 218 if key not in obj: 219 ret = "required key '" + key + "' not found" 220 stack.pop() 221 done = True 222 break 223 all_keys[key] = subSchema 224 if done: 225 continue 226 for key, subSchema in schema.get("optional_keys", {}).items(): 227 if key in all_keys: 228 fail("A key cannot be both required and optional: " + key) 229 if key in obj: 230 all_keys[key] = subSchema 231 extra_keys = [ 232 key 233 for key in obj.keys() 234 if key not in all_keys 235 ] 236 if extra_keys: 237 ret = "keys " + str(extra_keys) + " not allowed, valid keys: " + str(all_keys.keys()) 238 stack.pop() 239 continue 240 if all_keys: 241 frame["all_keys"] = all_keys.items() 242 frame["i"] = 0 243 frame["state"] = "dict_individual_keys_loop" 244 k, v = frame["all_keys"][0] 245 newStackFrame(obj[k], v) 246 continue 247 elif schema_enforces_generic_keys: 248 frame["items"] = obj.items() 249 if frame["items"]: 250 frame["i"] = 0 251 frame["state"] = "dict_generic_keys_loop" 252 frame["checking_key"] = True 253 continue 254 if ty == "struct": 255 if "required_keys" in schema or "optional_keys" in schema or "keys" in schema or "values" in schema: 256 fail("a struct schema can't contain required_keys/optional_keys/keys/values") 257 all_fields = {} 258 original_fields = {f: True for f in dir(obj)} 259 done = False 260 for field, subSchema in schema.get("required_fields", {}).items(): 261 if field not in original_fields: 262 ret = "required field '" + field + "' not found" 263 stack.pop() 264 done = True 265 break 266 all_fields[field] = subSchema 267 if done: 268 continue 269 for field, subSchema in schema.get("optional_fields", {}).items(): 270 if field in all_fields: 271 fail("A field cannot be both required and optional: " + key) 272 if field in original_fields: 273 all_fields[field] = subSchema 274 for field in all_fields: 275 if field == "to_json" or field == "to_proto": 276 fail("don't use deprecated fields to_json or to_proto") 277 extra_fields = [ 278 field 279 for field in original_fields.keys() 280 if field not in all_fields and field != "to_json" and field != "to_proto" 281 ] 282 if extra_fields: 283 ret = "fields " + str(extra_fields) + " not allowed, valid keys: " + str(all_fields.keys()) 284 stack.pop() 285 continue 286 if all_fields: 287 frame["all_fields"] = all_fields.items() 288 frame["i"] = 0 289 frame["state"] = "struct_individual_fields_loop" 290 k, v = frame["all_fields"][0] 291 newStackFrame(getattr(obj, k), v) 292 continue 293 elif state == "or_loop": 294 if ret != "": 295 frame["failures"].append(" " + ret) 296 frame["i"] += 1 297 if frame["i"] >= len(schema["or"]): 298 ret = "did not match any schemas in 'or' list, errors:\n" + "\n".join(frame["failures"]) 299 stack.pop() 300 continue 301 else: 302 newStackFrame(obj, schema["or"][frame["i"]]) 303 continue 304 elif state == "of_loop": 305 frame["i"] += 1 306 if ret != "" or frame["i"] >= len(obj): 307 stack.pop() 308 continue 309 newStackFrame(obj[frame["i"]], schema["of"]) 310 continue 311 elif state == "dict_individual_keys_loop": 312 frame["i"] += 1 313 if ret != "" or frame["i"] >= len(frame["all_keys"]): 314 stack.pop() 315 continue 316 k, v = frame["all_keys"][frame["i"]] 317 newStackFrame(obj[k], v) 318 continue 319 elif state == "dict_generic_keys_loop": 320 if ret != "" or frame["i"] >= len(frame["items"]): 321 stack.pop() 322 continue 323 k, v = frame["items"][frame["i"]] 324 if frame["checking_key"]: 325 frame["checking_key"] = False 326 newStackFrame(k, schema.get("keys", {})) 327 continue 328 else: 329 frame["checking_key"] = True 330 frame["i"] += 1 331 newStackFrame(v, schema.get("values", {})) 332 continue 333 elif state == "struct_individual_fields_loop": 334 frame["i"] += 1 335 if ret != "" or frame["i"] >= len(frame["all_fields"]): 336 stack.pop() 337 continue 338 k, v = frame["all_fields"][frame["i"]] 339 newStackFrame(getattr(obj, k), v) 340 continue 341 342 # by default return success 343 ret = "" 344 stack.pop() 345 if stack: 346 fail("Schema validation took too many iterations") 347 return ret 348 349def validate(obj, schema, *, validate_schema = True, fail_on_error = True): 350 """Validates the given starlark object against a schema. 351 352 A schema is a dictionary that describes the format of obj. Currently, 353 recursive objects cannot be validated because there's no cycle detection. 354 355 An empty dictionary describes "any object". 356 357 A dictionary with an "or" key must not have any other keys, and its 358 value is a list of other schema objects. If any of those schema objects 359 match, the "or" schema is considered a success. 360 361 Any schemas that are not empty or "or" schemas must have a "type" key. 362 This type must match the result of type(obj). 363 364 The "noneable" key can be set to true to act as an alias for: 365 `{"or": [{"type": "NoneType"}, ...the rest of the schema...]}` 366 367 The "value" key contains a value that must match the object exactly. 368 Only applies to strings, ints, and floats. 369 370 The "choices" key is a list of values that the object could match. 371 If the object is equal to any one of them then validation succeeds. 372 373 The "length" key applies to strings, bytes, lists, or tuples. 374 Its value can either be an integer length that the object must have, 375 or a string in that starts with <, >, <=, >=, or =, followed by a number. 376 377 The "of" key is a schema to match against the elements of a list/tuple. 378 379 Dictionaries and structs have "required_keys"/"required_fields" and 380 "optional_keys"/"optional_fields". (keys for dictionaries, fields for 381 structs). The value of each of these fields is a dictionary mapping from 382 the key/field value to a schema object to validate the value of the 383 key/field. Any keys/fields that are not listed in the schema will cause 384 the validation to fail. Any keys/fields in the required_ schemas must 385 be present in the input object. 386 387 Dictionaries have two additional fields over structs, "keys" and "values". 388 These fields cannot be mixed with required_keys/optional_keys. They provide 389 a single schema object each to apply to all the keys/values in the dictionary. 390 391 Args: 392 obj: The object to be validated against the schema 393 schema: The schema. (See above) 394 validate_schema: Also check that the schema itself is valid. This 395 can be disabled for performance. However, some of the checks 396 about the schema are hardcoded and cannot be disabled. 397 fail_on_error: If this function should fail() when the object doesn't 398 conform to the schema. Note that if the schema itself is invalid, 399 validate() fails regardless of the value of this argument. 400 Returns: 401 If fail_on_error is True, validate() doesn't return anything. 402 If fail_on_error is False, validate() returns a string that describes 403 the reason why the object doesn't match the schema, or an empty string 404 if it does match. 405 """ 406 if validate_schema: 407 schema_validation_results = _validate_impl(schema, _schema_schema) 408 if schema_validation_results: 409 fail("Schema is invalid: " + schema_validation_results) 410 result = _validate_impl(obj, schema) 411 if not fail_on_error: 412 return result 413 if result: 414 fail(result) 415 return None 416