#!/usr/bin/python3 -B

# Copyright 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Read the EXPECTED_UPSTREAM and merge the files from the upstream."""
import argparse
import datetime
import logging
# pylint: disable=g-importing-member
import os.path
from pathlib import Path
import random
import re
import string
import sys
from typing import List, Tuple, Set, Dict, Optional
from typing import Sequence

# pylint: disable=g-multiple-import
from common_util import (
    ExpectedUpstreamEntry,
    ExpectedUpstreamFile,
    has_file_in_tree,
    LIBCORE_DIR,
    OjluniFinder,
    TEST_PATH,
)

from git import (
    Commit,
    DiffIndex,
    GitCommandError,
    Head,
    IndexFile,
    Repo,
)

# Enable INFO logging for error emitted by GitPython
logging.basicConfig(level=logging.INFO)


def validate_and_remove_unmodified_entries(
    entries: List[ExpectedUpstreamEntry],
    repo: Repo, commit: Commit) -> List[ExpectedUpstreamEntry]:
  """Returns a list of entries of which the file content needs to be updated.

  Args:
    entries: entries to validate
    repo: the repository object
    commit: the commit whose tree holds the current destination files

  Returns:
    the entries whose destination file is missing from the tree of `commit`
    or whose content differs from the upstream source blob

  Raises:
    Exception: whatever the git lookup raised for an entry; the offending
      entry is printed to stderr before the error is re-raised.
  """
  commit_tree = commit.tree
  result: List[ExpectedUpstreamEntry] = []

  for e in entries:
    try:
      # The following steps validate each entry by querying the git database.
      upstream_commit = repo.commit(e.git_ref)
      source_blob = upstream_commit.tree.join(e.src_path)
      if not has_file_in_tree(e.dst_path, commit_tree):
        # Add the entry if the file is missing in the HEAD
        result.append(e)
        continue

      dst_blob = commit_tree.join(e.dst_path)
      # Add the entry if the content is different.
      # data_stream will be close during GC.
      if source_blob.data_stream.read() != dst_blob.data_stream.read():
        result.append(e)
    except Exception:
      print(f"ERROR: reading entry: {e}", file=sys.stderr)
      raise

  return result


THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)

TEMP_EXPECTED_BRANCH_PREFIX = "expected_upstream_"

MSG_FIRST_COMMIT = ("Import {summary}\n"
                    "\n"
                    "List of files:\n"
                    " {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}\n"
                    "\n"
                    "{bug}\n"
                    "Test: N/A\n"
                    "No-Typo-Check: Imported files"
                    "{change_id_str}")

MSG_SECOND_COMMIT = ("Merge {summary} into the "
                     "aosp/main branch\n"
                     "\n"
                     "List of files:\n"
                     " {files}\n"
                     "\n"
                     "{bug}\n"
                     "Test: N/A"
                     "{change_id_str}")

# Returned by get_diff_entries() when there is nothing valid to process.
INVALID_DIFF = (None, None)

LICENSE_BLOCK = r"\/\*(?:\*(?!\/)|[^*])*\*\/[ ]*\n+"
REGEX_LICENSE_AND_IMPORT = re.compile(
    r"^(" + LICENSE_BLOCK + ")(import .+;)$", re.MULTILINE)


def create_commit_staging_diff(repo: Repo) -> None:
  r"""Save the current EXPECTED_UPSTREAM file in a new git commit.

  It can be retrieved later if this script fails.

  Args:
    repo: the repository object
  """
  head = repo.head
  index = IndexFile.from_tree(repo, head.commit)
  index.add("EXPECTED_UPSTREAM")

  now_str = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
  msg = f"Staging EXPECTED_UPSTREAM at {now_str}"
  # head=False: create the commit object without moving HEAD.
  commit = index.commit(message=msg, parent_commits=[head.commit], head=False)

  print(
      f"The current EXPECTED_UPSTREAM file is saved in {commit.hexsha}.\n"
      "If this script fails in the later stage, please retrieve the file by:\n"
      f" git checkout {commit.hexsha} -- EXPECTED_UPSTREAM")


def create_commit_summary(diff_entries: List[ExpectedUpstreamEntry]) -> str:
  r"""Create a commit summary message.

  Args:
    diff_entries: list of new / modified entries

  Returns:
    a string message
  """

  default_msg = "files"
  entries_and_names = [
      (e, OjluniFinder.translate_ojluni_path_to_class_name(e.dst_path))
      for e in diff_entries]

  # Non-test entries
  important_entries: List[Tuple[ExpectedUpstreamEntry, str]] = [
      t for t in entries_and_names
      if t[1] is not None and not t[1].startswith("test.")]
  if not important_entries:
    # Test entries
    important_entries = [t for t in entries_and_names if t[1] is not None and
                         t[1].startswith("test.")]
    # no path is under OJLUNI_JAVA_BASE_PATH or OJLUNI_TEST_PATH
    if not important_entries:
      return default_msg

  # Get ref if all entries come from the same OpenJDK revision
  git_ref = important_entries[0][0].git_ref
  if any(entry.git_ref != git_ref for entry, _ in important_entries):
    git_ref = None

  if len(important_entries) == 1:
    classes_summary = important_entries[0][1].split(".")[-1]
  else:
    common_prefix = os.path.commonprefix(
        [name for _, name in important_entries])
    prefix_split = common_prefix.split(".")

    # short java package, e.g. javax. or java.n, doesn't provide meaningful
    # commit summary.
    if len(prefix_split) <= 2:
      classes_summary = default_msg
    else:
      # Assume that package name isn't title-case.
      is_package = (not prefix_split[-1] or prefix_split[-1][0].islower())
      if is_package:
        # Discard the prefix after the last "."
        classes_summary = ".".join(prefix_split[:-1])
      else:
        classes_summary = common_prefix + "*"

  if git_ref is None:
    return classes_summary

  abbv_ref = git_ref.split("/", 1)[-1]
  return f"{classes_summary} from {abbv_ref}"


def create_commit_at_expected_upstream(
    repo: Repo, head: Head, new_entries: List[ExpectedUpstreamEntry],
    removed_paths: Set[str], bug_id: str,
    last_expected_change_id: str, discard_working_tree: bool) -> Head:
  r"""Create a new commit importing the given files at the head.

  Args:
    repo: the repository object
    head: the temp expected_upstream branch
    new_entries: a list of entries
    removed_paths: removed paths
    bug_id: bug id
    last_expected_change_id: Gerrit's change Id
    discard_working_tree: discard the working tree.

  Returns:
    the given head after it has been moved to the newly created commit
  """
  affected_paths = [e.dst_path for e in new_entries] + list(removed_paths)
  str_affected_paths = "\n ".join(affected_paths)

  for entry in new_entries:
    ref = entry.git_ref
    upstream_commit = repo.commit(ref)
    src_blob = upstream_commit.tree[entry.src_path]
    # Write into the file system directly because GitPython provides no API
    # writing into the index in memory. IndexFile.move doesn't help here,
    # because the API requires the file on the working tree too.
    # However, it's fine, because we later reset the HEAD.
    absolute_dst_path = Path(LIBCORE_DIR, entry.dst_path)
    absolute_dst_path.parent.mkdir(parents=True, exist_ok=True)
    with absolute_dst_path.open("wb") as file:
      file.write(src_blob.data_stream.read())

  entries = ExpectedUpstreamFile(head.commit.tree["EXPECTED_UPSTREAM"]
                                 .data_stream.read()).read_all_entries()
  entries = overlay_entries(entries, new_entries)
  entries = [e for e in entries if e.dst_path not in removed_paths]
  # Write the entries to the file system.
  ExpectedUpstreamFile().sort_and_write_all_entries(entries)

  if discard_working_tree:
    repo.head.reference = head
    repo.head.reset(index=True)
    index = repo.index
  else:
    index = IndexFile.from_tree(repo, head.commit)
  index.add("EXPECTED_UPSTREAM")
  for entry in new_entries:
    index.add(entry.dst_path)

  for p in removed_paths:
    index.remove(p)

  summary_msg = create_commit_summary(new_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_expected_change_id:
    change_id_str = f"\nChange-Id: {last_expected_change_id}"
  msg = MSG_FIRST_COMMIT.format(summary=summary_msg, files=str_affected_paths,
                                bug=str_bug, change_id_str=change_id_str)
  commit = index.commit(message=msg, parent_commits=[head.commit], head=False)
  new_head = head.set_commit(commit)

  print(f"Create a new commit {commit.hexsha} at {head.name}")

  return new_head


def overlay_entries(
    existing_entries: List[ExpectedUpstreamEntry],
    new_entries: List[ExpectedUpstreamEntry]) -> List[ExpectedUpstreamEntry]:
  r"""Return a list of entries after overlaying the new_entries.

  Entries are keyed by dst_path; a new entry replaces an existing entry with
  the same dst_path in place, and other new entries are appended.

  Args:
    existing_entries: current entries
    new_entries: entries being overlaid
  Returns:
    a list of entries
  """
  entries_map = {e.dst_path: e for e in existing_entries}
  entries_map.update((e.dst_path, e) for e in new_entries)
  return list(entries_map.values())


REGEX_CHANGE_ID = r"^Change-Id: (I[0-9a-f]+)$"
REGEX_BUG_ID = r"^Bug: ([0-9]+)$"


def extract_change_id(commit: Commit) -> Optional[str]:
  r"""Extract gerrit's Change-Id from a commit message.

  Args:
    commit: commit

  Returns:
    Change-Id, or None if the message has no Change-Id line
  """
  result = re.search(REGEX_CHANGE_ID, commit.message, re.M)
  return result.group(1) if result else None


def extract_bug_id(commit: Commit) -> Optional[str]:
  r"""Extract the bug id from a commit message.

  Args:
    commit: commit

  Returns:
    Buganizer Id, or None if the message has no Bug line
  """
  result = re.search(REGEX_BUG_ID, commit.message, re.M)
  return result.group(1) if result else None


def get_diff_entries(repo: Repo, base_expected_commit: Commit) -> Tuple[
    Optional[List[ExpectedUpstreamEntry]], Optional[Set[str]]]:
  """Get a list of entries different from the head commit.

  Validate EXPECTED_UPSTREAM file and return the list of
  modified or new entries between the working tree and HEAD.

  Args:
    repo: Repo
    base_expected_commit: the base commit

  Returns:
    a tuple of (new or modified entries, removed paths), or INVALID_DIFF,
    i.e. (None, None), if the repository isn't in the expected state or
    nothing needs to be updated.
  """
  # tracking_branch() returns None when the active branch tracks nothing.
  current_tracking_branch = repo.active_branch.tracking_branch()
  if (current_tracking_branch is None or
      current_tracking_branch.name != "aosp/main"):
    print("This script should only run on aosp/main branch. "
          f"Currently, this is on branch {repo.active_branch} "
          f"tracking {current_tracking_branch}", file=sys.stderr)
    return INVALID_DIFF

  print("Reading EXPECTED_UPSTREAM file...")
  head_commit = repo.head.commit
  # Diff of HEAD against the working tree.
  diff_index = head_commit.diff(None)
  no_file_change = len(diff_index)
  if no_file_change == 0:
    print("Can't find any EXPECTED_UPSTREAM file change", file=sys.stderr)
    return INVALID_DIFF
  elif no_file_change > 1 or diff_index[0].a_rawpath != b"EXPECTED_UPSTREAM":
    print("Expect modification in the EXPECTED_UPSTREAM file only.\n"
          "Please remove / commit the other changes. The below file changes "
          "are detected: ", file=sys.stderr)
    print_diff_index(diff_index, file=sys.stderr)
    return INVALID_DIFF

  prev_file = ExpectedUpstreamFile(head_commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  curr_file = ExpectedUpstreamFile()
  diff_entries = prev_file.get_new_or_modified_entries(curr_file)
  removed_paths = prev_file.get_removed_paths(curr_file)

  modified_entries = validate_and_remove_unmodified_entries(
      diff_entries, repo, base_expected_commit)

  if not modified_entries and not removed_paths:
    print("No need to update. All files are updated.")
    return INVALID_DIFF

  print("The following entries will be updated from upstream")
  for e in modified_entries:
    print(f" {e.dst_path}")
  for p in removed_paths:
    print(f" {p}")

  # NOTE(review): the full diff_entries list is returned, not only the
  # content-modified subset printed above — confirm this is intended.
  return diff_entries, removed_paths


def compute_absorbed_diff_entries(
    repo: Repo, base_commit: Commit, commit: Commit, overlaid_entries: List[
        ExpectedUpstreamEntry], removed_paths: Set[
            str]) -> Tuple[List[ExpectedUpstreamEntry], Set[str]]:
  r"""Compute the combined entries after absorbing the new changes.

  Args:
    repo: Repo
    base_commit: the base commit in the expected_upstream
    commit: The commit diff-ed against from the base_commit
    overlaid_entries: Additional entries overlaid on top of the diff.
    removed_paths: removed paths

  Returns:
    Combined diff entries and the remaining removed paths
  """
  prev_file = ExpectedUpstreamFile(base_commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  curr_file = ExpectedUpstreamFile(commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  diff_entries = prev_file.get_new_or_modified_entries(curr_file)
  diff_entries = overlay_entries(diff_entries, overlaid_entries)
  # Paths that are both in the combined diff and in removed_paths. This must
  # be a set of path strings: it is compared against dst_path / path values
  # below, so a set of entry objects would never match anything.
  intersection = {e.dst_path for e in diff_entries
                  if e.dst_path in removed_paths}
  diff_entries = [e for e in diff_entries if e.dst_path not in intersection]
  new_removed_paths = {p for p in removed_paths if p not in intersection}
  return validate_and_remove_unmodified_entries(
      diff_entries, repo, base_commit), new_removed_paths


def main_run(
    repo: Repo, expected_upstream_base: str,
    bug_id: str, use_rerere: bool, is_absorbed: bool,
    discard_working_tree: bool) -> None:
  """Create the commits importing files according to the EXPECTED_UPSTREAM.

  Args:
    repo: Repo
    expected_upstream_base: the base commit in the expected_upstream branch.
    bug_id: bug id
    use_rerere: Reuses the recorded resolution from git
    is_absorbed: Absorb the new changes from EXPECTED_UPSTREAM into the
      existing commits created by this script
    discard_working_tree: discard working tree flag.
  """
  last_master_commit = repo.head.commit
  last_master_change_id = None
  last_expected_change_id = None
  last_expected_commit = None
  # Pre-bound so that the `finally` clause below can test it even when
  # repo.commit() raises.
  base_expected_branch_commit = None
  if is_absorbed:
    head = repo.head
    if len(head.commit.parents) != 2:
      print("Error: HEAD isn't a merge commit.", file=sys.stderr)
      return

    last_branch = None
    for commit in head.commit.parents:
      name_rev: List[str] = commit.name_rev.split(" ", 1)
      if (len(name_rev) > 1 and  # name_rev[1] is usually the branch name
          name_rev[1].startswith(TEMP_EXPECTED_BRANCH_PREFIX)):
        last_branch = name_rev[1]
        last_expected_commit = commit
      else:
        last_master_commit = commit

    if last_branch is None:
      print("Error: Can't find the last commit in the expected_upstream "
            "branch.", file=sys.stderr)
      return

    if len(last_expected_commit.parents) != 1:
      print(f"Error: The head commit at {last_branch} isn't in the expected "
            "state.", file=sys.stderr)
      return

    base_expected_branch_commit = last_expected_commit.parents[0]
    last_expected_change_id = extract_change_id(last_expected_commit)
    last_master_change_id = extract_change_id(head.commit)
    if bug_id is None:
      bug_id = extract_bug_id(last_expected_commit)
  else:
    if expected_upstream_base is None:
      expected_upstream_base = "aosp/expected_upstream"
    try:
      base_expected_branch_commit = repo.commit(expected_upstream_base)
    finally:
      # Give a readable hint; any exception from repo.commit() still
      # propagates to the caller.
      if base_expected_branch_commit is None:
        print(f"{expected_upstream_base} is not found in this repository.",
              file=sys.stderr)

  diff_entries, removed_paths = get_diff_entries(repo,
                                                 base_expected_branch_commit)
  if not diff_entries and not removed_paths:
    return

  if is_absorbed:
    diff_entries, removed_paths = compute_absorbed_diff_entries(
        repo, base_expected_branch_commit, last_expected_commit, diff_entries,
        removed_paths)

  # Due to a limitation in GitPython, index.remove requires switching branch
  # and discard the working tree.
  if removed_paths and not discard_working_tree:
    print("-r option is required to discard the current working tree.",
          file=sys.stderr)
    return

  create_commit_staging_diff(repo)

  master_head = repo.active_branch
  branch_name = create_random_branch_name()
  new_branch = repo.create_head(branch_name, base_expected_branch_commit.hexsha)
  new_branch.set_tracking_branch(repo.remotes.aosp.refs.expected_upstream)
  new_branch = create_commit_at_expected_upstream(
      repo, new_branch, diff_entries, removed_paths, bug_id,
      last_expected_change_id, discard_working_tree)

  # Clean the working tree before merging branch
  if discard_working_tree:
    repo.head.reference = master_head

  repo.head.reset(commit=last_master_commit, working_tree=True)
  for e in diff_entries:
    # Files written for the import that aren't part of HEAD are left behind
    # by the reset; delete them.
    if not has_file_in_tree(e.dst_path, repo.head.commit.tree):
      path = Path(LIBCORE_DIR, e.dst_path)
      path.unlink(missing_ok=True)

  affected_paths = [e.dst_path for e in diff_entries] + list(removed_paths)
  str_affected_paths = "\n ".join(affected_paths)
  summary_msg = create_commit_summary(diff_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_master_change_id:
    change_id_str = f"\nChange-Id: {last_master_change_id}"
  msg = MSG_SECOND_COMMIT.format(
      summary=summary_msg, files=str_affected_paths, bug=str_bug,
      change_id_str=change_id_str)
  rerere_str = "rerere.enabled="
  rerere_str += "true" if use_rerere else "false"

  # Map each imported test file to the package name to insert after merging.
  test_dst_paths = {}
  for e in diff_entries:
    if e.dst_path.startswith(TEST_PATH):
      class_name = OjluniFinder.translate_ojluni_path_to_class_name(e.dst_path)
      if class_name is not None:
        package_name = class_name[:class_name.rfind(".")]
        test_dst_paths[e.dst_path] = package_name

  # Run git-merge command here, and will let the user to handle
  # any errors and merge conflicts
  try:
    repo.git.execute(["git", "-c", rerere_str, "merge",
                      new_branch.commit.hexsha, "-m", msg])
  except GitCommandError as err:
    print(f"Error: {err}", file=sys.stderr)

  insert_package_name_to_tests(test_dst_paths)


def insert_package_name_to_tests(test_dst_paths: Dict[str, str]):
  """Insert package name into the test file before the java import statement.

  Paths are resolved against LIBCORE_DIR, like the other file accesses in
  this script, so the result doesn't depend on the current directory.

  Args:
    test_dst_paths: Map the file path to package names
  """
  for dst_path, package_name in test_dst_paths.items():
    absolute_path = Path(LIBCORE_DIR, dst_path)
    if not absolute_path.is_file():
      # The file can be absent if the preceding merge didn't complete.
      print(f"Warning: {dst_path} is not found. Skip inserting the package "
            "name.", file=sys.stderr)
      continue

    with absolute_path.open("r") as file:
      src = file.read()
    replacement = r"\1package " + package_name + r";\n\n\2"
    modified = REGEX_LICENSE_AND_IMPORT.sub(replacement, src, count=1)
    if modified != src:
      with absolute_path.open("w") as out:
        out.write(modified)


def create_random_branch_name() -> str:
  """Return TEMP_EXPECTED_BRANCH_PREFIX plus 10 random [a-z0-9] characters."""
  rand_suffix = "".join(random.choice(string.ascii_lowercase +
                                      string.digits) for _ in range(10))
  return f"{TEMP_EXPECTED_BRANCH_PREFIX}{rand_suffix}"


def print_diff_index(index: DiffIndex, file=sys.stdout) -> None:
  """Print the a_rawpath of every diff in the index, one per line.

  Args:
    index: the diff index to print
    file: the output stream
  """
  for diff in index:
    print(f" {diff.a_rawpath}", file=file)


def main(argv: Sequence[str]) -> None:
  """Parse the command line arguments and run the import and merge.

  Args:
    argv: command line arguments without the program name
  """
  arg_parser = argparse.ArgumentParser(
      description="Read the EXPECTED_UPSTREAM and update the files from the "
                  "OpenJDK. This script imports the files from OpenJDK into "
                  "the expected_upstream branch and merges it into the "
                  "current branch.")
  arg_parser.add_argument(
      "-a", "--absorbed-to-last-merge", action="store_true",
      help="Import more files but absorb them into the last commits created "
           "by this script.")
  arg_parser.add_argument(
      "--disable-rerere", action="store_true",
      help="Do not re-use the recorded resolution from git.")
  arg_parser.add_argument(
      "-r", "--reset", action="store_true",
      help="Discard the current working tree. Experimental flag to "
           "support file removal from ojluni/.")
  arg_parser.add_argument(
      "-b", "--bug", nargs="?",
      help="Buganizer Id")
  arg_parser.add_argument(
      "-e", "--expected_upstream_base", nargs="?",
      help="The base commit in the expected_upstream branch")

  args = arg_parser.parse_args(argv)

  bug_id = args.bug
  expected_upstream_base = args.expected_upstream_base
  use_rerere = not args.disable_rerere
  is_absorbed = args.absorbed_to_last_merge
  discard_working_tree = args.reset
  if is_absorbed and expected_upstream_base is not None:
    print("Error: -a and -e options can't be used together.", file=sys.stderr)
    return

  repo = Repo(LIBCORE_DIR.as_posix())
  try:
    main_run(repo, expected_upstream_base, bug_id, use_rerere, is_absorbed,
             discard_working_tree)
  finally:
    repo.close()


if __name__ == "__main__":
  main(sys.argv[1:])