• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #!/usr/bin/python3 -B
2 
3 # Copyright 2022 The Android Open Source Project
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 #      http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 """Read the EXPECTED_UPSTREAM and  merge the files from the upstream."""
18 import argparse
19 import datetime
20 import logging
21 # pylint: disable=g-importing-member
22 import os.path
23 from pathlib import Path
24 import random
25 import re
26 import string
27 import sys
28 from typing import List, Tuple, Set, Dict
29 from typing import Sequence
30 
31 # pylint: disable=g-multiple-import
32 from common_util import (
33     ExpectedUpstreamEntry,
34     ExpectedUpstreamFile,
35     has_file_in_tree,
36     LIBCORE_DIR,
37     OjluniFinder,
38     TEST_PATH,
39 )
40 
41 from git import (
42     Commit,
43     DiffIndex,
44     GitCommandError,
45     Head,
46     IndexFile,
47     Repo,
48 )
49 
# Enable INFO-level logging so that the errors emitted by GitPython are shown.
logging.basicConfig(level=logging.INFO)
52 
53 
def validate_and_remove_unmodified_entries(
    entries: List[ExpectedUpstreamEntry],
    repo: Repo, commit: Commit) -> List[ExpectedUpstreamEntry]:
  """Validates the entries and keeps those whose file content is outdated.

  Every entry is looked up in the git database: the upstream blob is read from
  the tree of the commit named by the entry's git_ref, and compared with the
  blob at the entry's dst_path in the tree of the given commit.

  Args:
    entries: the entries to validate
    repo: the repository object used to resolve each entry's git_ref
    commit: the commit whose tree holds the current destination files

  Returns:
    the entries whose destination file is missing from the commit's tree or
    whose content differs from the upstream blob, in the input order

  Raises:
    Any error raised while reading an entry is re-raised unchanged after the
    offending entry has been reported on stderr.
  """
  dst_tree = commit.tree
  outdated: List[ExpectedUpstreamEntry] = []

  for e in entries:
    try:
      # Resolving the upstream blob first doubles as the validation step:
      # an unknown git_ref or src_path fails here.
      upstream_blob = repo.commit(e.git_ref).tree.join(e.src_path)
      if has_file_in_tree(e.dst_path, dst_tree):
        current_blob = dst_tree.join(e.dst_path)
        # The data streams are closed during GC.
        needs_update = (upstream_blob.data_stream.read() !=
                        current_blob.data_stream.read())
      else:
        # The file is missing in the destination tree.
        needs_update = True
      if needs_update:
        outdated.append(e)
    except BaseException:
      print(f"ERROR: reading entry: {e}", file=sys.stderr)
      raise

  return outdated
81 
82 
# Path of this script relative to LIBCORE_DIR. It is embedded into the message
# of the first commit below.
THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)

# Name prefix of the temporary local branches created by this script.
TEMP_EXPECTED_BRANCH_PREFIX = "expected_upstream_"

# Message template of the commit importing the upstream files. THIS_TOOL_PATH
# is interpolated right here; {summary}, {files}, {bug} and {change_id_str}
# are filled in later with str.format(). {change_id_str} is either empty or
# starts with its own newline, hence no "\n" in front of it.
MSG_FIRST_COMMIT = ("Import {summary}\n"
                    "\n"
                    "List of files:\n"
                    "  {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}\n"
                    "\n"
                    "{bug}\n"
                    "Test: N/A\n"
                    "No-Typo-Check: Imported files"
                    "{change_id_str}")

# Message template of the merge commit; same placeholders as above.
MSG_SECOND_COMMIT = ("Merge {summary} into the "
                     "aosp/main branch\n"
                     "\n"
                     "List of files:\n"
                     "  {files}\n"
                     "\n"
                     "{bug}\n"
                     "Test: N/A"
                     "{change_id_str}")

# Value returned by get_diff_entries() when there is nothing to process.
INVALID_DIFF = (None, None)

# Matches a /* ... */ comment block followed by optional spaces and at least
# one newline.
LICENSE_BLOCK = r"\/\*(?:\*(?!\/)|[^*])*\*\/[ ]*\n+"
# Matches such a comment block starting at the beginning of a line and
# directly followed by a single-line "import ...;" statement. Group 1 is the
# comment block (including the trailing newlines), group 2 is the import.
REGEX_LICENSE_AND_IMPORT = re.compile(
    r"^(" + LICENSE_BLOCK + ")(import .+;)$", re.MULTILINE)
114 
115 
def create_commit_staging_diff(repo: Repo) -> None:
  r"""Save the current EXPECTED_UPSTREAM file in a new git commit.

  The commit is built from an index created from the HEAD commit's tree with
  EXPECTED_UPSTREAM added on top, uses the HEAD commit as its only parent and
  is created with head=False. Its hash is printed together with the command
  that retrieves the file, so that the file can be recovered if this script
  fails later.

  Args:
    repo: the repository object
  """
  parent = repo.head.commit
  staging_index = IndexFile.from_tree(repo, parent)
  staging_index.add("EXPECTED_UPSTREAM")

  timestamp = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
  staging_commit = staging_index.commit(
      message=f"Staging EXPECTED_UPSTREAM at {timestamp}",
      parent_commits=[parent],
      head=False)
  sha = staging_commit.hexsha

  print(
      f"The current EXPECTED_UPSTREAM file is saved in {sha}.\n"
      "If this script fails in the later stage, please retrieve the file by:\n"
      f"  git checkout {sha} -- EXPECTED_UPSTREAM")
136 
137 
def create_commit_summary(diff_entries: List[ExpectedUpstreamEntry]) -> str:
  r"""Create a commit summary message.

  The summary names the imported class, the common package or the common
  class-name prefix of the entries. Non-test classes take precedence; test
  classes are only used when there is no non-test class. When all selected
  entries share the same git_ref, " from <ref>" is appended, where <ref> is
  the git_ref without its part up to the first "/".

  Args:
    diff_entries: list of new / modified entries

  Returns:
    a string message; "files" when no entry maps to a class name
  """
  fallback = "files"

  named = [
      (entry, OjluniFinder.translate_ojluni_path_to_class_name(entry.dst_path))
      for entry in diff_entries]
  # Entries whose path doesn't translate to a class name are ignored.
  known = [pair for pair in named if pair[1] is not None]

  selected: List[tuple[ExpectedUpstreamEntry, str]] = [
      pair for pair in known if not pair[1].startswith("test.")]
  if not selected:
    # No non-test entry; fall back to the test entries.
    selected = [pair for pair in known if pair[1].startswith("test.")]
  if not selected:
    return fallback

  # Keep the ref only if all entries come from the same OpenJDK revision.
  first_ref = selected[0][0].git_ref
  if any(entry.git_ref != first_ref for entry, _ in selected):
    common_ref = None
  else:
    common_ref = first_ref

  class_names = [name for _, name in selected]
  if len(class_names) == 1:
    # A single class: use its simple name.
    classes_summary = class_names[0].split(".")[-1]
  else:
    common_prefix = os.path.commonprefix(class_names)
    parts = common_prefix.split(".")
    tail = parts[-1]
    if len(parts) <= 2:
      # A short java package, e.g. javax. or java.n, doesn't provide a
      # meaningful commit summary.
      classes_summary = fallback
    elif not tail or tail[0].islower():
      # Assume that a package name isn't title-case: the prefix ends inside
      # a package name, so discard everything after the last ".".
      classes_summary = ".".join(parts[:-1])
    else:
      classes_summary = common_prefix + "*"

  if common_ref is None:
    return classes_summary
  abbv_ref = common_ref.split("/", 1)[-1]
  return f"{classes_summary} from {abbv_ref}"
198 
199 
def create_commit_at_expected_upstream(
    repo: Repo, head: Head, new_entries: List[ExpectedUpstreamEntry],
    removed_paths: Set[str], bug_id: str,
    last_expected_change_id: str, discard_working_tree: bool) -> Head:
  r"""Create a new commit importing the given files at the head.

  The upstream content of every new entry is written into the working tree,
  the EXPECTED_UPSTREAM file is rewritten from the version stored at the head
  with the new entries overlaid and the removed paths dropped, and a commit
  containing these changes is created on top of head.commit.

  Args:
    repo: the repository object
    head: the temp expected_upstream branch
    new_entries: a list of entries to import
    removed_paths: paths to remove from EXPECTED_UPSTREAM and from the index
    bug_id: bug id, or None to leave the bug line of the message empty
    last_expected_change_id: Gerrit's Change-Id to reuse in the commit
      message; omitted from the message when empty or None
    discard_working_tree: if true, HEAD is pointed at the given head, the
      index is reset and the commit is staged through the repository's own
      index; otherwise a separate index created from head.commit is used.

  Returns:
    the result of head.set_commit() with the newly created commit
  """
  affected_paths = [e.dst_path for e in new_entries] + list(removed_paths)
  str_affected_paths = "\n  ".join(affected_paths)

  for entry in new_entries:
    ref = entry.git_ref
    upstream_commit = repo.commit(ref)
    src_blob = upstream_commit.tree[entry.src_path]
    # Write into the file system directly because GitPython provides no API
    # writing into the index in memory. IndexFile.move doesn't help here,
    # because the API requires the file on the working tree too.
    # However, it's fine, because we later reset the HEAD.
    absolute_dst_path = Path(LIBCORE_DIR, entry.dst_path)
    absolute_dst_path.parent.mkdir(parents=True, exist_ok=True)
    with absolute_dst_path.open("wb") as file:
      file.write(src_blob.data_stream.read())

  # Start from the EXPECTED_UPSTREAM content stored at the head, not from the
  # file in the working tree.
  entries = ExpectedUpstreamFile(head.commit.tree["EXPECTED_UPSTREAM"]
                                 .data_stream.read()).read_all_entries()
  entries = overlay_entries(entries, new_entries)
  entries = list(filter(lambda e: e.dst_path not in removed_paths, entries))
  # Write the entries to the file system.
  ExpectedUpstreamFile().sort_and_write_all_entries(entries)

  if discard_working_tree:
    # Point HEAD at the temp branch and reset the index, so that the
    # repository's own index can be used for staging.
    repo.head.reference = head
    repo.head.reset(index=True)
    index = repo.index
  else:
    index = IndexFile.from_tree(repo, head.commit)
  index.add("EXPECTED_UPSTREAM")
  for entry in new_entries:
    index.add(entry.dst_path)

  for p in removed_paths:
    index.remove(p)

  summary_msg = create_commit_summary(new_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_expected_change_id:
    # The leading newline puts the Change-Id on its own line; see
    # MSG_FIRST_COMMIT.
    change_id_str = f"\nChange-Id: {last_expected_change_id}"
  msg = MSG_FIRST_COMMIT.format(summary=summary_msg, files=str_affected_paths,
                                bug=str_bug, change_id_str=change_id_str)
  # head=False: the commit is attached to the temp branch explicitly below.
  commit = index.commit(message=msg, parent_commits=[head.commit], head=False)
  new_head = head.set_commit(commit)

  print(f"Create a new commit {commit.hexsha} at {head.name}")

  return new_head
267 
268 
def overlay_entries(
    existing_entries: List[ExpectedUpstreamEntry],
    new_entries: List[ExpectedUpstreamEntry]) -> List[ExpectedUpstreamEntry]:
  r"""Return a list of entries after overlaying the new_entries.

  Entries are keyed by dst_path. A new entry replaces the existing entry with
  the same dst_path and takes its position in the result; the other new
  entries are appended after the existing ones.

  Args:
    existing_entries: current entries
    new_entries: entries being overlaid
  Returns:
    a list of entries, one per distinct dst_path
  """
  by_dst_path = {entry.dst_path: entry for entry in existing_entries}
  by_dst_path.update((entry.dst_path, entry) for entry in new_entries)
  return list(by_dst_path.values())
288 
289 
# Matches a whole "Change-Id: I<hex digits>" line; group 1 is the Change-Id.
# Meant to be used with re.M so that ^ and $ match at line boundaries.
REGEX_CHANGE_ID = r"^Change-Id: (I[0-9a-f]+)$"
# Matches a whole "Bug: <digits>" line; group 1 is the numeric bug id.
REGEX_BUG_ID = r"^Bug: ([0-9]+)$"
292 
293 
def extract_change_id(commit: Commit) -> str:
  r"""Extract gerrit's Change-Id from a commit message.

  Args:
     commit: the commit whose message is searched for a "Change-Id: ..." line

  Returns:
    the Change-Id of the first matching line, or None if there is no such line
  """
  match = re.search(REGEX_CHANGE_ID, commit.message, flags=re.MULTILINE)
  if match is None:
    return None
  return match.group(1)
305 
306 
def extract_bug_id(commit: Commit) -> str:
  r"""Extract the bug id from a commit message.

  Args:
     commit: the commit whose message is searched for a "Bug: ..." line

  Returns:
    the Buganizer Id of the first matching line, or None if there is no such
    line
  """
  match = re.search(REGEX_BUG_ID, commit.message, flags=re.MULTILINE)
  if match is None:
    return None
  return match.group(1)
318 
319 
def get_diff_entries(repo: Repo, base_expected_commit: Commit) -> Tuple[
    List[ExpectedUpstreamEntry], Set[str]]:
  """Get a list of entries different from the head commit.

  Validate EXPECTED_UPSTREAM file and return the list of
  modified or new entries between the working tree and HEAD.

  Args:
    repo: Repo
    base_expected_commit: the base commit; the file content of the entries is
      compared against the tree of this commit

  Returns:
    a tuple of (new or modified entries, removed paths), or INVALID_DIFF if
    the active branch doesn't track aosp/main, if EXPECTED_UPSTREAM is not
    the one and only modified file in the working tree, or if nothing needs
    to be updated.
  """
  # tracking_branch() returns None when the active branch has no upstream
  # configured; report that the same way as tracking the wrong branch instead
  # of failing with an AttributeError.
  current_tracking_branch = repo.active_branch.tracking_branch()
  if (current_tracking_branch is None or
      current_tracking_branch.name != "aosp/main"):
    print("This script should only run on aosp/main branch. "
          f"Currently, this is on branch {repo.active_branch} "
          f"tracking {current_tracking_branch}", file=sys.stderr)
    return INVALID_DIFF

  print("Reading EXPECTED_UPSTREAM file...")
  head_commit = repo.head.commit
  # diff(None) compares the HEAD commit against the working tree.
  diff_index = head_commit.diff(None)
  no_file_change = len(diff_index)
  if no_file_change == 0:
    print("Can't find any EXPECTED_UPSTREAM file change", file=sys.stderr)
    return INVALID_DIFF
  elif no_file_change > 1 or diff_index[0].a_rawpath != b"EXPECTED_UPSTREAM":
    print("Expect modification in the EXPECTED_UPSTREAM file only.\n"
          "Please remove / commit the other changes. The below file changes "
          "are detected: ", file=sys.stderr)
    print_diff_index(diff_index, file=sys.stderr)
    return INVALID_DIFF

  # prev_file is the committed version; curr_file is read by the default
  # constructor (presumably from the working tree).
  prev_file = ExpectedUpstreamFile(head_commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  curr_file = ExpectedUpstreamFile()
  diff_entries = prev_file.get_new_or_modified_entries(curr_file)
  removed_paths = prev_file.get_removed_paths(curr_file)

  modified_entries = validate_and_remove_unmodified_entries(
      diff_entries, repo, base_expected_commit)

  if not modified_entries and not removed_paths:
    print("No need to update. All files are updated.")
    return INVALID_DIFF

  print("The following entries will be updated from upstream")
  for e in modified_entries:
    print(f"  {e.dst_path}")
  for p in removed_paths:
    print(f"  {p}")

  # NOTE(review): the unfiltered diff_entries are returned, not the
  # modified_entries printed above. Kept as is; confirm that this is intended.
  return diff_entries, removed_paths
375 
376 
def compute_absorbed_diff_entries(
    repo: Repo, base_commit: Commit, commit: Commit, overlaid_entries: List[
        ExpectedUpstreamEntry], removed_paths: Set[
            str]) -> Tuple[List[ExpectedUpstreamEntry], Set[str]]:
  r"""Compute the combined entries after absorbing the new changes.

  The entries that are new or modified in the EXPECTED_UPSTREAM file of commit
  compared with the one of base_commit are overlaid with overlaid_entries, and
  the result is validated against base_commit.

  Args:
    repo: Repo
    base_commit: the base commit in the expected_upstream
    commit: The commit diff-ed against from the base_commit
    overlaid_entries: Additional entries overlaid on top of the diff.
    removed_paths: removed paths

  Returns:
    a tuple of (combined diff entries whose content needs updating, removed
    paths)
  """
  base_file = ExpectedUpstreamFile(
      base_commit.tree["EXPECTED_UPSTREAM"].data_stream.read())
  absorbed_file = ExpectedUpstreamFile(
      commit.tree["EXPECTED_UPSTREAM"].data_stream.read())

  combined = overlay_entries(
      base_file.get_new_or_modified_entries(absorbed_file), overlaid_entries)

  # NOTE(review): `overlapping` holds entry objects, whereas the two filters
  # below test path strings for membership in it. Unless an entry compares
  # equal to its dst_path string, neither filter drops anything. Behavior is
  # kept as is; confirm the intent before changing it, because it affects
  # which files end up being removed.
  overlapping = {e for e in combined if e.dst_path in removed_paths}
  combined = [e for e in combined if e.dst_path not in overlapping]
  new_removed_paths = {p for p in removed_paths if p not in overlapping}

  outdated_entries = validate_and_remove_unmodified_entries(
      combined, repo, base_commit)
  return outdated_entries, new_removed_paths
406 
407 
def main_run(
    repo: Repo, expected_upstream_base: str,
    bug_id: str, use_rerere: bool, is_absorbed: bool,
    discard_working_tree: bool) -> None:
  """Create the commits importing files according to the EXPECTED_UPSTREAM.

  A commit importing the files is created on a new temporary branch based on
  the expected_upstream base commit, and that commit is then merged into the
  current branch with git-merge. Merge errors and conflicts are reported and
  left to the user. Diagnostics are printed and the function returns early
  when the repository isn't in the expected state.

  Args:
    repo: Repo
    expected_upstream_base: the base commit in the expected_upstream branch.
      Defaults to "aosp/expected_upstream" when None. Not used when
      is_absorbed is true.
    bug_id: bug id. When None and is_absorbed is true, the bug id is taken
      from the message of the last import commit.
    use_rerere: Reuses the recorded resolution from git
    is_absorbed: Absorb the new changes from EXPECTED_UPSTREAM into the
      existing commits created by this script
    discard_working_tree: discard working tree flag.

  Raises:
    Any error raised by repo.commit() when expected_upstream_base can't be
    resolved; a diagnostic is printed on stderr first.
  """
  last_master_commit = repo.head.commit
  last_master_change_id = None
  last_expected_change_id = None
  last_expected_commit = None
  if is_absorbed:
    # HEAD is expected to be the merge commit created by the previous run:
    # one parent is on the main branch, the other one is the import commit on
    # a temp expected_upstream branch.
    head = repo.head
    if len(head.commit.parents) != 2:
      print("Error: HEAD isn't a merge commit.", file=sys.stderr)
      return

    last_branch = None
    for commit in head.commit.parents:
      name_rev: list[str] = commit.name_rev.split(" ", 1)
      if (len(name_rev) > 1 and  # name_rev[1] is usually the branch name
          name_rev[1].startswith(TEMP_EXPECTED_BRANCH_PREFIX)):
        last_branch = name_rev[1]
        last_expected_commit = commit
      else:
        last_master_commit = commit

    if last_branch is None:
      print("Error: Can't find the last commit in the expected_upstream "
            "branch.", file=sys.stderr)
      return

    if len(last_expected_commit.parents) != 1:
      print(f"Error: The head commit at {last_branch} isn't in the expected "
            f"state.", file=sys.stderr)
      return

    # Reuse the Change-Ids (and the bug id) of the previous commits so that
    # the new commits replace them.
    base_expected_branch_commit = last_expected_commit.parents[0]
    last_expected_change_id = extract_change_id(last_expected_commit)
    last_master_change_id = extract_change_id(head.commit)
    if bug_id is None:
      bug_id = extract_bug_id(last_expected_commit)
  else:
    if expected_upstream_base is None:
      expected_upstream_base = "aosp/expected_upstream"
    # Bind the name before the try block: if repo.commit() raises, the
    # finally clause must be able to read it without raising an
    # UnboundLocalError that would mask the original error.
    base_expected_branch_commit = None
    try:
      base_expected_branch_commit = repo.commit(expected_upstream_base)
    finally:
      if base_expected_branch_commit is None:
        print(f"{expected_upstream_base} is not found in this repository.",
              file=sys.stderr)

  diff_entries, removed_paths = get_diff_entries(repo,
                                                 base_expected_branch_commit)
  # get_diff_entries() has already printed the reason in this case.
  if not diff_entries and not removed_paths:
    return

  if is_absorbed:
    diff_entries, removed_paths = compute_absorbed_diff_entries(
        repo, base_expected_branch_commit, last_expected_commit, diff_entries,
        removed_paths)

  # Due to a limitation in GitPython, index.remove requires switching branch
  # and discard the working tree.
  if removed_paths and not discard_working_tree:
    print("-r option is required to discard the current working tree.",
          file=sys.stderr)
    return

  # Save EXPECTED_UPSTREAM before anything is modified.
  create_commit_staging_diff(repo)

  master_head = repo.active_branch
  branch_name = create_random_branch_name()
  new_branch = repo.create_head(branch_name, base_expected_branch_commit.hexsha)
  new_branch.set_tracking_branch(repo.remotes.aosp.refs.expected_upstream)
  new_branch = create_commit_at_expected_upstream(
      repo, new_branch, diff_entries, removed_paths, bug_id,
      last_expected_change_id, discard_working_tree)

  # Clean the working tree before merging branch
  if discard_working_tree:
    # HEAD was pointed at the temp branch while creating the commit.
    repo.head.reference = master_head

  repo.head.reset(commit=last_master_commit, working_tree=True)
  # Delete the imported files that are not in the tree of the reset HEAD.
  for e in diff_entries:
    if not has_file_in_tree(e.dst_path, repo.head.commit.tree):
      path = Path(LIBCORE_DIR, e.dst_path)
      path.unlink(missing_ok=True)

  affected_paths = [e.dst_path for e in diff_entries] + list(removed_paths)
  str_affected_paths = "\n  ".join(affected_paths)
  summary_msg = create_commit_summary(diff_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_master_change_id:
    change_id_str = f"\nChange-Id: {last_master_change_id}"
  msg = MSG_SECOND_COMMIT.format(
      summary=summary_msg, files=str_affected_paths, bug=str_bug,
      change_id_str=change_id_str)
  rerere_str = "rerere.enabled="
  rerere_str += "true" if use_rerere else "false"

  # Map each imported test file to the java package derived from its path.
  test_dst_paths = {}
  for e in diff_entries:
    if e.dst_path.startswith(TEST_PATH):
      class_name = OjluniFinder.translate_ojluni_path_to_class_name(e.dst_path)
      if class_name is not None:
        package_name = class_name[:class_name.rfind(".")]
        test_dst_paths[e.dst_path] = package_name

  # Run git-merge command here, and will let the user to handle
  # any errors and merge conflicts
  try:
    repo.git.execute(["git", "-c", rerere_str, "merge",
                      new_branch.commit.hexsha, "-m", msg])
  except GitCommandError as err:
    print(f"Error: {err}", file=sys.stderr)

  insert_package_name_to_tests(test_dst_paths)
534 
535 
def insert_package_name_to_tests(test_dst_paths: Dict[str, str]):
  """Insert package name into the test file before the java import statement.

  A "package <name>;" statement followed by an empty line is inserted between
  the first comment block matched by REGEX_LICENSE_AND_IMPORT and the import
  statement directly following it. Files without such a match are left
  untouched.

  Args:
    test_dst_paths: Map the file path to package names. Relative paths are
      resolved against LIBCORE_DIR.
  """
  for dst_path, package_name in test_dst_paths.items():
    # The destination paths are relative to LIBCORE_DIR, like everywhere else
    # in this script; resolve them so that the files are found regardless of
    # the current working directory.
    file_path = Path(LIBCORE_DIR, dst_path)
    with file_path.open("r") as file:
      src = file.read()
    replacement = r"\1package " + package_name + r";\n\n\2"
    modified = REGEX_LICENSE_AND_IMPORT.sub(replacement, src, count=1)
    # Don't rewrite a file in which nothing has been inserted.
    if modified != src:
      with file_path.open("w") as out:
        out.write(modified)
549 
550 
def create_random_branch_name():
  """Returns TEMP_EXPECTED_BRANCH_PREFIX plus 10 random characters.

  Each character is drawn from the lowercase ASCII letters and the digits.
  """
  alphabet = string.ascii_lowercase + string.digits
  suffix_chars = [random.choice(alphabet) for _ in range(10)]
  return TEMP_EXPECTED_BRANCH_PREFIX + "".join(suffix_chars)
555 
556 
def print_diff_index(index: DiffIndex, file=sys.stdout) -> None:
  """Prints the a_rawpath of every diff in the index, one per line.

  Args:
    index: the diffs to list
    file: the stream written to
  """
  for raw_path in (diff.a_rawpath for diff in index):
    print(f"  {raw_path}", file=file)
560 
561 
def main(argv: Sequence[str]) -> None:
  """Parses the command line arguments and runs the import.

  Prints an error and returns without doing anything when both the -a and the
  -e options are given. Otherwise the repository at LIBCORE_DIR is opened,
  main_run() is invoked and the repository is closed again, even when
  main_run() raises.

  Args:
    argv: the command line arguments without the program name
  """
  parser = argparse.ArgumentParser(
      description="Read the EXPECTED_UPSTREAM and update the files from the "
                  "OpenJDK. This script imports the files from OpenJDK into "
                  "the expected_upstream branch and merges it into the "
                  "current branch.")
  parser.add_argument(
      "-a", "--absorbed-to-last-merge",
      action="store_true",
      help="Import more files but absorb them into the last commits created "
           "by this script.")
  parser.add_argument(
      "--disable-rerere",
      action="store_true",
      help="Do not re-use the recorded resolution from git.")
  parser.add_argument(
      "-r", "--reset",
      action="store_true",
      help="Discard the current working tree. Experimental flag to "
           "support file removal from ojluni/.")
  parser.add_argument(
      "-b", "--bug",
      nargs="?",
      help="Buganizer Id")
  parser.add_argument(
      "-e", "--expected_upstream_base",
      nargs="?",
      help="The base commit in the expected_upstream branch")

  options = parser.parse_args(argv)

  # Absorbing derives the base commit from the last merge, so an explicit
  # base makes no sense together with it.
  if (options.absorbed_to_last_merge and
      options.expected_upstream_base is not None):
    print("Error: -a and -e options can't be used together.", file=sys.stderr)
    return

  repo = Repo(LIBCORE_DIR.as_posix())
  try:
    main_run(
        repo,
        options.expected_upstream_base,
        options.bug,
        not options.disable_rerere,
        options.absorbed_to_last_merge,
        options.reset)
  finally:
    repo.close()
603 
604 
# Script entry point: pass the command line arguments, without the program
# name, to main().
if __name__ == "__main__":
  main(sys.argv[1:])
607