1#!/usr/bin/env python 2# 3# Copyright (C) 2022 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17from sys import exit 18from typing import List 19from glob import glob 20from pathlib import Path 21from collections import defaultdict 22from difflib import Differ 23from re import split 24from tqdm import tqdm 25import argparse 26 27 28DIFFER_CODE_LEN = 2 29 30class DifferCodes: 31 COMMON = ' ' 32 UNIQUE_FIRST = '- ' 33 UNIQUE_SECOND = '+ ' 34 DIFF_IDENT = '? ' 35 36class FilesDiffAnalyzer: 37 def __init__(self, args) -> None: 38 self.out_dir = args.out_dir 39 self.show_diff = args.show_diff 40 self.skip_words = args.skip_words 41 self.first_dir = args.first_dir 42 self.second_dir = args.second_dir 43 self.include_common = args.include_common 44 45 self.first_dir_files = self.get_files(self.first_dir) 46 self.second_dir_files = self.get_files(self.second_dir) 47 self.common_file_map = defaultdict(set) 48 49 self.map_common_files(self.first_dir_files, self.first_dir) 50 self.map_common_files(self.second_dir_files, self.second_dir) 51 52 def get_files(self, dir: str) -> List[str]: 53 """Get all files directory in the input directory including the files in the subdirectories 54 55 Recursively finds all files in the input directory. 56 Returns a list of file directory strings, which do not include directories but only files. 57 List is sorted in alphabetical order of the file directories. 58 59 Args: 60 dir: Directory to get the files. String. 61 62 Returns: 63 A list of file directory strings within the input directory. 64 Sorted in Alphabetical order. 65 66 Raises: 67 FileNotFoundError: An error occurred accessing the non-existing directory 68 """ 69 70 if not dir_exists(dir): 71 raise FileNotFoundError("Directory does not exist") 72 73 if dir[:-2] != "**": 74 if dir[:-1] != "/": 75 dir += "/" 76 dir += "**" 77 78 return [file for file in sorted(glob(dir, recursive=True)) if Path(file).is_file()] 79 80 def map_common_files(self, files: List[str], dir: str) -> None: 81 for file in files: 82 file_name = file.split(dir, 1)[-1] 83 self.common_file_map[file_name].add(dir) 84 return 85 86 def compare_file_contents(self, first_file: str, second_file: str) -> List[str]: 87 """Compare the contents of the files and return different lines 88 89 Given two file directory strings, compare the contents of the two files 90 and return the list of file contents string prepended with unique identifier codes. 91 The identifier codes include: 92 - ' '(two empty space characters): Line common to two files 93 - '- '(minus followed by a space) : Line unique to first file 94 - '+ '(plus followed by a space) : Line unique to second file 95 96 Args: 97 first_file: First file directory string to compare the content 98 second_file: Second file directory string to compare the content 99 100 Returns: 101 A list of the file content strings. For example: 102 103 [ 104 " Foo", 105 "- Bar", 106 "+ Baz" 107 ] 108 """ 109 110 d = Differ() 111 first_file_contents = sort_methods(get_file_contents(first_file)) 112 second_file_contents = sort_methods(get_file_contents(second_file)) 113 diff = list(d.compare(first_file_contents, second_file_contents)) 114 ret = [f"diff {first_file} {second_file}"] 115 116 idx = 0 117 while idx < len(diff): 118 line = diff[idx] 119 line_code = line[:DIFFER_CODE_LEN] 120 121 match line_code: 122 case DifferCodes.COMMON: 123 if self.include_common: 124 ret.append(line) 125 126 case DifferCodes.UNIQUE_FIRST: 127 # Should compare line 128 if (idx < len(diff) - 1 and 129 (next_line_code := diff[idx + 1][:DIFFER_CODE_LEN]) 130 not in (DifferCodes.UNIQUE_FIRST, DifferCodes.COMMON)): 131 delta = 1 if next_line_code == DifferCodes.UNIQUE_SECOND else 2 132 line_to_compare = diff[idx + delta] 133 if self.lines_differ(line, line_to_compare): 134 ret.extend([line, line_to_compare]) 135 else: 136 if self.include_common: 137 ret.append(DifferCodes.COMMON + 138 line[DIFFER_CODE_LEN:]) 139 idx += delta 140 else: 141 ret.append(line) 142 143 case DifferCodes.UNIQUE_SECOND: 144 ret.append(line) 145 146 case DifferCodes.DIFF_IDENT: 147 pass 148 idx += 1 149 return ret 150 151 def lines_differ(self, line1: str, line2: str) -> bool: 152 """Check if the input lines are different or not 153 154 Compare the two lines word by word and check if the two lines are different or not. 155 If the different words in the comparing lines are included in skip_words, 156 the lines are not considered different. 157 158 Args: 159 line1: first line to compare 160 line2: second line to compare 161 162 Returns: 163 Boolean value indicating if the two lines are different or not 164 165 """ 166 # Split by '.' or ' '(whitespace) 167 def split_words(line: str) -> List[str]: 168 return split('\\s|\\.', line[DIFFER_CODE_LEN:]) 169 170 line1_words, line2_words = split_words(line1), split_words(line2) 171 if len(line1_words) != len(line2_words): 172 return True 173 174 for word1, word2 in zip(line1_words, line2_words): 175 if word1 != word2: 176 # not check if words are equal to skip word, but 177 # check if words contain skip word as substring 178 if all(sw not in word1 and sw not in word2 for sw in self.skip_words): 179 return True 180 181 return False 182 183 def analyze(self) -> None: 184 """Analyze file contents in both directories and write to output or console. 185 """ 186 for file in tqdm(sorted(self.common_file_map.keys())): 187 val = self.common_file_map[file] 188 189 # When file exists in both directories 190 lines = list() 191 if val == set([self.first_dir, self.second_dir]): 192 lines = self.compare_file_contents( 193 self.first_dir + file, self.second_dir + file) 194 else: 195 existing_dir, not_existing_dir = ( 196 (self.first_dir, self.second_dir) if self.first_dir in val 197 else (self.second_dir, self.first_dir)) 198 199 lines = [f"{not_existing_dir}{file} does not exist."] 200 201 if self.show_diff: 202 lines.append(f"Content of {existing_dir}{file}: \n") 203 lines.extend(get_file_contents(existing_dir + file)) 204 205 self.write(lines) 206 207 def write(self, lines: List[str]) -> None: 208 if self.out_dir == "": 209 pprint(lines) 210 else: 211 write_lines(self.out_dir, lines) 212 213### 214# Helper functions 215### 216 217def sort_methods(lines: List[str]) -> List[str]: 218 """Sort class methods in the file contents by alphabetical order 219 220 Given lines of Java file contents, return lines with class methods sorted in alphabetical order. 221 Also omit empty lines or lines with spaces. 222 For example: 223 l = [ 224 "package android.test;", 225 "", 226 "public static final int ORANGE = 1;", 227 "", 228 "public class TestClass {", 229 "public TestClass() { throw new RuntimeException("Stub!"); }", 230 "public void foo() { throw new RuntimeException("Stub!"); }", 231 "public void bar() { throw new RuntimeException("Stub!"); }", 232 "}" 233 ] 234 sort_methods(l) returns 235 [ 236 "package android.test;", 237 "public static final int ORANGE = 1;", 238 "public class TestClass {", 239 "public TestClass() { throw new RuntimeException("Stub!"); }", 240 "public void bar() { throw new RuntimeException("Stub!"); }", 241 "public void foo() { throw new RuntimeException("Stub!"); }", 242 "}" 243 ] 244 245 Args: 246 lines: List of strings consisted of Java file contents. 247 248 Returns: 249 A list of string with sorted class methods. 250 251 """ 252 def is_not_blank(l: str) -> bool: 253 return bool(l) and not l.isspace() 254 255 ret = list() 256 257 in_class = False 258 buffer = list() 259 for line in lines: 260 if not in_class: 261 if "class" in line: 262 in_class = True 263 ret.append(line) 264 else: 265 # Adding static variables, package info, etc. 266 # Skipping empty or space lines. 267 if is_not_blank(line): 268 ret.append(line) 269 else: 270 # End of class 271 if line and line[0] == "}": 272 in_class = False 273 ret.extend(sorted(buffer)) 274 buffer = list() 275 ret.append(line) 276 else: 277 if is_not_blank(line): 278 buffer.append(line) 279 280 return ret 281 282def get_file_contents(file_path: str) -> List[str]: 283 lines = list() 284 with open(file_path) as f: 285 lines = [line.rstrip('\n') for line in f] 286 f.close() 287 return lines 288 289def pprint(l: List[str]) -> None: 290 for line in l: 291 print(line) 292 293def write_lines(out_dir: str, lines: List[str]) -> None: 294 with open(out_dir, "a") as f: 295 f.writelines(line + '\n' for line in lines) 296 f.write("\n") 297 f.close() 298 299def dir_exists(dir: str) -> bool: 300 return Path(dir).exists() 301 302if __name__ == '__main__': 303 parser = argparse.ArgumentParser() 304 parser.add_argument('first_dir', action='store', type=str, 305 help="first path to compare file directory and contents") 306 parser.add_argument('second_dir', action='store', type=str, 307 help="second path to compare file directory and contents") 308 parser.add_argument('--out', dest='out_dir', 309 action='store', default="", type=str, 310 help="optional directory to write log. If not set, will print to console") 311 parser.add_argument('--show-diff-file', dest='show_diff', 312 action=argparse.BooleanOptionalAction, 313 help="optional flag. If passed, will print out the content of the file unique to each directories") 314 parser.add_argument('--include-common', dest='include_common', 315 action=argparse.BooleanOptionalAction, 316 help="optional flag. If passed, will print out the contents common to both files as well,\ 317 instead of printing only diff lines.") 318 parser.add_argument('--skip-words', nargs='+', 319 dest='skip_words', default=[], help="optional words to skip in comparison") 320 321 args = parser.parse_args() 322 323 if not args.first_dir or not args.second_dir: 324 parser.print_usage() 325 exit(0) 326 327 analyzer = FilesDiffAnalyzer(args) 328 analyzer.analyze() 329