1#!/usr/bin/env python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""Block diff utility."""
19
20from __future__ import absolute_import
21from __future__ import print_function
22
23# pylint: disable=import-error
24import argparse
25import sys
26
27
class BlockDiffError(Exception):
  """Signals an error encountered while block-diffing two files."""
30
31
def BlockDiff(block_size, file1, file2, name1, name2, max_length=-1):
  """Performs a binary diff of two files by blocks.

  Both files are read in lockstep, one block at a time, starting at their
  current positions. Runs of consecutive differing blocks are merged into a
  single extent.

  Args:
    block_size: the size of a block to diff by; must be positive
    file1: first file object
    file2: second file object
    name1: name of first file (for error reporting)
    name2: name of second file (for error reporting)
    max_length: the maximum length to read/diff in bytes (optional); a
      negative value means no limit
  Returns:
    A list of (start, length) pairs representing block extents that differ
    between the two files. Both values are counted in blocks.
  Raises:
    BlockDiffError if block_size is not positive, or if the two files return
    a different amount of data for the same block.

  """
  if block_size <= 0:
    # A zero block size reads nothing and would report "no differences" for
    # any input; a negative one turns read() into read-to-EOF, comparing the
    # whole file as a single block.
    raise BlockDiffError('invalid block size (%d), must be positive' %
                         block_size)

  if max_length < 0:
    max_length = sys.maxsize
  diff_list = []
  num_blocks = extent_start = extent_length = 0
  # Keep looping after the length budget is used up while an extent is still
  # open: the extra zero-length read yields equal (empty) data, which flushes
  # the pending extent into diff_list before the loop terminates.
  while max_length or extent_length:
    read_length = min(max_length, block_size)
    data1 = file1.read(read_length)
    data2 = file2.read(read_length)
    if len(data1) != len(data2):
      raise BlockDiffError('read %d bytes from %s but %d bytes from %s' %
                           (len(data1), name1, len(data2), name2))

    if data1 != data2:
      # Data is different, mark it down.
      if extent_length:
        # Stretch the current diff extent.
        extent_length += 1
      else:
        # Start a new diff extent.
        extent_start = num_blocks
        extent_length = 1
    elif extent_length:
      # Record the previous extent.
      diff_list.append((extent_start, extent_length))
      extent_length = 0

    # Are we done reading? (Empty data means EOF or an exhausted budget.)
    if not data1:
      break

    max_length -= len(data1)
    num_blocks += 1

  return diff_list
83
84
def main(argv):
  """Command-line entry point: block-diffs two files and prints the result.

  Args:
    argv: the full argument vector, including the program name at index 0
  Returns:
    0 if no differing blocks were found; 1 if differences were found (each
    extent and a total are printed to stdout); 2 if BlockDiff raised a
    BlockDiffError (the message is printed to stderr).

  """
  # Parse command-line arguments.
  parser = argparse.ArgumentParser(
      description='Compare FILE1 and FILE2 by blocks.',
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)

  parser.add_argument('-b', '--block-size', metavar='NUM', type=int,
                      default=4096, help='the block size to use')
  parser.add_argument('-m', '--max-length', metavar='NUM', type=int, default=-1,
                      help='maximum number of bytes to compare')
  parser.add_argument('file1', metavar='FILE1')
  parser.add_argument('file2', metavar='FILE2')

  args = parser.parse_args(argv[1:])

  # Perform the block diff.
  try:
    # Open in binary mode: this is a byte-wise comparison, so the data must
    # not go through text decoding or newline translation, and read sizes
    # must count bytes rather than characters.
    with open(args.file1, 'rb') as file1:
      with open(args.file2, 'rb') as file2:
        diff_list = BlockDiff(args.block_size, file1, file2,
                              args.file1, args.file2, args.max_length)
  except BlockDiffError as e:
    print('Error: %s' % e, file=sys.stderr)
    return 2

  # Print the diff, if such was found.
  if diff_list:
    total_diff_blocks = 0
    for extent_start, extent_length in diff_list:
      total_diff_blocks += extent_length
      # Shown as a half-open block range, start->end (count).
      print('%d->%d (%d)' %
            (extent_start, extent_start + extent_length, extent_length))

    print('total diff: %d blocks' % total_diff_blocks)
    return 1

  return 0
122
123
if __name__ == '__main__':
  # Run the tool and hand main()'s return value to the shell as exit status.
  exit_status = main(sys.argv)
  sys.exit(exit_status)
126