1#!/usr/bin/env python
2#
3# Copyright (C) 2014 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17from __future__ import print_function
18
19import argparse
20import bisect
21import logging
22import os
23import struct
24import threading
25from hashlib import sha1
26
27import rangelib
28
29logger = logging.getLogger(__name__)
30
31
32class SparseImage(object):
33  """Wraps a sparse image file into an image object.
34
35  Wraps a sparse image file (and optional file map and clobbered_blocks) into
36  an image object suitable for passing to BlockImageDiff. file_map contains
37  the mapping between files and their blocks. clobbered_blocks contains the set
38  of blocks that should be always written to the target regardless of the old
39  contents (i.e. copying instead of patching). clobbered_blocks should be in
40  the form of a string like "0" or "0 1-5 8".
41  """
42
43  def __init__(self, simg_fn, file_map_fn=None, clobbered_blocks=None,
44               mode="rb", build_map=True, allow_shared_blocks=False):
45    self.simg_f = f = open(simg_fn, mode)
46
47    header_bin = f.read(28)
48    header = struct.unpack("<I4H4I", header_bin)
49
50    magic = header[0]
51    major_version = header[1]
52    minor_version = header[2]
53    file_hdr_sz = header[3]
54    chunk_hdr_sz = header[4]
55    self.blocksize = blk_sz = header[5]
56    self.total_blocks = total_blks = header[6]
57    self.total_chunks = total_chunks = header[7]
58
59    if magic != 0xED26FF3A:
60      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
61    if major_version != 1 or minor_version != 0:
62      raise ValueError("I know about version 1.0, but this is version %u.%u" %
63                       (major_version, minor_version))
64    if file_hdr_sz != 28:
65      raise ValueError("File header size was expected to be 28, but is %u." %
66                       (file_hdr_sz,))
67    if chunk_hdr_sz != 12:
68      raise ValueError("Chunk header size was expected to be 12, but is %u." %
69                       (chunk_hdr_sz,))
70
71    logger.info(
72        "Total of %u %u-byte output blocks in %u input chunks.", total_blks,
73        blk_sz, total_chunks)
74
75    if not build_map:
76      return
77
78    pos = 0   # in blocks
79    care_data = []
80    self.offset_map = offset_map = []
81    self.clobbered_blocks = rangelib.RangeSet(data=clobbered_blocks)
82
83    for _ in range(total_chunks):
84      header_bin = f.read(12)
85      header = struct.unpack("<2H2I", header_bin)
86      chunk_type = header[0]
87      chunk_sz = header[2]
88      total_sz = header[3]
89      data_sz = total_sz - 12
90
91      if chunk_type == 0xCAC1:
92        if data_sz != (chunk_sz * blk_sz):
93          raise ValueError(
94              "Raw chunk input size (%u) does not match output size (%u)" %
95              (data_sz, chunk_sz * blk_sz))
96        else:
97          care_data.append(pos)
98          care_data.append(pos + chunk_sz)
99          offset_map.append((pos, chunk_sz, f.tell(), None))
100          pos += chunk_sz
101          f.seek(data_sz, os.SEEK_CUR)
102
103      elif chunk_type == 0xCAC2:
104        fill_data = f.read(4)
105        care_data.append(pos)
106        care_data.append(pos + chunk_sz)
107        offset_map.append((pos, chunk_sz, None, fill_data))
108        pos += chunk_sz
109
110      elif chunk_type == 0xCAC3:
111        if data_sz != 0:
112          raise ValueError("Don't care chunk input size is non-zero (%u)" %
113                           (data_sz))
114
115        pos += chunk_sz
116
117      elif chunk_type == 0xCAC4:
118        raise ValueError("CRC32 chunks are not supported")
119
120      else:
121        raise ValueError("Unknown chunk type 0x%04X not supported" %
122                         (chunk_type,))
123
124    self.generator_lock = threading.Lock()
125
126    self.care_map = rangelib.RangeSet(care_data)
127    self.offset_index = [i[0] for i in offset_map]
128
129    # Bug: 20881595
130    # Introduce extended blocks as a workaround for the bug. dm-verity may
131    # touch blocks that are not in the care_map due to block device
132    # read-ahead. It will fail if such blocks contain non-zeroes. We zero out
133    # the extended blocks explicitly to avoid dm-verity failures. 512 blocks
134    # are the maximum read-ahead we configure for dm-verity block devices.
135    extended = self.care_map.extend(512)
136    all_blocks = rangelib.RangeSet(data=(0, self.total_blocks))
137    extended = extended.intersect(all_blocks).subtract(self.care_map)
138    self.extended = extended
139
140    if file_map_fn:
141      self.LoadFileBlockMap(file_map_fn, self.clobbered_blocks,
142                            allow_shared_blocks)
143    else:
144      self.file_map = {"__DATA": self.care_map}
145
146  def AppendFillChunk(self, data, blocks):
147    f = self.simg_f
148
149    # Append a fill chunk
150    f.seek(0, os.SEEK_END)
151    f.write(struct.pack("<2H3I", 0xCAC2, 0, blocks, 16, data))
152
153    # Update the sparse header
154    self.total_blocks += blocks
155    self.total_chunks += 1
156
157    f.seek(16, os.SEEK_SET)
158    f.write(struct.pack("<2I", self.total_blocks, self.total_chunks))
159
160  def RangeSha1(self, ranges):
161    h = sha1()
162    for data in self._GetRangeData(ranges):
163      h.update(data)
164    return h.hexdigest()
165
166  def ReadRangeSet(self, ranges):
167    return [d for d in self._GetRangeData(ranges)]
168
169  def ReadBlocks(self, start=0, num_blocks=None):
170    if num_blocks is None:
171      num_blocks = self.total_blocks
172    return self._GetRangeData([(start, start + num_blocks)])
173
174  def TotalSha1(self, include_clobbered_blocks=False):
175    """Return the SHA-1 hash of all data in the 'care' regions.
176
177    If include_clobbered_blocks is True, it returns the hash including the
178    clobbered_blocks."""
179    ranges = self.care_map
180    if not include_clobbered_blocks:
181      ranges = ranges.subtract(self.clobbered_blocks)
182    return self.RangeSha1(ranges)
183
184  def WriteRangeDataToFd(self, ranges, fd):
185    for data in self._GetRangeData(ranges):
186      fd.write(data)
187
188  def _GetRangeData(self, ranges):
189    """Generator that produces all the image data in 'ranges'.  The
190    number of individual pieces returned is arbitrary (and in
191    particular is not necessarily equal to the number of ranges in
192    'ranges'.
193
194    Use a lock to protect the generator so that we will not run two
195    instances of this generator on the same object simultaneously."""
196
197    f = self.simg_f
198    with self.generator_lock:
199      for s, e in ranges:
200        to_read = e-s
201        idx = bisect.bisect_right(self.offset_index, s) - 1
202        chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
203
204        # for the first chunk we may be starting partway through it.
205        remain = chunk_len - (s - chunk_start)
206        this_read = min(remain, to_read)
207        if filepos is not None:
208          p = filepos + ((s - chunk_start) * self.blocksize)
209          f.seek(p, os.SEEK_SET)
210          yield f.read(this_read * self.blocksize)
211        else:
212          yield fill_data * (this_read * (self.blocksize >> 2))
213        to_read -= this_read
214
215        while to_read > 0:
216          # continue with following chunks if this range spans multiple chunks.
217          idx += 1
218          chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
219          this_read = min(chunk_len, to_read)
220          if filepos is not None:
221            f.seek(filepos, os.SEEK_SET)
222            yield f.read(this_read * self.blocksize)
223          else:
224            yield fill_data * (this_read * (self.blocksize >> 2))
225          to_read -= this_read
226
227  def LoadFileBlockMap(self, fn, clobbered_blocks, allow_shared_blocks):
228    """Loads the given block map file.
229
230    Args:
231      fn: The filename of the block map file.
232      clobbered_blocks: A RangeSet instance for the clobbered blocks.
233      allow_shared_blocks: Whether having shared blocks is allowed.
234    """
235    remaining = self.care_map
236    self.file_map = out = {}
237
238    with open(fn) as f:
239      for line in f:
240        fn, ranges_text = line.rstrip().split(None, 1)
241        raw_ranges = rangelib.RangeSet.parse(ranges_text)
242
243        # Note: e2fsdroid records holes in the extent tree as "0" blocks.
244        # This causes confusion because clobbered_blocks always includes
245        # the superblock (physical block #0). Since the 0 blocks here do
246        # not represent actual physical blocks, remove them from the set.
247        ranges = raw_ranges.subtract(rangelib.RangeSet("0"))
248        # b/150334561 we need to perserve the monotonic property of the raw
249        # range. Otherwise, the validation script will read the blocks with
250        # wrong order when pulling files from the image.
251        ranges.monotonic = raw_ranges.monotonic
252        ranges.extra['text_str'] = ranges_text
253
254        if allow_shared_blocks:
255          # Find the shared blocks that have been claimed by others. If so, tag
256          # the entry so that we can skip applying imgdiff on this file.
257          shared_blocks = ranges.subtract(remaining)
258          if shared_blocks:
259            non_shared = ranges.subtract(shared_blocks)
260            if not non_shared:
261              continue
262
263            # Put the non-shared RangeSet as the value in the block map, which
264            # has a copy of the original RangeSet.
265            non_shared.extra['uses_shared_blocks'] = ranges
266            ranges = non_shared
267
268        out[fn] = ranges
269        assert ranges.size() == ranges.intersect(remaining).size()
270
271        # Currently we assume that blocks in clobbered_blocks are not part of
272        # any file.
273        assert not clobbered_blocks.overlaps(ranges)
274        remaining = remaining.subtract(ranges)
275
276    remaining = remaining.subtract(clobbered_blocks)
277
278    # For all the remaining blocks in the care_map (ie, those that
279    # aren't part of the data for any file nor part of the clobbered_blocks),
280    # divide them into blocks that are all zero and blocks that aren't.
281    # (Zero blocks are handled specially because (1) there are usually
282    # a lot of them and (2) bsdiff handles files with long sequences of
283    # repeated bytes especially poorly.)
284
285    zero_blocks = []
286    nonzero_blocks = []
287    reference = '\0' * self.blocksize
288
289    # Workaround for bug 23227672. For squashfs, we don't have a system.map. So
290    # the whole system image will be treated as a single file. But for some
291    # unknown bug, the updater will be killed due to OOM when writing back the
292    # patched image to flash (observed on lenok-userdebug MEA49). Prior to
293    # getting a real fix, we evenly divide the non-zero blocks into smaller
294    # groups (currently 1024 blocks or 4MB per group).
295    # Bug: 23227672
296    MAX_BLOCKS_PER_GROUP = 1024
297    nonzero_groups = []
298
299    f = self.simg_f
300    for s, e in remaining:
301      for b in range(s, e):
302        idx = bisect.bisect_right(self.offset_index, b) - 1
303        chunk_start, _, filepos, fill_data = self.offset_map[idx]
304        if filepos is not None:
305          filepos += (b-chunk_start) * self.blocksize
306          f.seek(filepos, os.SEEK_SET)
307          data = f.read(self.blocksize)
308        else:
309          if fill_data == reference[:4]:   # fill with all zeros
310            data = reference
311          else:
312            data = None
313
314        if data == reference:
315          zero_blocks.append(b)
316          zero_blocks.append(b+1)
317        else:
318          nonzero_blocks.append(b)
319          nonzero_blocks.append(b+1)
320
321          if len(nonzero_blocks) >= MAX_BLOCKS_PER_GROUP:
322            nonzero_groups.append(nonzero_blocks)
323            # Clear the list.
324            nonzero_blocks = []
325
326    if nonzero_blocks:
327      nonzero_groups.append(nonzero_blocks)
328      nonzero_blocks = []
329
330    assert zero_blocks or nonzero_groups or clobbered_blocks
331
332    if zero_blocks:
333      out["__ZERO"] = rangelib.RangeSet(data=zero_blocks)
334    if nonzero_groups:
335      for i, blocks in enumerate(nonzero_groups):
336        out["__NONZERO-%d" % i] = rangelib.RangeSet(data=blocks)
337    if clobbered_blocks:
338      out["__COPY"] = clobbered_blocks
339
340  def ResetFileMap(self):
341    """Throw away the file map and treat the entire image as
342    undifferentiated data."""
343    self.file_map = {"__DATA": self.care_map}
344
345
346def GetImagePartitionSize(img):
347  try:
348    simg = SparseImage(img, build_map=False)
349    return simg.blocksize * simg.total_blocks
350  except ValueError:
351    return os.path.getsize(img)
352
353
354if __name__ == '__main__':
355  parser = argparse.ArgumentParser()
356  parser.add_argument('image')
357  parser.add_argument('--get_partition_size', action='store_true',
358                      help='Return partition size of the image')
359  args = parser.parse_args()
360  if args.get_partition_size:
361    print(GetImagePartitionSize(args.image))
362