#!/usr/bin/env python
#
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import argparse
import bisect
import logging
import os
import struct
import threading
from hashlib import sha1

import rangelib

logger = logging.getLogger(__name__)


class SparseImage(object):
  """Wraps a sparse image file into an image object.

  Wraps a sparse image file (and optional file map and clobbered_blocks) into
  an image object suitable for passing to BlockImageDiff. file_map contains
  the mapping between files and their blocks. clobbered_blocks contains the
  set of blocks that should always be written to the target regardless of the
  old contents (i.e. copying instead of patching). clobbered_blocks should be
  in the form of a string like "0" or "0 1-5 8".
  """

  def __init__(self, simg_fn, file_map_fn=None, clobbered_blocks=None,
               mode="rb", build_map=True, allow_shared_blocks=False):
    self.simg_f = f = open(simg_fn, mode)

    header_bin = f.read(28)
    header = struct.unpack("<I4H4I", header_bin)

    magic = header[0]
    major_version = header[1]
    minor_version = header[2]
    file_hdr_sz = header[3]
    chunk_hdr_sz = header[4]
    self.blocksize = blk_sz = header[5]
    self.total_blocks = total_blks = header[6]
    self.total_chunks = total_chunks = header[7]

    if magic != 0xED26FF3A:
      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
    if major_version != 1 or minor_version != 0:
      raise ValueError("I know about version 1.0, but this is version %u.%u" %
                       (major_version, minor_version))
    if file_hdr_sz != 28:
      raise ValueError("File header size was expected to be 28, but is %u." %
                       (file_hdr_sz,))
    if chunk_hdr_sz != 12:
      raise ValueError("Chunk header size was expected to be 12, but is %u." %
                       (chunk_hdr_sz,))

    logger.info(
        "Total of %u %u-byte output blocks in %u input chunks.", total_blks,
        blk_sz, total_chunks)

    if not build_map:
      return

    pos = 0  # in blocks
    care_data = []
    self.offset_map = offset_map = []
    self.clobbered_blocks = rangelib.RangeSet(data=clobbered_blocks)
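    # Chunk types defined by the Android sparse image format, as handled in
    # the loop below: 0xCAC1 raw data, 0xCAC2 fill (a 4-byte pattern),
    # 0xCAC3 "don't care", 0xCAC4 CRC32 (not supported here). Each chunk
    # header is 12 bytes: type (u16), reserved (u16), chunk size in blocks
    # (u32), and total size in bytes including the header (u32).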
    for _ in range(total_chunks):
      header_bin = f.read(12)
      header = struct.unpack("<2H2I", header_bin)
      chunk_type = header[0]
      chunk_sz = header[2]
      total_sz = header[3]
      data_sz = total_sz - 12

      if chunk_type == 0xCAC1:
        if data_sz != (chunk_sz * blk_sz):
          raise ValueError(
              "Raw chunk input size (%u) does not match output size (%u)" %
              (data_sz, chunk_sz * blk_sz))
        else:
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          offset_map.append((pos, chunk_sz, f.tell(), None))
          pos += chunk_sz
          f.seek(data_sz, os.SEEK_CUR)

      elif chunk_type == 0xCAC2:
        fill_data = f.read(4)
        care_data.append(pos)
        care_data.append(pos + chunk_sz)
        offset_map.append((pos, chunk_sz, None, fill_data))
        pos += chunk_sz

      elif chunk_type == 0xCAC3:
        if data_sz != 0:
          raise ValueError("Don't care chunk input size is non-zero (%u)" %
                           (data_sz,))

        pos += chunk_sz

      elif chunk_type == 0xCAC4:
        raise ValueError("CRC32 chunks are not supported")

      else:
        raise ValueError("Unknown chunk type 0x%04X not supported" %
                         (chunk_type,))

    self.generator_lock = threading.Lock()

    self.care_map = rangelib.RangeSet(care_data)
    self.offset_index = [i[0] for i in offset_map]

    # Bug: 20881595
    # Introduce extended blocks as a workaround for the bug. dm-verity may
    # touch blocks that are not in the care_map due to block device
    # read-ahead. It will fail if such blocks contain non-zeroes. We zero out
    # the extended blocks explicitly to avoid dm-verity failures. 512 blocks
    # are the maximum read-ahead we configure for dm-verity block devices.
    extended = self.care_map.extend(512)
    all_blocks = rangelib.RangeSet(data=(0, self.total_blocks))
    extended = extended.intersect(all_blocks).subtract(self.care_map)
    self.extended = extended

    if file_map_fn:
      self.LoadFileBlockMap(file_map_fn, self.clobbered_blocks,
                            allow_shared_blocks)
    else:
      self.file_map = {"__DATA": self.care_map}

  def AppendFillChunk(self, data, blocks):
    f = self.simg_f

    # Append a fill chunk.
    f.seek(0, os.SEEK_END)
    f.write(struct.pack("<2H3I", 0xCAC2, 0, blocks, 16, data))

    # Update the sparse header.
    self.total_blocks += blocks
    self.total_chunks += 1

    f.seek(16, os.SEEK_SET)
    f.write(struct.pack("<2I", self.total_blocks, self.total_chunks))

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [d for d in self._GetRangeData(ranges)]

  def ReadBlocks(self, start=0, num_blocks=None):
    if num_blocks is None:
      num_blocks = self.total_blocks
    return self._GetRangeData([(start, start + num_blocks)])

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the SHA-1 hash of all data in the 'care' regions.

    If include_clobbered_blocks is True, returns the hash including the
    clobbered_blocks.
    """
    ranges = self.care_map
    if not include_clobbered_blocks:
      ranges = ranges.subtract(self.clobbered_blocks)
    return self.RangeSha1(ranges)

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):
      fd.write(data)

  def _GetRangeData(self, ranges):
    """Generator that produces all the image data in 'ranges'.

    The number of individual pieces returned is arbitrary (and in particular
    is not necessarily equal to the number of ranges in 'ranges').

    Uses a lock to protect the generator so that we will not run two
    instances of this generator on the same object simultaneously.
    """
    f = self.simg_f
    with self.generator_lock:
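      # Fill chunks carry only a 4-byte pattern, so expanding one block
      # repeats that pattern blocksize // 4 times; this is the
      # (self.blocksize >> 2) factor in the fill branches below.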
      for s, e in ranges:
        to_read = e - s
        idx = bisect.bisect_right(self.offset_index, s) - 1
        chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]

        # For the first chunk we may be starting partway through it.
        remain = chunk_len - (s - chunk_start)
        this_read = min(remain, to_read)
        if filepos is not None:
          p = filepos + ((s - chunk_start) * self.blocksize)
          f.seek(p, os.SEEK_SET)
          yield f.read(this_read * self.blocksize)
        else:
          yield fill_data * (this_read * (self.blocksize >> 2))
        to_read -= this_read

        while to_read > 0:
          # Continue with following chunks if this range spans multiple
          # chunks.
          idx += 1
          chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
          this_read = min(chunk_len, to_read)
          if filepos is not None:
            f.seek(filepos, os.SEEK_SET)
            yield f.read(this_read * self.blocksize)
          else:
            yield fill_data * (this_read * (self.blocksize >> 2))
          to_read -= this_read

  def LoadFileBlockMap(self, fn, clobbered_blocks, allow_shared_blocks):
    """Loads the given block map file.

    Args:
      fn: The filename of the block map file.
      clobbered_blocks: A RangeSet instance for the clobbered blocks.
      allow_shared_blocks: Whether having shared blocks is allowed.
    """
    remaining = self.care_map
    self.file_map = out = {}
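    # Each line of the block map file is "<path> <ranges>", e.g.
    # "/system/bin/sh 8-12 20" (the path here is illustrative).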
    with open(fn) as f:
      for line in f:
        fn, ranges_text = line.rstrip().split(None, 1)
        raw_ranges = rangelib.RangeSet.parse(ranges_text)

        # Note: e2fsdroid records holes in the extent tree as "0" blocks.
        # This causes confusion because clobbered_blocks always includes
        # the superblock (physical block #0). Since the 0 blocks here do
        # not represent actual physical blocks, remove them from the set.
        ranges = raw_ranges.subtract(rangelib.RangeSet("0"))
        # b/150334561: we need to preserve the monotonic property of the raw
        # range. Otherwise, the validation script will read the blocks in
        # the wrong order when pulling files from the image.
        ranges.monotonic = raw_ranges.monotonic
        ranges.extra['text_str'] = ranges_text

        if allow_shared_blocks:
          # Find the shared blocks that have been claimed by others. If so,
          # tag the entry so that we can skip applying imgdiff on this file.
          shared_blocks = ranges.subtract(remaining)
          if shared_blocks:
            non_shared = ranges.subtract(shared_blocks)
            if not non_shared:
              continue

            # Put the non-shared RangeSet as the value in the block map,
            # which has a copy of the original RangeSet.
            non_shared.extra['uses_shared_blocks'] = ranges
            ranges = non_shared

        out[fn] = ranges
        assert ranges.size() == ranges.intersect(remaining).size()

        # Currently we assume that blocks in clobbered_blocks are not part
        # of any file.
        assert not clobbered_blocks.overlaps(ranges)
        remaining = remaining.subtract(ranges)

    remaining = remaining.subtract(clobbered_blocks)

    # For all the remaining blocks in the care_map (i.e., those that aren't
    # part of the data for any file nor part of the clobbered_blocks),
    # divide them into blocks that are all zero and blocks that aren't.
    # (Zero blocks are handled specially because (1) there are usually a lot
    # of them and (2) bsdiff handles files with long sequences of repeated
    # bytes especially poorly.)

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Bug: 23227672
    # Workaround for the bug. For squashfs, we don't have a system.map, so
    # the whole system image will be treated as a single file. But due to
    # some unknown bug, the updater will be killed due to OOM when writing
    # back the patched image to flash (observed on lenok-userdebug MEA49).
    # Prior to getting a real fix, we evenly divide the non-zero blocks into
    # smaller groups (currently 1024 blocks or 4MB per group).
    MAX_BLOCKS_PER_GROUP = 1024
    nonzero_groups = []

    f = self.simg_f
    for s, e in remaining:
      for b in range(s, e):
        idx = bisect.bisect_right(self.offset_index, b) - 1
        chunk_start, _, filepos, fill_data = self.offset_map[idx]
        if filepos is not None:
          filepos += (b - chunk_start) * self.blocksize
          f.seek(filepos, os.SEEK_SET)
          data = f.read(self.blocksize)
        else:
          if fill_data == reference[:4]:  # Fill with all zeros.
            data = reference
          else:
            data = None

        if data == reference:
          zero_blocks.append(b)
          zero_blocks.append(b + 1)
        else:
          nonzero_blocks.append(b)
          nonzero_blocks.append(b + 1)

        if len(nonzero_blocks) >= MAX_BLOCKS_PER_GROUP:
          nonzero_groups.append(nonzero_blocks)
          # Clear the list.
          nonzero_blocks = []

    if nonzero_blocks:
      nonzero_groups.append(nonzero_blocks)
      nonzero_blocks = []

    assert zero_blocks or nonzero_groups or clobbered_blocks

    if zero_blocks:
      out["__ZERO"] = rangelib.RangeSet(data=zero_blocks)
    if nonzero_groups:
      for i, blocks in enumerate(nonzero_groups):
        out["__NONZERO-%d" % i] = rangelib.RangeSet(data=blocks)
    if clobbered_blocks:
      out["__COPY"] = clobbered_blocks

  def ResetFileMap(self):
    """Throws away the file map and treats the entire image as
    undifferentiated data."""
    self.file_map = {"__DATA": self.care_map}


def GetImagePartitionSize(img):
  try:
    simg = SparseImage(img, build_map=False)
    return simg.blocksize * simg.total_blocks
  except ValueError:
    return os.path.getsize(img)


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument('image')
  parser.add_argument('--get_partition_size', action='store_true',
                      help='Return the partition size of the image')
  args = parser.parse_args()
  if args.get_partition_size:
    print(GetImagePartitionSize(args.image))
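# A minimal usage sketch (for illustration only; the file names below are
# hypothetical):
#
#   simg = SparseImage("system.img", file_map_fn="system.map")
#   print(simg.TotalSha1())
#   with open("system.raw", "wb") as out:
#     simg.WriteRangeDataToFd(simg.care_map, out)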