1 //
2 // Copyright (C) 2015 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #ifndef PAYLOAD_GENERATOR_DELTA_DIFF_UTILS_H_
18 #define PAYLOAD_GENERATOR_DELTA_DIFF_UTILS_H_
19 
20 #include <map>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 #include <brillo/secure_blob.h>
26 #include <puffin/puffdiff.h>
27 
28 #include "update_engine/payload_consumer/payload_constants.h"
29 #include "update_engine/payload_generator/annotated_operation.h"
30 #include "update_engine/payload_generator/deflate_utils.h"
31 #include "update_engine/payload_generator/extent_ranges.h"
32 #include "update_engine/payload_generator/payload_generation_config.h"
33 #include "update_engine/update_metadata.pb.h"
34 
35 namespace chromeos_update_engine {
36 
37 namespace diff_utils {
38 using File = FilesystemInterface::File;
39 
// Create operations in |aops| to produce all the blocks in the |new_part|
// partition using the filesystem opened in that PartitionConfig.
// It uses the files reported by the filesystem in |old_part| and the data
// blocks in that partition (if available) to determine the best way to compress
// the new files (REPLACE, REPLACE_BZ, COPY, BSDIFF) and writes any necessary
// data to |blob_file|.
//
// |hard_chunk_blocks| and |soft_chunk_blocks| are the hard and soft chunk
// limits in number of blocks respectively. The soft chunk limit is used to
// split MOVE and SOURCE_COPY operations and REPLACE_BZ of zeroed blocks, while
// the hard limit is used to split a file when generating other operations. A
// value of -1 in |hard_chunk_blocks| means whole files.
//
// |version| is the payload generation configuration. NOTE(review): despite its
// name it is a PayloadGenerationConfig, which other functions in this header
// call |config|.
//
// NOTE(review): the bool result presumably reports success, as documented for
// DeltaReadFile() and ReadExtentsToDiff() -- confirm against the
// implementation.
bool DeltaReadPartition(std::vector<AnnotatedOperation>* aops,
                        const PartitionConfig& old_part,
                        const PartitionConfig& new_part,
                        ssize_t hard_chunk_blocks,
                        size_t soft_chunk_blocks,
                        const PayloadGenerationConfig& version,
                        BlobFileWriter* blob_file);
57 
// Create operations in |aops| for identical blocks that moved around in the old
// and new partition and also handle zeroed blocks. The old and new partition
// are stored in the |old_part| and |new_part| files and have |old_num_blocks|
// and |new_num_blocks| blocks respectively. The maximum operation size is
// |chunk_blocks| blocks, or unlimited if |chunk_blocks| is -1. The blobs of the
// produced operations are stored in the |blob_file|.
//
// The collections |old_visited_blocks| and |new_visited_blocks| state which
// blocks already have operations reading or writing them. Only operations for
// unvisited blocks are produced by this function, which updates both
// collections with the blocks it uses.
//
// |version| is the payload generation configuration (a
// PayloadGenerationConfig, despite the parameter name).
//
// NOTE(review): |old_zero_blocks| is not described by the original comment;
// presumably it receives the zeroed blocks found in the old partition --
// confirm against the implementation. The bool result presumably reports
// success.
bool DeltaMovedAndZeroBlocks(std::vector<AnnotatedOperation>* aops,
                             const std::string& old_part,
                             const std::string& new_part,
                             size_t old_num_blocks,
                             size_t new_num_blocks,
                             ssize_t chunk_blocks,
                             const PayloadGenerationConfig& version,
                             BlobFileWriter* blob_file,
                             ExtentRanges* old_visited_blocks,
                             ExtentRanges* new_visited_blocks,
                             ExtentRanges* old_zero_blocks);
79 
// Appends to |aops| the operations needed to produce the file described by
// |new_file| in |new_part|. The file will be split in chunks of |chunk_blocks|
// blocks each, or treated as a single chunk if |chunk_blocks| is -1. If an old
// version of the file exists, it is described by |old_file| and is read from
// |old_part|. The operations added to |aops| reference the data blob in the
// |blob_file|. Returns true on success.
//
// NOTE(review): the previous wording of this comment referred to |name|,
// |old_extents|, |new_extents|, |old_deflates| and |new_deflates|, none of
// which are parameters of this function. Presumably that information is
// carried by the File objects |old_file| and |new_file| -- confirm against
// FilesystemInterface::File.
bool DeltaReadFile(std::vector<AnnotatedOperation>* aops,
                   const std::string& old_part,
                   const std::string& new_part,
                   const File& old_file,
                   const File& new_file,
                   ssize_t chunk_blocks,
                   const PayloadGenerationConfig& config,
                   BlobFileWriter* blob_file);
96 
// Reads the blocks |old_extents| from |old_part| (if it exists) and the
// |new_extents| from |new_part| and determines the smallest way to encode
// this |new_extents| for the diff. It stores necessary data in |out_data| and
// fills in |out_op|. If there's no change in old and new files, it creates a
// MOVE or SOURCE_COPY operation. If there is a change, the smallest of the
// operations allowed by the given |config| (REPLACE, REPLACE_BZ, BSDIFF,
// SOURCE_BSDIFF, PUFFDIFF or ZUCCHINI) wins.
// |new_extents| must not be empty. Returns true on success.
//
// NOTE(review): the previous wording mentioned |old_deflates| and
// |new_deflates| parameters; this signature has none. Presumably the deflate
// locations are now taken from |old_file| and |new_file| -- confirm against
// the implementation.
// TODO(197361113) Move logic to calculate deflates inside puffin.
bool ReadExtentsToDiff(const std::string& old_part,
                       const std::string& new_part,
                       const std::vector<Extent>& old_extents,
                       const std::vector<Extent>& new_extents,
                       const File& old_file,
                       const File& new_file,
                       const PayloadGenerationConfig& config,
                       brillo::Blob* out_data,
                       AnnotatedOperation* out_op);
116 
// Generates the best allowed full operation to produce |new_data|. The allowed
// operations are based on the payload |version|. The operation blob will be
// stored in |out_blob| and the resulting operation type in |out_type|. Returns
// whether a valid full operation was generated.
bool GenerateBestFullOperation(const brillo::Blob& new_data,
                               const PayloadVersion& version,
                               brillo::Blob* out_blob,
                               InstallOperation::Type* out_type);
125 
// Returns whether |op_type| is one of the REPLACE full operations.
// NOTE(review): the exact set of types treated as REPLACE operations is
// defined by the implementation, which is not part of this header.
bool IsAReplaceOperation(InstallOperation::Type op_type);
128 
// Returns true if an operation with type |op_type| has no |src_extents|, i.e.
// it does not read from the source (old) partition.
bool IsNoSourceOperation(InstallOperation::Type op_type);
131 
// Fills in |info| from the given |partition| configuration.
// NOTE(review): which PartitionInfo fields are populated, and what a false
// return means (presumably failure), is decided by the implementation, which
// is not visible in this header -- confirm there.
bool InitializePartitionInfo(const PartitionConfig& partition,
                             PartitionInfo* info);
134 
// Compare two AnnotatedOperations by the start block of the first Extent in
// their destination extents.
// NOTE(review): both operations are taken by value, so every call copies its
// two arguments. Switching to const references would need the matching change
// in the definition, which lives outside this header.
bool CompareAopsByDestination(AnnotatedOperation first_aop,
                              AnnotatedOperation second_aop);
139 
// Returns whether the filesystem in |device| is an ext[234] filesystem. In
// case of failure, such as if the file |device| doesn't exist or can't be
// read, it returns false.
bool IsExtFilesystem(const std::string& device);
144 
// Returns the maximum number of threads used to process the files (chunks) in
// parallel.
size_t GetMaxThreads();
147 
// Returns the old file whose file name has the shortest Levenshtein distance
// to |new_file_name|, chosen among the entries of |old_files_map|. The result
// is returned by value (a copy of the File).
// NOTE(review): |old_files_map| is presumably keyed by old file name, and the
// behavior for an empty map is not visible from this header -- confirm.
FilesystemInterface::File GetOldFile(
    const std::map<std::string, FilesystemInterface::File>& old_files_map,
    const std::string& new_file_name);
153 
// Reads the BSDIFF patch data stored in the |size| bytes starting at |data|,
// computes the list of blocks that can be COW_XOR, and stores these blocks in
// |aop|.
// NOTE(review): the bool result presumably reports success -- confirm against
// the implementation.
bool PopulateXorOps(AnnotatedOperation* aop, const uint8_t* data, size_t size);
157 
PopulateXorOps(AnnotatedOperation * aop,const brillo::Blob & patch_data)158 inline bool PopulateXorOps(AnnotatedOperation* aop,
159                            const brillo::Blob& patch_data) {
160   return PopulateXorOps(aop, patch_data.data(), patch_data.size());
161 }
162 
163 // A utility class that tries different algorithms and pick the patch with the
164 // smallest size.
165 
166 class BestDiffGenerator {
167  public:
BestDiffGenerator(const brillo::Blob & old_data,const brillo::Blob & new_data,const std::vector<Extent> & src_extents,const std::vector<Extent> & dst_extents,const File & old_file,const File & new_file,const PayloadGenerationConfig & config)168   BestDiffGenerator(const brillo::Blob& old_data,
169                     const brillo::Blob& new_data,
170                     const std::vector<Extent>& src_extents,
171                     const std::vector<Extent>& dst_extents,
172                     const File& old_file,
173                     const File& new_file,
174                     const PayloadGenerationConfig& config)
175       : old_data_(old_data),
176         new_data_(new_data),
177         src_extents_(src_extents),
178         dst_extents_(dst_extents),
179         old_deflates_(old_file.deflates),
180         new_deflates_(new_file.deflates),
181         old_block_info_(old_file.compressed_file_info),
182         new_block_info_(new_file.compressed_file_info),
183         config_(config) {
184     using std::vector;
185     // Find all deflate positions inside the given extents and then put all
186     // deflates together because we have already read all the extents into
187     // one buffer.
188     Dedup(&old_deflates_);
189     Dedup(&new_deflates_);
190     vector<puffin::BitExtent> src_deflates;
191     CHECK(deflate_utils::FindAndCompactDeflates(
192         src_extents_, old_deflates_, &src_deflates));
193 
194     vector<puffin::BitExtent> dst_deflates;
195     CHECK(deflate_utils::FindAndCompactDeflates(
196         dst_extents_, new_deflates_, &dst_deflates));
197     puffin::RemoveEqualBitExtents(
198         old_data_, new_data_, &src_deflates, &dst_deflates);
199     // See crbug.com/915559.
200     if (config.version.minor <= kPuffdiffMinorPayloadVersion) {
201       CHECK(
202           puffin::RemoveDeflatesWithBadDistanceCaches(old_data, &src_deflates));
203 
204       CHECK(
205           puffin::RemoveDeflatesWithBadDistanceCaches(new_data, &dst_deflates));
206     }
207     old_deflates_ = std::move(src_deflates);
208     new_deflates_ = std::move(dst_deflates);
209   }
210 
211   // Tries different algorithms and compares their patch sizes with the
212   // compressed full operation data in |data_blob|. If the size is smaller,
213   // updates the operation type in |aop| and bytes in |data_blob|.
214   bool GenerateBestDiffOperation(AnnotatedOperation* aop,
215                                  brillo::Blob* data_blob);
216 
217   bool GenerateBestDiffOperation(
218       const std::vector<std::pair<InstallOperation_Type, size_t>>&
219           diff_candidates,
220       AnnotatedOperation* aop,
221       brillo::Blob* data_blob);
222 
223  private:
224   std::vector<bsdiff::CompressorType> GetUsableCompressorTypes() const;
225   bool TryBsdiffAndUpdateOperation(InstallOperation_Type operation_type,
226                                    AnnotatedOperation* aop,
227                                    brillo::Blob* data_blob);
228   bool TryPuffdiffAndUpdateOperation(AnnotatedOperation* aop,
229                                      brillo::Blob* data_blob);
230   bool TryZucchiniAndUpdateOperation(AnnotatedOperation* aop,
231                                      brillo::Blob* data_blob);
232 
233   const brillo::Blob& old_data_;
234   const brillo::Blob& new_data_;
235   const std::vector<Extent>& src_extents_;
236   const std::vector<Extent>& dst_extents_;
237   std::vector<puffin::BitExtent> old_deflates_;
238   std::vector<puffin::BitExtent> new_deflates_;
239   const CompressedFile& old_block_info_;
240   const CompressedFile& new_block_info_;
241   const PayloadGenerationConfig& config_;
242 };
243 
244 }  // namespace diff_utils
245 
246 }  // namespace chromeos_update_engine
247 
248 #endif  // PAYLOAD_GENERATOR_DELTA_DIFF_UTILS_H_
249