1 //
2 // Copyright (C) 2020 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "update_engine/payload_generator/merge_sequence_generator.h"
18
19 #include <algorithm>
20 #include <limits>
21
22 #include "update_engine/payload_generator/extent_ranges.h"
23 #include "update_engine/payload_generator/extent_utils.h"
24 #include "update_engine/update_metadata.pb.h"
25
26 namespace chromeos_update_engine {
27
CreateCowMergeOperation(const Extent & src_extent,const Extent & dst_extent,CowMergeOperation::Type op_type,uint32_t src_offset)28 CowMergeOperation CreateCowMergeOperation(const Extent& src_extent,
29 const Extent& dst_extent,
30 CowMergeOperation::Type op_type,
31 uint32_t src_offset) {
32 CowMergeOperation ret;
33 ret.set_type(op_type);
34 *ret.mutable_src_extent() = src_extent;
35 *ret.mutable_dst_extent() = dst_extent;
36 ret.set_src_offset(src_offset);
37 return ret;
38 }
39
operator <<(std::ostream & os,const CowMergeOperation & merge_operation)40 std::ostream& operator<<(std::ostream& os,
41 const CowMergeOperation& merge_operation) {
42 os << "CowMergeOperation src extent: "
43 << ExtentsToString({merge_operation.src_extent()})
44 << ", dst extent: " << ExtentsToString({merge_operation.dst_extent()});
45 if (merge_operation.has_src_offset()) {
46 os << ", src offset: " << merge_operation.src_offset();
47 }
48 os << " op_type: ";
49 if (merge_operation.type() == CowMergeOperation::COW_COPY) {
50 os << "COW_COPY";
51 } else if (merge_operation.type() == CowMergeOperation::COW_XOR) {
52 os << "COW_XOR";
53 } else {
54 os << merge_operation.type();
55 }
56 return os;
57 }
58
59 // The OTA generation guarantees that all blocks in the dst extent will be
60 // written only once. So we can use it to order the CowMergeOperation.
operator <(const CowMergeOperation & op1,const CowMergeOperation & op2)61 bool operator<(const CowMergeOperation& op1, const CowMergeOperation& op2) {
62 return op1.dst_extent().start_block() < op2.dst_extent().start_block();
63 }
64
operator ==(const CowMergeOperation & op1,const CowMergeOperation & op2)65 bool operator==(const CowMergeOperation& op1, const CowMergeOperation& op2) {
66 return op1.type() == op2.type() && op1.src_extent() == op2.src_extent() &&
67 op1.dst_extent() == op2.dst_extent();
68 }
69
70 template <typename T>
GetDifference(T first,T second)71 constexpr T GetDifference(T first, T second) {
72 T abs_diff = (first > second) ? (first - second) : (second - first);
73 return abs_diff;
74 }
75
GetCowOpType(InstallOperation::Type install_op_type)76 CowMergeOperation::Type GetCowOpType(InstallOperation::Type install_op_type) {
77 switch (install_op_type) {
78 case InstallOperation::SOURCE_COPY:
79 return CowMergeOperation::COW_COPY;
80 case InstallOperation::SOURCE_BSDIFF:
81 case InstallOperation::BROTLI_BSDIFF:
82 case InstallOperation::PUFFDIFF:
83 return CowMergeOperation::COW_XOR;
84 default:
85 CHECK(false) << "Unknown install op type: " << install_op_type;
86 return CowMergeOperation::COW_REPLACE;
87 }
88 }
89
SplitSelfOverlapping(const Extent & src_extent,const Extent & dst_extent,std::vector<CowMergeOperation> * sequence)90 void SplitSelfOverlapping(const Extent& src_extent,
91 const Extent& dst_extent,
92 std::vector<CowMergeOperation>* sequence) {
93 CHECK_EQ(src_extent.num_blocks(), dst_extent.num_blocks());
94 if (src_extent.start_block() == dst_extent.start_block()) {
95 sequence->emplace_back(CreateCowMergeOperation(
96 src_extent, dst_extent, CowMergeOperation::COW_COPY));
97 return;
98 }
99
100 const size_t diff =
101 GetDifference(src_extent.start_block(), dst_extent.start_block());
102 for (size_t i = 0; i < src_extent.num_blocks(); i += diff) {
103 auto num_blocks = std::min<size_t>(diff, src_extent.num_blocks() - i);
104 sequence->emplace_back(CreateCowMergeOperation(
105 ExtentForRange(i + src_extent.start_block(), num_blocks),
106 ExtentForRange(i + dst_extent.start_block(), num_blocks),
107 CowMergeOperation::COW_COPY));
108 }
109 }
110
ProcessXorOps(std::vector<CowMergeOperation> * sequence,const AnnotatedOperation & aop)111 static bool ProcessXorOps(std::vector<CowMergeOperation>* sequence,
112 const AnnotatedOperation& aop) {
113 const auto size_before = sequence->size();
114 sequence->insert(sequence->end(), aop.xor_ops.begin(), aop.xor_ops.end());
115 std::for_each(
116 sequence->begin() + size_before,
117 sequence->end(),
118 [](CowMergeOperation& op) {
119 CHECK_EQ(op.type(), CowMergeOperation::COW_XOR);
120 // If |src_offset| is greater than 0, then we are reading 1
121 // extra block at the end of src_extent. This dependency must
122 // be honored during merge sequence generation, or we can end
123 // up with a corrupted device after merge.
124 if (op.src_offset() > 0) {
125 if (op.src_extent().num_blocks() == op.dst_extent().num_blocks()) {
126 op.mutable_src_extent()->set_num_blocks(
127 op.src_extent().num_blocks() + 1);
128 }
129 CHECK_EQ(op.src_extent().num_blocks(),
130 op.dst_extent().num_blocks() + 1);
131 }
132 CHECK_NE(op.src_extent().start_block(),
133 std::numeric_limits<uint64_t>::max());
134 });
135 return true;
136 }
137
ProcessCopyOps(std::vector<CowMergeOperation> * sequence,const AnnotatedOperation & aop)138 static bool ProcessCopyOps(std::vector<CowMergeOperation>* sequence,
139 const AnnotatedOperation& aop) {
140 CHECK_EQ(GetCowOpType(aop.op.type()), CowMergeOperation::COW_COPY);
141 if (aop.op.dst_extents().size() != 1) {
142 std::vector<Extent> out_extents;
143 ExtentsToVector(aop.op.dst_extents(), &out_extents);
144 LOG(ERROR)
145 << "The dst extents for source_copy are expected to be contiguous,"
146 << " dst extents: " << ExtentsToString(out_extents);
147 return false;
148 }
149 // Split the source extents.
150 size_t used_blocks = 0;
151 for (const auto& src_extent : aop.op.src_extents()) {
152 // The dst_extent in the merge sequence will be a subset of
153 // InstallOperation's dst_extent. This will simplify the OTA -> COW
154 // conversion when we install the payload.
155 Extent dst_extent =
156 ExtentForRange(aop.op.dst_extents(0).start_block() + used_blocks,
157 src_extent.num_blocks());
158 // Self-overlapping operation, must split into multiple non
159 // self-overlapping ops
160 if (ExtentRanges::ExtentsOverlap(src_extent, dst_extent)) {
161 SplitSelfOverlapping(src_extent, dst_extent, sequence);
162 } else {
163 sequence->emplace_back(CreateCowMergeOperation(
164 src_extent, dst_extent, CowMergeOperation::COW_COPY));
165 }
166 used_blocks += src_extent.num_blocks();
167 }
168
169 if (used_blocks != aop.op.dst_extents(0).num_blocks()) {
170 LOG(ERROR) << "Number of blocks in src extents doesn't equal to the"
171 << " ones in the dst extents, src blocks " << used_blocks
172 << ", dst blocks " << aop.op.dst_extents(0).num_blocks();
173 return false;
174 }
175 return true;
176 }
177
Create(const std::vector<AnnotatedOperation> & aops,std::string_view partition_name)178 std::unique_ptr<MergeSequenceGenerator> MergeSequenceGenerator::Create(
179 const std::vector<AnnotatedOperation>& aops,
180 std::string_view partition_name) {
181 std::vector<CowMergeOperation> sequence;
182
183 for (const auto& aop : aops) {
184 if (aop.op.type() == InstallOperation::SOURCE_COPY) {
185 if (!ProcessCopyOps(&sequence, aop)) {
186 return nullptr;
187 }
188 } else if (!aop.xor_ops.empty()) {
189 if (!ProcessXorOps(&sequence, aop)) {
190 return nullptr;
191 }
192 }
193 }
194
195 return std::unique_ptr<MergeSequenceGenerator>(
196 new MergeSequenceGenerator(sequence, partition_name));
197 }
198
199 template <typename T>
MaxOutDegree(const T & nodes,const std::map<CowMergeOperation,std::set<CowMergeOperation>> & merge_after)200 CowMergeOperation MaxOutDegree(
201 const T& nodes,
202 const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
203 merge_after) {
204 // Rationale for this algorithm:
205 // We only need to remove nodes from the graph if the graph contains a cycle.
206 // Any graph of N nodes has cycle iff number of edges >= N.
207 // So, to restore the graph back to an acyclic state, we need to keep removing
208 // edges until we have <N edges left. To minimize the number of nodes removed,
209 // we always remove the node with maximum out degree.
210 CowMergeOperation best;
211 size_t max_out_degree = 0;
212 const auto has_xor =
213 std::any_of(nodes.begin(),
214 nodes.end(),
215 [&merge_after](const CowMergeOperation& node) {
216 if (node.type() != CowMergeOperation::COW_XOR) {
217 return false;
218 }
219 auto it = merge_after.find(node);
220 if (it == merge_after.end()) {
221 return false;
222 }
223 return it->second.size() > 0;
224 });
225 for (const auto& op : nodes) {
226 if (has_xor && op.type() != CowMergeOperation::COW_XOR) {
227 continue;
228 }
229 const auto out_degree = merge_after.at(op).size();
230 if (out_degree > max_out_degree) {
231 best = op;
232 max_out_degree = out_degree;
233 } else if (out_degree == max_out_degree) {
234 if (op.src_extent().num_blocks() < best.src_extent().num_blocks()) {
235 best = op;
236 }
237 }
238 }
239 CHECK_NE(max_out_degree, 0UL);
240 return best;
241 }
242
243 template <typename T>
244 struct MapKeyIterator {
operator ++chromeos_update_engine::MapKeyIterator245 MapKeyIterator<T>& operator++() {
246 ++it;
247 return *this;
248 }
operator --chromeos_update_engine::MapKeyIterator249 MapKeyIterator<T>& operator--() {
250 --it;
251 return *this;
252 }
operator ==chromeos_update_engine::MapKeyIterator253 bool operator==(const MapKeyIterator<T>& rhs) const { return it == rhs.it; }
operator !=chromeos_update_engine::MapKeyIterator254 bool operator!=(const MapKeyIterator<T>& rhs) const { return it != rhs.it; }
operator ->chromeos_update_engine::MapKeyIterator255 auto&& operator->() const { return it->first; }
operator *chromeos_update_engine::MapKeyIterator256 auto&& operator*() const { return it->first; }
257 T it;
258 };
259
260 template <typename T, typename U>
261 struct MapKeyRange {
beginchromeos_update_engine::MapKeyRange262 auto begin() const {
263 return MapKeyIterator<typename std::map<T, U>::const_iterator>{map.begin()};
264 }
endchromeos_update_engine::MapKeyRange265 auto end() const {
266 return MapKeyIterator<typename std::map<T, U>::const_iterator>{map.end()};
267 }
268 std::map<T, U> map;
269 };
270
MaxOutDegree(const std::map<CowMergeOperation,int> & incoming_edges,const std::map<CowMergeOperation,std::set<CowMergeOperation>> & merge_after)271 CowMergeOperation MaxOutDegree(
272 const std::map<CowMergeOperation, int>& incoming_edges,
273 const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
274 merge_after) {
275 return MaxOutDegree(MapKeyRange<CowMergeOperation, int>{incoming_edges},
276 merge_after);
277 }
278
279 // Given a potentially cyclic graph, return a node to remove to break cycles
280 // |incoming_edges| stores nodes' in degree, and |merge_after| is an outgoing
281 // edge list. For example, |merge_after[a]| returns all nodes which `a` has an
282 // out going edge to.
283 // The only requirement of this function is to return a node which is in
284 // |incoming_edges| . As long as this is satisfied, merge sequence generation
285 // will work. Caller will keep removing nodes returned by this function until
286 // the graph has no cycles. However, the choice of which node to remove can
287 // greatly impact COW sizes. Nodes removed from the graph will be converted to a
288 // COW_REPLACE operation, taking more disk space. So this function should try to
289 // pick a node which minimizes number of nodes we have to remove. (Modulo the
290 // weight of each node, which is how many blocks a CowMergeOperation touches)
PickConvertToRaw(const std::map<CowMergeOperation,int> & incoming_edges,const std::map<CowMergeOperation,std::set<CowMergeOperation>> & merge_after)291 CowMergeOperation PickConvertToRaw(
292 const std::map<CowMergeOperation, int>& incoming_edges,
293 const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
294 merge_after) {
295 return MaxOutDegree(incoming_edges, merge_after);
296 }
297
298 std::map<CowMergeOperation, std::set<CowMergeOperation>>
FindDependency(const std::vector<CowMergeOperation> & operations)299 MergeSequenceGenerator::FindDependency(
300 const std::vector<CowMergeOperation>& operations) {
301 LOG(INFO) << "Finding dependencies";
302
303 // Since the OTA operation may reuse some source blocks, use the binary
304 // search on sorted dst extents to find overlaps.
305 std::map<CowMergeOperation, std::set<CowMergeOperation>> merge_after;
306 for (const auto& op : operations) {
307 // lower bound (inclusive): dst extent's end block >= src extent's start
308 // block.
309 const auto lower_it = std::lower_bound(
310 operations.begin(),
311 operations.end(),
312 op,
313 [](const CowMergeOperation& it, const CowMergeOperation& op) {
314 auto dst_end_block =
315 it.dst_extent().start_block() + it.dst_extent().num_blocks() - 1;
316 return dst_end_block < op.src_extent().start_block();
317 });
318 // upper bound: dst extent's start block > src extent's end block
319 const auto upper_it = std::upper_bound(
320 lower_it,
321 operations.end(),
322 op,
323 [](const CowMergeOperation& op, const CowMergeOperation& it) {
324 auto src_end_block =
325 op.src_extent().start_block() + op.src_extent().num_blocks() - 1;
326 return src_end_block < it.dst_extent().start_block();
327 });
328
329 // TODO(xunchang) skip inserting the empty set to merge_after.
330 if (lower_it == upper_it) {
331 merge_after.insert({op, {}});
332 } else {
333 std::set<CowMergeOperation> operations(lower_it, upper_it);
334 auto it = operations.find(op);
335 if (it != operations.end()) {
336 LOG(INFO) << "Self overlapping " << op;
337 operations.erase(it);
338 }
339 auto ret = merge_after.emplace(op, std::move(operations));
340 // Check the insertion indeed happens.
341 CHECK(ret.second) << op;
342 }
343 }
344
345 return merge_after;
346 }
347
Generate(std::vector<CowMergeOperation> * sequence) const348 bool MergeSequenceGenerator::Generate(
349 std::vector<CowMergeOperation>* sequence) const {
350 sequence->clear();
351
352 LOG(INFO) << "Generating sequence";
353
354 // Use the non-DFS version of the topology sort. So we can control the
355 // operations to discard to break cycles; thus yielding a deterministic
356 // sequence.
357 std::map<CowMergeOperation, int> incoming_edges;
358 for (const auto& it : merge_after_) {
359 for (const auto& blocked : it.second) {
360 // Value is default initialized to 0.
361 incoming_edges[blocked] += 1;
362 }
363 }
364
365 // Technically, we can use std::unordered_set or just a std::vector. but
366 // std::set gives the benefit where operations are sorted by dst blocks. This
367 // will ensure that operations that do not have dependency constraints appear
368 // in increasing block order. Such order would help snapuserd batch merges and
369 // improve boot time, but isn't strictly needed for correctness.
370 std::set<CowMergeOperation> free_operations;
371 for (const auto& op : operations_) {
372 if (incoming_edges.find(op) == incoming_edges.end()) {
373 free_operations.insert(op);
374 }
375 }
376
377 std::vector<CowMergeOperation> merge_sequence;
378 std::set<CowMergeOperation> convert_to_raw;
379 while (!incoming_edges.empty()) {
380 if (!free_operations.empty()) {
381 merge_sequence.insert(
382 merge_sequence.end(), free_operations.begin(), free_operations.end());
383 } else {
384 auto to_convert = PickConvertToRaw(incoming_edges, merge_after_);
385 // The operation we pick must be one of the nodes not already in merge
386 // sequence.
387 CHECK(incoming_edges.find(to_convert) != incoming_edges.end());
388
389 free_operations.insert(to_convert);
390 convert_to_raw.insert(to_convert);
391 LOG(INFO) << "Converting operation to raw " << to_convert;
392 }
393
394 std::set<CowMergeOperation> next_free_operations;
395 for (const auto& op : free_operations) {
396 incoming_edges.erase(op);
397
398 // Now that this particular operation is merged, other operations
399 // blocked by this one may be free. Decrement the count of blocking
400 // operations, and set up the free operations for the next iteration.
401 for (const auto& blocked : merge_after_.at(op)) {
402 auto it = incoming_edges.find(blocked);
403 if (it == incoming_edges.end()) {
404 continue;
405 }
406
407 auto blocking_transfer_count = &it->second;
408 if (*blocking_transfer_count <= 0) {
409 LOG(ERROR) << "Unexpected count in merge after map "
410 << blocking_transfer_count;
411 return false;
412 }
413 // This operation is no longer blocked by anyone. Add it to the merge
414 // sequence in the next iteration.
415 *blocking_transfer_count -= 1;
416 if (*blocking_transfer_count == 0) {
417 next_free_operations.insert(blocked);
418 }
419 }
420 }
421
422 LOG(INFO) << "Remaining transfers " << incoming_edges.size()
423 << ", free transfers " << free_operations.size()
424 << ", merge_sequence size " << merge_sequence.size();
425 free_operations = std::move(next_free_operations);
426 }
427
428 if (!free_operations.empty()) {
429 merge_sequence.insert(
430 merge_sequence.end(), free_operations.begin(), free_operations.end());
431 }
432
433 CHECK_EQ(operations_.size(), merge_sequence.size() + convert_to_raw.size());
434
435 size_t blocks_in_sequence = 0;
436 for (const CowMergeOperation& transfer : merge_sequence) {
437 blocks_in_sequence += transfer.dst_extent().num_blocks();
438 }
439
440 size_t blocks_in_raw = 0;
441 for (const CowMergeOperation& transfer : convert_to_raw) {
442 blocks_in_raw += transfer.dst_extent().num_blocks();
443 }
444
445 LOG(INFO) << "Blocks in merge sequence " << blocks_in_sequence
446 << ", blocks in raw " << blocks_in_raw << ", partition "
447 << partition_name_;
448 if (!ValidateSequence(merge_sequence)) {
449 LOG(ERROR) << "Invalid Sequence";
450 return false;
451 }
452
453 *sequence = std::move(merge_sequence);
454 return true;
455 }
456
ValidateSequence(const std::vector<CowMergeOperation> & sequence)457 bool MergeSequenceGenerator::ValidateSequence(
458 const std::vector<CowMergeOperation>& sequence) {
459 LOG(INFO) << "Validating merge sequence";
460 ExtentRanges visited;
461 for (const auto& op : sequence) {
462 // If |src_offset| is greater than zero, dependency should include 1 extra
463 // block at end of src_extent, as the OP actually references data past
464 // original src_extent.
465 if (op.src_offset() > 0) {
466 CHECK_EQ(op.src_extent().num_blocks(), op.dst_extent().num_blocks() + 1)
467 << op;
468 } else {
469 CHECK_EQ(op.src_extent().num_blocks(), op.dst_extent().num_blocks())
470 << op;
471 }
472 if (visited.OverlapsWithExtent(op.src_extent())) {
473 LOG(ERROR) << "Transfer violates the merge sequence " << op
474 << "Visited extent ranges: ";
475 visited.Dump();
476 return false;
477 }
478
479 CHECK(!visited.OverlapsWithExtent(op.dst_extent()))
480 << "dst extent should write only once.";
481 visited.AddExtent(op.dst_extent());
482 }
483
484 return true;
485 }
486
487 } // namespace chromeos_update_engine
488