1 //
2 // Copyright (C) 2020 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "update_engine/payload_generator/merge_sequence_generator.h"
18 
19 #include <algorithm>
20 #include <limits>
21 
22 #include "update_engine/payload_generator/extent_ranges.h"
23 #include "update_engine/payload_generator/extent_utils.h"
24 #include "update_engine/update_metadata.pb.h"
25 
26 namespace chromeos_update_engine {
27 
CreateCowMergeOperation(const Extent & src_extent,const Extent & dst_extent,CowMergeOperation::Type op_type,uint32_t src_offset)28 CowMergeOperation CreateCowMergeOperation(const Extent& src_extent,
29                                           const Extent& dst_extent,
30                                           CowMergeOperation::Type op_type,
31                                           uint32_t src_offset) {
32   CowMergeOperation ret;
33   ret.set_type(op_type);
34   *ret.mutable_src_extent() = src_extent;
35   *ret.mutable_dst_extent() = dst_extent;
36   ret.set_src_offset(src_offset);
37   return ret;
38 }
39 
operator <<(std::ostream & os,const CowMergeOperation & merge_operation)40 std::ostream& operator<<(std::ostream& os,
41                          const CowMergeOperation& merge_operation) {
42   os << "CowMergeOperation src extent: "
43      << ExtentsToString({merge_operation.src_extent()})
44      << ", dst extent: " << ExtentsToString({merge_operation.dst_extent()});
45   if (merge_operation.has_src_offset()) {
46     os << ", src offset: " << merge_operation.src_offset();
47   }
48   os << " op_type: ";
49   if (merge_operation.type() == CowMergeOperation::COW_COPY) {
50     os << "COW_COPY";
51   } else if (merge_operation.type() == CowMergeOperation::COW_XOR) {
52     os << "COW_XOR";
53   } else {
54     os << merge_operation.type();
55   }
56   return os;
57 }
58 
59 // The OTA generation guarantees that all blocks in the dst extent will be
60 // written only once. So we can use it to order the CowMergeOperation.
operator <(const CowMergeOperation & op1,const CowMergeOperation & op2)61 bool operator<(const CowMergeOperation& op1, const CowMergeOperation& op2) {
62   return op1.dst_extent().start_block() < op2.dst_extent().start_block();
63 }
64 
operator ==(const CowMergeOperation & op1,const CowMergeOperation & op2)65 bool operator==(const CowMergeOperation& op1, const CowMergeOperation& op2) {
66   return op1.type() == op2.type() && op1.src_extent() == op2.src_extent() &&
67          op1.dst_extent() == op2.dst_extent();
68 }
69 
70 template <typename T>
GetDifference(T first,T second)71 constexpr T GetDifference(T first, T second) {
72   T abs_diff = (first > second) ? (first - second) : (second - first);
73   return abs_diff;
74 }
75 
GetCowOpType(InstallOperation::Type install_op_type)76 CowMergeOperation::Type GetCowOpType(InstallOperation::Type install_op_type) {
77   switch (install_op_type) {
78     case InstallOperation::SOURCE_COPY:
79       return CowMergeOperation::COW_COPY;
80     case InstallOperation::SOURCE_BSDIFF:
81     case InstallOperation::BROTLI_BSDIFF:
82     case InstallOperation::PUFFDIFF:
83       return CowMergeOperation::COW_XOR;
84     default:
85       CHECK(false) << "Unknown install op type: " << install_op_type;
86       return CowMergeOperation::COW_REPLACE;
87   }
88 }
89 
SplitSelfOverlapping(const Extent & src_extent,const Extent & dst_extent,std::vector<CowMergeOperation> * sequence)90 void SplitSelfOverlapping(const Extent& src_extent,
91                           const Extent& dst_extent,
92                           std::vector<CowMergeOperation>* sequence) {
93   CHECK_EQ(src_extent.num_blocks(), dst_extent.num_blocks());
94   if (src_extent.start_block() == dst_extent.start_block()) {
95     sequence->emplace_back(CreateCowMergeOperation(
96         src_extent, dst_extent, CowMergeOperation::COW_COPY));
97     return;
98   }
99 
100   const size_t diff =
101       GetDifference(src_extent.start_block(), dst_extent.start_block());
102   for (size_t i = 0; i < src_extent.num_blocks(); i += diff) {
103     auto num_blocks = std::min<size_t>(diff, src_extent.num_blocks() - i);
104     sequence->emplace_back(CreateCowMergeOperation(
105         ExtentForRange(i + src_extent.start_block(), num_blocks),
106         ExtentForRange(i + dst_extent.start_block(), num_blocks),
107         CowMergeOperation::COW_COPY));
108   }
109 }
110 
ProcessXorOps(std::vector<CowMergeOperation> * sequence,const AnnotatedOperation & aop)111 static bool ProcessXorOps(std::vector<CowMergeOperation>* sequence,
112                           const AnnotatedOperation& aop) {
113   const auto size_before = sequence->size();
114   sequence->insert(sequence->end(), aop.xor_ops.begin(), aop.xor_ops.end());
115   std::for_each(
116       sequence->begin() + size_before,
117       sequence->end(),
118       [](CowMergeOperation& op) {
119         CHECK_EQ(op.type(), CowMergeOperation::COW_XOR);
120         // If |src_offset| is greater than 0, then we are reading 1
121         // extra block at the end of src_extent. This dependency must
122         // be honored during merge sequence generation, or we can end
123         // up with a corrupted device after merge.
124         if (op.src_offset() > 0) {
125           if (op.src_extent().num_blocks() == op.dst_extent().num_blocks()) {
126             op.mutable_src_extent()->set_num_blocks(
127                 op.src_extent().num_blocks() + 1);
128           }
129           CHECK_EQ(op.src_extent().num_blocks(),
130                    op.dst_extent().num_blocks() + 1);
131         }
132         CHECK_NE(op.src_extent().start_block(),
133                  std::numeric_limits<uint64_t>::max());
134       });
135   return true;
136 }
137 
ProcessCopyOps(std::vector<CowMergeOperation> * sequence,const AnnotatedOperation & aop)138 static bool ProcessCopyOps(std::vector<CowMergeOperation>* sequence,
139                            const AnnotatedOperation& aop) {
140   CHECK_EQ(GetCowOpType(aop.op.type()), CowMergeOperation::COW_COPY);
141   if (aop.op.dst_extents().size() != 1) {
142     std::vector<Extent> out_extents;
143     ExtentsToVector(aop.op.dst_extents(), &out_extents);
144     LOG(ERROR)
145         << "The dst extents for source_copy are expected to be contiguous,"
146         << " dst extents: " << ExtentsToString(out_extents);
147     return false;
148   }
149   // Split the source extents.
150   size_t used_blocks = 0;
151   for (const auto& src_extent : aop.op.src_extents()) {
152     // The dst_extent in the merge sequence will be a subset of
153     // InstallOperation's dst_extent. This will simplify the OTA -> COW
154     // conversion when we install the payload.
155     Extent dst_extent =
156         ExtentForRange(aop.op.dst_extents(0).start_block() + used_blocks,
157                        src_extent.num_blocks());
158     // Self-overlapping operation, must split into multiple non
159     // self-overlapping ops
160     if (ExtentRanges::ExtentsOverlap(src_extent, dst_extent)) {
161       SplitSelfOverlapping(src_extent, dst_extent, sequence);
162     } else {
163       sequence->emplace_back(CreateCowMergeOperation(
164           src_extent, dst_extent, CowMergeOperation::COW_COPY));
165     }
166     used_blocks += src_extent.num_blocks();
167   }
168 
169   if (used_blocks != aop.op.dst_extents(0).num_blocks()) {
170     LOG(ERROR) << "Number of blocks in src extents doesn't equal to the"
171                << " ones in the dst extents, src blocks " << used_blocks
172                << ", dst blocks " << aop.op.dst_extents(0).num_blocks();
173     return false;
174   }
175   return true;
176 }
177 
Create(const std::vector<AnnotatedOperation> & aops,std::string_view partition_name)178 std::unique_ptr<MergeSequenceGenerator> MergeSequenceGenerator::Create(
179     const std::vector<AnnotatedOperation>& aops,
180     std::string_view partition_name) {
181   std::vector<CowMergeOperation> sequence;
182 
183   for (const auto& aop : aops) {
184     if (aop.op.type() == InstallOperation::SOURCE_COPY) {
185       if (!ProcessCopyOps(&sequence, aop)) {
186         return nullptr;
187       }
188     } else if (!aop.xor_ops.empty()) {
189       if (!ProcessXorOps(&sequence, aop)) {
190         return nullptr;
191       }
192     }
193   }
194 
195   return std::unique_ptr<MergeSequenceGenerator>(
196       new MergeSequenceGenerator(sequence, partition_name));
197 }
198 
199 template <typename T>
MaxOutDegree(const T & nodes,const std::map<CowMergeOperation,std::set<CowMergeOperation>> & merge_after)200 CowMergeOperation MaxOutDegree(
201     const T& nodes,
202     const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
203         merge_after) {
204   // Rationale for this algorithm:
205   // We only need to remove nodes from the graph if the graph contains a cycle.
206   // Any graph of N nodes has cycle iff number of edges >= N.
207   // So, to restore the graph back to an acyclic state, we need to keep removing
208   // edges until we have <N edges left. To minimize the number of nodes removed,
209   // we always remove the node with maximum out degree.
210   CowMergeOperation best;
211   size_t max_out_degree = 0;
212   const auto has_xor =
213       std::any_of(nodes.begin(),
214                   nodes.end(),
215                   [&merge_after](const CowMergeOperation& node) {
216                     if (node.type() != CowMergeOperation::COW_XOR) {
217                       return false;
218                     }
219                     auto it = merge_after.find(node);
220                     if (it == merge_after.end()) {
221                       return false;
222                     }
223                     return it->second.size() > 0;
224                   });
225   for (const auto& op : nodes) {
226     if (has_xor && op.type() != CowMergeOperation::COW_XOR) {
227       continue;
228     }
229     const auto out_degree = merge_after.at(op).size();
230     if (out_degree > max_out_degree) {
231       best = op;
232       max_out_degree = out_degree;
233     } else if (out_degree == max_out_degree) {
234       if (op.src_extent().num_blocks() < best.src_extent().num_blocks()) {
235         best = op;
236       }
237     }
238   }
239   CHECK_NE(max_out_degree, 0UL);
240   return best;
241 }
242 
243 template <typename T>
244 struct MapKeyIterator {
operator ++chromeos_update_engine::MapKeyIterator245   MapKeyIterator<T>& operator++() {
246     ++it;
247     return *this;
248   }
operator --chromeos_update_engine::MapKeyIterator249   MapKeyIterator<T>& operator--() {
250     --it;
251     return *this;
252   }
operator ==chromeos_update_engine::MapKeyIterator253   bool operator==(const MapKeyIterator<T>& rhs) const { return it == rhs.it; }
operator !=chromeos_update_engine::MapKeyIterator254   bool operator!=(const MapKeyIterator<T>& rhs) const { return it != rhs.it; }
operator ->chromeos_update_engine::MapKeyIterator255   auto&& operator->() const { return it->first; }
operator *chromeos_update_engine::MapKeyIterator256   auto&& operator*() const { return it->first; }
257   T it;
258 };
259 
260 template <typename T, typename U>
261 struct MapKeyRange {
beginchromeos_update_engine::MapKeyRange262   auto begin() const {
263     return MapKeyIterator<typename std::map<T, U>::const_iterator>{map.begin()};
264   }
endchromeos_update_engine::MapKeyRange265   auto end() const {
266     return MapKeyIterator<typename std::map<T, U>::const_iterator>{map.end()};
267   }
268   std::map<T, U> map;
269 };
270 
MaxOutDegree(const std::map<CowMergeOperation,int> & incoming_edges,const std::map<CowMergeOperation,std::set<CowMergeOperation>> & merge_after)271 CowMergeOperation MaxOutDegree(
272     const std::map<CowMergeOperation, int>& incoming_edges,
273     const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
274         merge_after) {
275   return MaxOutDegree(MapKeyRange<CowMergeOperation, int>{incoming_edges},
276                       merge_after);
277 }
278 
279 // Given a potentially cyclic graph, return a node to remove to break cycles
280 // |incoming_edges| stores nodes' in degree, and |merge_after| is an outgoing
281 // edge list. For example, |merge_after[a]| returns all nodes which `a` has an
282 // out going edge to.
283 // The only requirement of this function is to return a node which is in
284 // |incoming_edges| . As long as this is satisfied, merge sequence generation
285 // will work. Caller will keep removing nodes returned by this function until
286 // the graph has no cycles. However, the choice of which node to remove can
287 // greatly impact COW sizes. Nodes removed from the graph will be converted to a
288 // COW_REPLACE operation, taking more disk space. So this function should try to
289 // pick a node which minimizes number of nodes we have to remove. (Modulo the
290 // weight of each node, which is how many blocks a CowMergeOperation touches)
PickConvertToRaw(const std::map<CowMergeOperation,int> & incoming_edges,const std::map<CowMergeOperation,std::set<CowMergeOperation>> & merge_after)291 CowMergeOperation PickConvertToRaw(
292     const std::map<CowMergeOperation, int>& incoming_edges,
293     const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
294         merge_after) {
295   return MaxOutDegree(incoming_edges, merge_after);
296 }
297 
298 std::map<CowMergeOperation, std::set<CowMergeOperation>>
FindDependency(const std::vector<CowMergeOperation> & operations)299 MergeSequenceGenerator::FindDependency(
300     const std::vector<CowMergeOperation>& operations) {
301   LOG(INFO) << "Finding dependencies";
302 
303   // Since the OTA operation may reuse some source blocks, use the binary
304   // search on sorted dst extents to find overlaps.
305   std::map<CowMergeOperation, std::set<CowMergeOperation>> merge_after;
306   for (const auto& op : operations) {
307     // lower bound (inclusive): dst extent's end block >= src extent's start
308     // block.
309     const auto lower_it = std::lower_bound(
310         operations.begin(),
311         operations.end(),
312         op,
313         [](const CowMergeOperation& it, const CowMergeOperation& op) {
314           auto dst_end_block =
315               it.dst_extent().start_block() + it.dst_extent().num_blocks() - 1;
316           return dst_end_block < op.src_extent().start_block();
317         });
318     // upper bound: dst extent's start block > src extent's end block
319     const auto upper_it = std::upper_bound(
320         lower_it,
321         operations.end(),
322         op,
323         [](const CowMergeOperation& op, const CowMergeOperation& it) {
324           auto src_end_block =
325               op.src_extent().start_block() + op.src_extent().num_blocks() - 1;
326           return src_end_block < it.dst_extent().start_block();
327         });
328 
329     // TODO(xunchang) skip inserting the empty set to merge_after.
330     if (lower_it == upper_it) {
331       merge_after.insert({op, {}});
332     } else {
333       std::set<CowMergeOperation> operations(lower_it, upper_it);
334       auto it = operations.find(op);
335       if (it != operations.end()) {
336         LOG(INFO) << "Self overlapping " << op;
337         operations.erase(it);
338       }
339       auto ret = merge_after.emplace(op, std::move(operations));
340       // Check the insertion indeed happens.
341       CHECK(ret.second) << op;
342     }
343   }
344 
345   return merge_after;
346 }
347 
Generate(std::vector<CowMergeOperation> * sequence) const348 bool MergeSequenceGenerator::Generate(
349     std::vector<CowMergeOperation>* sequence) const {
350   sequence->clear();
351 
352   LOG(INFO) << "Generating sequence";
353 
354   // Use the non-DFS version of the topology sort. So we can control the
355   // operations to discard to break cycles; thus yielding a deterministic
356   // sequence.
357   std::map<CowMergeOperation, int> incoming_edges;
358   for (const auto& it : merge_after_) {
359     for (const auto& blocked : it.second) {
360       // Value is default initialized to 0.
361       incoming_edges[blocked] += 1;
362     }
363   }
364 
365   // Technically, we can use std::unordered_set or just a std::vector. but
366   // std::set gives the benefit where operations are sorted by dst blocks. This
367   // will ensure that operations that do not have dependency constraints appear
368   // in increasing block order. Such order would help snapuserd batch merges and
369   // improve boot time, but isn't strictly needed for correctness.
370   std::set<CowMergeOperation> free_operations;
371   for (const auto& op : operations_) {
372     if (incoming_edges.find(op) == incoming_edges.end()) {
373       free_operations.insert(op);
374     }
375   }
376 
377   std::vector<CowMergeOperation> merge_sequence;
378   std::set<CowMergeOperation> convert_to_raw;
379   while (!incoming_edges.empty()) {
380     if (!free_operations.empty()) {
381       merge_sequence.insert(
382           merge_sequence.end(), free_operations.begin(), free_operations.end());
383     } else {
384       auto to_convert = PickConvertToRaw(incoming_edges, merge_after_);
385       // The operation we pick must be one of the nodes not already in merge
386       // sequence.
387       CHECK(incoming_edges.find(to_convert) != incoming_edges.end());
388 
389       free_operations.insert(to_convert);
390       convert_to_raw.insert(to_convert);
391       LOG(INFO) << "Converting operation to raw " << to_convert;
392     }
393 
394     std::set<CowMergeOperation> next_free_operations;
395     for (const auto& op : free_operations) {
396       incoming_edges.erase(op);
397 
398       // Now that this particular operation is merged, other operations
399       // blocked by this one may be free. Decrement the count of blocking
400       // operations, and set up the free operations for the next iteration.
401       for (const auto& blocked : merge_after_.at(op)) {
402         auto it = incoming_edges.find(blocked);
403         if (it == incoming_edges.end()) {
404           continue;
405         }
406 
407         auto blocking_transfer_count = &it->second;
408         if (*blocking_transfer_count <= 0) {
409           LOG(ERROR) << "Unexpected count in merge after map "
410                      << blocking_transfer_count;
411           return false;
412         }
413         // This operation is no longer blocked by anyone. Add it to the merge
414         // sequence in the next iteration.
415         *blocking_transfer_count -= 1;
416         if (*blocking_transfer_count == 0) {
417           next_free_operations.insert(blocked);
418         }
419       }
420     }
421 
422     LOG(INFO) << "Remaining transfers " << incoming_edges.size()
423               << ", free transfers " << free_operations.size()
424               << ", merge_sequence size " << merge_sequence.size();
425     free_operations = std::move(next_free_operations);
426   }
427 
428   if (!free_operations.empty()) {
429     merge_sequence.insert(
430         merge_sequence.end(), free_operations.begin(), free_operations.end());
431   }
432 
433   CHECK_EQ(operations_.size(), merge_sequence.size() + convert_to_raw.size());
434 
435   size_t blocks_in_sequence = 0;
436   for (const CowMergeOperation& transfer : merge_sequence) {
437     blocks_in_sequence += transfer.dst_extent().num_blocks();
438   }
439 
440   size_t blocks_in_raw = 0;
441   for (const CowMergeOperation& transfer : convert_to_raw) {
442     blocks_in_raw += transfer.dst_extent().num_blocks();
443   }
444 
445   LOG(INFO) << "Blocks in merge sequence " << blocks_in_sequence
446             << ", blocks in raw " << blocks_in_raw << ", partition "
447             << partition_name_;
448   if (!ValidateSequence(merge_sequence)) {
449     LOG(ERROR) << "Invalid Sequence";
450     return false;
451   }
452 
453   *sequence = std::move(merge_sequence);
454   return true;
455 }
456 
ValidateSequence(const std::vector<CowMergeOperation> & sequence)457 bool MergeSequenceGenerator::ValidateSequence(
458     const std::vector<CowMergeOperation>& sequence) {
459   LOG(INFO) << "Validating merge sequence";
460   ExtentRanges visited;
461   for (const auto& op : sequence) {
462     // If |src_offset| is greater than zero, dependency should include 1 extra
463     // block at end of src_extent, as the OP actually references data past
464     // original src_extent.
465     if (op.src_offset() > 0) {
466       CHECK_EQ(op.src_extent().num_blocks(), op.dst_extent().num_blocks() + 1)
467           << op;
468     } else {
469       CHECK_EQ(op.src_extent().num_blocks(), op.dst_extent().num_blocks())
470           << op;
471     }
472     if (visited.OverlapsWithExtent(op.src_extent())) {
473       LOG(ERROR) << "Transfer violates the merge sequence " << op
474                  << "Visited extent ranges: ";
475       visited.Dump();
476       return false;
477     }
478 
479     CHECK(!visited.OverlapsWithExtent(op.dst_extent()))
480         << "dst extent should write only once.";
481     visited.AddExtent(op.dst_extent());
482   }
483 
484   return true;
485 }
486 
487 }  // namespace chromeos_update_engine
488