1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "snapuserd_core.h"
18 
19 /*
20  * Readahead is used to optimize the merge of COPY and XOR Ops.
21  *
22  * We create a scratch space of 2MB to store the read-ahead data in the COW
23  * device.
24  *
25  *      +-----------------------+
26  *      |     Header (fixed)    |
27  *      +-----------------------+
28  *      |    Scratch space      |  <-- 2MB
29  *      +-----------------------+
30  *
31  *      Scratch space is as follows:
32  *
33  *      +-----------------------+
34  *      |       Metadata        | <- 4k page
35  *      +-----------------------+
36  *      |       Metadata        | <- 4k page
37  *      +-----------------------+
38  *      |                       |
39  *      |    Read-ahead data    |
40  *      |                       |
41  *      +-----------------------+
42  *
43  *
 * ===================================================================
45  *
46  * Example:
47  *
48  * We have 6 copy operations to be executed in OTA. Update-engine
49  * will write to COW file as follows:
50  *
51  * Op-1: 20 -> 23
52  * Op-2: 19 -> 22
53  * Op-3: 18 -> 21
54  * Op-4: 17 -> 20
55  * Op-5: 16 -> 19
56  * Op-6: 15 -> 18
57  *
58  * Read-ahead thread will read all the 6 source blocks and store the data in the
59  * scratch space. Metadata will contain the destination block numbers. Thus,
60  * scratch space will look something like this:
61  *
62  * +--------------+
63  * | Block   23   |
64  * | offset - 1   |
65  * +--------------+
66  * | Block   22   |
67  * | offset - 2   |
68  * +--------------+
69  * | Block   21   |
70  * | offset - 3   |
71  * +--------------+
72  *    ...
73  *    ...
74  * +--------------+
75  * | Data-Block 20| <-- offset - 1
76  * +--------------+
77  * | Data-Block 19| <-- offset - 2
78  * +--------------+
79  * | Data-Block 18| <-- offset - 3
80  * +--------------+
81  *     ...
82  *     ...
83  *
84  * ====================================================================
85  *
86  *
87  *  Read-ahead thread will process the COW Ops in fixed set. Consider
88  *  the following example:
89  *
90  *  +--------------------------+
91  *  |op-1|op-2|op-3|....|op-510|
92  *  +--------------------------+
93  *
94  *  <------ One RA Block ------>
95  *
96  *  RA thread will read 510 ordered COW ops at a time and will store
97  *  the data in the scratch space.
98  *
 *  RA thread and Merge thread will go in lock-step, wherein the RA thread
 *  makes sure that the data for 510 COW operations is read upfront
 *  and is in memory. Thus, the merge thread will pick up the data
 *  directly from memory and write it back to the base device.
103  *
104  *
105  *  +--------------------------+------------------------------------+
106  *  |op-1|op-2|op-3|....|op-510|op-511|op-512|op-513........|op-1020|
107  *  +--------------------------+------------------------------------+
108  *
109  *  <------Merge 510 Blocks----><-Prepare 510 blocks for merge by RA->
110  *           ^                                  ^
111  *           |                                  |
112  *      Merge thread                        RA thread
113  *
114  * Both Merge and RA thread will strive to work in parallel.
115  *
116  * ===========================================================================
117  *
118  * State transitions and communication between RA thread and Merge thread:
119  *
120  *  Merge Thread                                      RA Thread
121  *  ----------------------------------------------------------------------------
122  *
123  *          |                                         |
124  *    WAIT for RA Block N                     READ one RA Block (N)
125  *        for merge                                   |
126  *          |                                         |
127  *          |                                         |
128  *          <--------------MERGE BEGIN--------READ Block N done(copy to scratch)
129  *          |                                         |
130  *          |                                         |
 *    Merge Begin Block N                     READ one RA Block (N+1)
132  *          |                                         |
133  *          |                                         |
134  *          |                                  READ done. Wait for merge complete
135  *          |                                         |
136  *          |                                        WAIT
137  *          |                                         |
138  *    Merge done Block N                              |
139  *          ----------------MERGE READY-------------->|
140  *    WAIT for RA Block N+1                     Copy RA Block (N+1)
141  *        for merge                              to scratch space
142  *          |                                         |
143  *          <---------------MERGE BEGIN---------BLOCK N+1 Done
144  *          |                                         |
145  *          |                                         |
 *    Merge Begin Block N+1                   READ one RA Block (N+2)
147  *          |                                         |
148  *          |                                         |
149  *          |                                  READ done. Wait for merge complete
150  *          |                                         |
151  *          |                                        WAIT
152  *          |                                         |
153  *    Merge done Block N+1                            |
154  *          ----------------MERGE READY-------------->|
155  *    WAIT for RA Block N+2                     Copy RA Block (N+2)
156  *        for merge                              to scratch space
157  *          |                                         |
158  *          <---------------MERGE BEGIN---------BLOCK N+2 Done
159  */
160 
161 namespace android {
162 namespace snapshot {
163 
164 using namespace android;
165 using namespace android::dm;
166 using android::base::unique_fd;
167 
MonitorMerge()168 void SnapshotHandler::MonitorMerge() {
169     {
170         std::lock_guard<std::mutex> lock(lock_);
171         merge_monitored_ = true;
172     }
173 }
174 
175 // This is invoked once primarily by update-engine to initiate
176 // the merge
InitiateMerge()177 void SnapshotHandler::InitiateMerge() {
178     {
179         std::lock_guard<std::mutex> lock(lock_);
180         merge_initiated_ = true;
181 
182         // If there are only REPLACE ops to be merged, then we need
183         // to explicitly set the state to MERGE_BEGIN as there
184         // is no read-ahead thread
185         if (!ra_thread_) {
186             io_state_ = MERGE_IO_TRANSITION::MERGE_BEGIN;
187         }
188     }
189     cv.notify_all();
190 }
191 
IsMergeBeginError(MERGE_IO_TRANSITION io_state)192 static inline bool IsMergeBeginError(MERGE_IO_TRANSITION io_state) {
193     return io_state == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
194            io_state == MERGE_IO_TRANSITION::IO_TERMINATED;
195 }
196 
197 // Invoked by Merge thread - Waits on RA thread to resume merging. Will
198 // be waken up RA thread.
WaitForMergeBegin()199 bool SnapshotHandler::WaitForMergeBegin() {
200     std::unique_lock<std::mutex> lock(lock_);
201 
202     cv.wait(lock, [this]() -> bool { return MergeInitiated() || IsMergeBeginError(io_state_); });
203 
204     if (IsMergeBeginError(io_state_)) {
205         SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
206         return false;
207     }
208 
209     cv.wait(lock, [this]() -> bool {
210         return io_state_ == MERGE_IO_TRANSITION::MERGE_BEGIN || IsMergeBeginError(io_state_);
211     });
212 
213     if (IsMergeBeginError(io_state_)) {
214         SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
215         return false;
216     }
217     return true;
218 }
219 
220 // Invoked by RA thread - Flushes the RA block to scratch space if necessary
221 // and then notifies the merge thread to resume merging
ReadAheadIOCompleted(bool sync)222 bool SnapshotHandler::ReadAheadIOCompleted(bool sync) {
223     if (sync) {
224         // Flush the entire buffer region
225         int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC);
226         if (ret < 0) {
227             PLOG(ERROR) << "msync failed after ReadAheadIOCompleted: " << ret;
228             return false;
229         }
230 
231         // Metadata and data are synced. Now, update the state.
232         // We need to update the state after flushing data; if there is a crash
233         // when read-ahead IO is in progress, the state of data in the COW file
234         // is unknown. kCowReadAheadDone acts as a checkpoint wherein the data
235         // in the scratch space is good and during next reboot, read-ahead thread
236         // can safely re-construct the data.
237         struct BufferState* ra_state = GetBufferState();
238         ra_state->read_ahead_state = kCowReadAheadDone;
239 
240         ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
241         if (ret < 0) {
242             PLOG(ERROR) << "msync failed to flush Readahead completion state...";
243             return false;
244         }
245     }
246 
247     // Notify the merge thread to resume merging
248     {
249         std::lock_guard<std::mutex> lock(lock_);
250         if (io_state_ != MERGE_IO_TRANSITION::IO_TERMINATED &&
251             io_state_ != MERGE_IO_TRANSITION::MERGE_FAILED) {
252             io_state_ = MERGE_IO_TRANSITION::MERGE_BEGIN;
253         }
254     }
255 
256     cv.notify_all();
257     return true;
258 }
259 
260 // Invoked by RA thread - Waits for merge thread to finish merging
261 // RA Block N - RA thread would be ready will with Block N+1 but
262 // will wait to merge thread to finish Block N. Once Block N
263 // is merged, RA thread will be woken up by Merge thread and will
264 // flush the data of Block N+1 to scratch space
WaitForMergeReady()265 bool SnapshotHandler::WaitForMergeReady() {
266     {
267         std::unique_lock<std::mutex> lock(lock_);
268         while (!(io_state_ == MERGE_IO_TRANSITION::MERGE_READY ||
269                  io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED ||
270                  io_state_ == MERGE_IO_TRANSITION::MERGE_COMPLETE ||
271                  io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED)) {
272             cv.wait(lock);
273         }
274 
275         // Check if merge failed
276         if (io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED ||
277             io_state_ == MERGE_IO_TRANSITION::MERGE_COMPLETE ||
278             io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED) {
279             SNAP_LOG(ERROR) << "Wait for merge ready failed: " << io_state_;
280             return false;
281         }
282         return true;
283     }
284 }
285 
286 // Invoked by Merge thread - Notify RA thread about Merge completion
287 // for Block N and wake up
NotifyRAForMergeReady()288 void SnapshotHandler::NotifyRAForMergeReady() {
289     {
290         std::lock_guard<std::mutex> lock(lock_);
291         if (io_state_ != MERGE_IO_TRANSITION::IO_TERMINATED &&
292             io_state_ != MERGE_IO_TRANSITION::READ_AHEAD_FAILURE) {
293             io_state_ = MERGE_IO_TRANSITION::MERGE_READY;
294         }
295     }
296 
297     cv.notify_all();
298 }
299 
300 // The following transitions are mostly in the failure paths
MergeFailed()301 void SnapshotHandler::MergeFailed() {
302     {
303         std::lock_guard<std::mutex> lock(lock_);
304         io_state_ = MERGE_IO_TRANSITION::MERGE_FAILED;
305     }
306 
307     cv.notify_all();
308 }
309 
MergeCompleted()310 void SnapshotHandler::MergeCompleted() {
311     {
312         std::lock_guard<std::mutex> lock(lock_);
313         io_state_ = MERGE_IO_TRANSITION::MERGE_COMPLETE;
314     }
315 
316     cv.notify_all();
317 }
318 
319 // This is invoked by worker threads.
320 //
321 // Worker threads are terminated either by two scenarios:
322 //
323 // 1: If dm-user device is destroyed
324 // 2: We had an I/O failure when reading root partitions
325 //
326 // In case (1), this would be a graceful shutdown. In this case, merge
327 // thread and RA thread should have _already_ terminated by this point. We will be
328 // destroying the dm-user device only _after_ merge is completed.
329 //
330 // In case (2), if merge thread had started, then it will be
331 // continuing to merge; however, since we had an I/O failure and the
332 // I/O on root partitions are no longer served, we will terminate the
333 // merge.
334 //
335 // This functions is about handling case (2)
NotifyIOTerminated()336 void SnapshotHandler::NotifyIOTerminated() {
337     {
338         std::lock_guard<std::mutex> lock(lock_);
339         io_state_ = MERGE_IO_TRANSITION::IO_TERMINATED;
340     }
341 
342     cv.notify_all();
343 }
344 
IsIOTerminated()345 bool SnapshotHandler::IsIOTerminated() {
346     std::lock_guard<std::mutex> lock(lock_);
347     return (io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED);
348 }
349 
350 // Invoked by RA thread
ReadAheadIOFailed()351 void SnapshotHandler::ReadAheadIOFailed() {
352     {
353         std::lock_guard<std::mutex> lock(lock_);
354         io_state_ = MERGE_IO_TRANSITION::READ_AHEAD_FAILURE;
355     }
356 
357     cv.notify_all();
358 }
359 
WaitForMergeComplete()360 void SnapshotHandler::WaitForMergeComplete() {
361     std::unique_lock<std::mutex> lock(lock_);
362     while (!(io_state_ == MERGE_IO_TRANSITION::MERGE_COMPLETE ||
363              io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED ||
364              io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED)) {
365         cv.wait(lock);
366     }
367 }
368 
RaThreadStarted()369 void SnapshotHandler::RaThreadStarted() {
370     std::unique_lock<std::mutex> lock(lock_);
371     ra_thread_started_ = true;
372 }
373 
WaitForRaThreadToStart()374 void SnapshotHandler::WaitForRaThreadToStart() {
375     auto now = std::chrono::system_clock::now();
376     auto deadline = now + 3s;
377     {
378         std::unique_lock<std::mutex> lock(lock_);
379         while (!ra_thread_started_) {
380             auto status = cv.wait_until(lock, deadline);
381             if (status == std::cv_status::timeout) {
382                 SNAP_LOG(ERROR) << "Read-ahead thread did not start";
383                 return;
384             }
385         }
386     }
387 }
388 
MarkMergeComplete()389 void SnapshotHandler::MarkMergeComplete() {
390     std::lock_guard<std::mutex> lock(lock_);
391     merge_complete_ = true;
392 }
393 
GetMergeStatus()394 std::string SnapshotHandler::GetMergeStatus() {
395     bool merge_not_initiated = false;
396     bool merge_monitored = false;
397     bool merge_failed = false;
398     bool merge_complete = false;
399 
400     {
401         std::lock_guard<std::mutex> lock(lock_);
402 
403         if (MergeMonitored()) {
404             merge_monitored = true;
405         }
406 
407         if (!MergeInitiated()) {
408             merge_not_initiated = true;
409         }
410 
411         if (io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED) {
412             merge_failed = true;
413         }
414 
415         merge_complete = merge_complete_;
416     }
417 
418     if (merge_not_initiated) {
419         // Merge was not initiated yet; however, we have merge completion
420         // recorded in the COW Header. This can happen if the device was
421         // rebooted during merge. During next reboot, libsnapshot will
422         // query the status and if the merge is completed, then snapshot-status
423         // file will be deleted
424         if (merge_complete) {
425             return "snapshot-merge-complete";
426         }
427 
428         // Merge monitor thread is tracking the merge but the merge thread
429         // is not started yet.
430         if (merge_monitored) {
431             return "snapshot-merge";
432         }
433 
434         // Return the state as "snapshot". If the device was rebooted during
435         // merge, we will return the status as "snapshot". This is ok, as
436         // libsnapshot will explicitly resume the merge. This is slightly
437         // different from kernel snapshot wherein once the snapshot was switched
438         // to merge target, during next boot, we immediately switch to merge
439         // target. We don't do that here because, during first stage init, we
440         // don't want to initiate the merge. The problem is that we have daemon
441         // transition between first and second stage init. If the merge was
442         // started, then we will have to quiesce the merge before switching
443         // the dm tables. Instead, we just wait until second stage daemon is up
444         // before resuming the merge.
445         return "snapshot";
446     }
447 
448     if (merge_failed) {
449         return "snapshot-merge-failed";
450     }
451 
452     if (merge_complete) {
453         return "snapshot-merge-complete";
454     }
455 
456     // Merge is in-progress
457     return "snapshot-merge";
458 }
459 
460 //========== End of Read-ahead state transition functions ====================
461 
462 /*
463  * Root partitions are mounted off dm-user and the I/O's are served
464  * by snapuserd worker threads.
465  *
 * When there is an I/O request to be served by worker threads, we check
 * if the corresponding sector is "changed" due to OTA by doing a lookup.
 * If the lookup succeeds then the sector has been changed, and the change
 * falls into one of 4 COW operations, viz: COPY, XOR, REPLACE and ZERO.
470  *
471  * For the case of REPLACE and ZERO ops, there is not much of a concern
472  * as there is no dependency between blocks. Hence all the I/O request
473  * mapped to these two COW operations will be served by reading the COW device.
474  *
475  * However, COPY and XOR ops are tricky. Since the merge operations are
476  * in-progress, we cannot just go and read from the source device. We need
477  * to be in sync with the state of the merge thread before serving the I/O.
478  *
 * We know that the merge thread processes COW ops in sets called RA
 * Blocks. These sets are of fixed size, wherein each Block comprises
 * 510 COW ops.
482  *
483  *  +--------------------------+
484  *  |op-1|op-2|op-3|....|op-510|
485  *  +--------------------------+
486  *
487  *  <------ Merge Group Block N ------>
488  *
489  * Thus, a Merge Group Block N, will fall into one of these states and will
490  * transition the states in the following order:
491  *
 * 1: GROUP_MERGE_PENDING
 * 2: GROUP_MERGE_RA_READY
 * 3: GROUP_MERGE_IN_PROGRESS
 * 4: GROUP_MERGE_COMPLETED
 * 5: GROUP_MERGE_FAILED (reached instead of GROUP_MERGE_COMPLETED on failure)
497  *
498  * Let's say that we have the I/O request from dm-user whose sector gets mapped
499  * to a COPY operation with op-10 in the above "Merge Group Block N".
500  *
501  * 1: If the Group is in "GROUP_MERGE_PENDING" state:
502  *
 *    Just read the data from the source block based on the COW op->source
 *    field. Note that we will take a ref count on "Group N". This ref count
 *    will prevent the merge thread from beginning to merge while there are
 *    pending I/Os. Once the I/O is completed, the ref count on "Group N" is
 *    decremented. Merge thread will resume merging "Group N" once there are
 *    no pending I/Os.
508  *
509  * 2: If the Group is in "GROUP_MERGE_IN_PROGRESS" or "GROUP_MERGE_RA_READY" state:
510  *
511  *    When the merge thread is ready to process a "Group", it will first move
512  *    the state to GROUP_MERGE_PENDING -> GROUP_MERGE_RA_READY. From this point
513  *    onwards, I/O will be served from Read-ahead buffer. However, merge thread
514  *    cannot start merging this "Group" immediately. If there were any in-flight
515  *    I/O requests, merge thread should wait and allow those I/O's to drain.
516  *    Once all the in-flight I/O's are completed, merge thread will move the
517  *    state from "GROUP_MERGE_RA_READY" -> "GROUP_MERGE_IN_PROGRESS". I/O will
518  *    be continued to serve from Read-ahead buffer during the entire duration
519  *    of the merge.
520  *
521  *    See SetMergeInProgress().
522  *
523  * 3: If the Group is in "GROUP_MERGE_COMPLETED" state:
524  *
525  *    This is straightforward. We just read the data directly from "Base"
526  *    device. We should not be reading the COW op->source field.
527  *
528  * 4: If the Block is in "GROUP_MERGE_FAILED" state:
529  *
530  *    Terminate the I/O with an I/O error as we don't know which "op" in the
531  *    "Group" failed.
532  *
533  *    Transition ensures that the I/O from root partitions are never made to
534  *    wait and are processed immediately. Thus the state transition for any
535  *    "Group" is:
536  *
537  *    GROUP_MERGE_PENDING
538  *          |
539  *          |
540  *          v
541  *    GROUP_MERGE_RA_READY
542  *          |
543  *          |
544  *          v
545  *    GROUP_MERGE_IN_PROGRESS
546  *          |
547  *          |----------------------------(on failure)
548  *          |                           |
549  *          v                           v
550  *    GROUP_MERGE_COMPLETED           GROUP_MERGE_FAILED
551  *
552  */
553 
554 // Invoked by Merge thread
SetMergeCompleted(size_t ra_index)555 void SnapshotHandler::SetMergeCompleted(size_t ra_index) {
556     MergeGroupState* blk_state = merge_blk_state_[ra_index].get();
557     {
558         std::lock_guard<std::mutex> lock(blk_state->m_lock);
559 
560         CHECK(blk_state->merge_state_ == MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS);
561         CHECK(blk_state->num_ios_in_progress == 0);
562 
563         // Merge is complete - All I/O henceforth should be read directly
564         // from base device
565         blk_state->merge_state_ = MERGE_GROUP_STATE::GROUP_MERGE_COMPLETED;
566     }
567 }
568 
569 // Invoked by Merge thread. This is called just before the beginning
570 // of merging a given Block of 510 ops. If there are any in-flight I/O's
571 // from dm-user then wait for them to complete.
SetMergeInProgress(size_t ra_index)572 void SnapshotHandler::SetMergeInProgress(size_t ra_index) {
573     MergeGroupState* blk_state = merge_blk_state_[ra_index].get();
574     {
575         std::unique_lock<std::mutex> lock(blk_state->m_lock);
576 
577         // We may have fallback from Async-merge to synchronous merging
578         // on the existing block. There is no need to reset as the
579         // merge is already in progress.
580         if (blk_state->merge_state_ == MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS) {
581             return;
582         }
583 
584         CHECK(blk_state->merge_state_ == MERGE_GROUP_STATE::GROUP_MERGE_PENDING);
585 
586         // First set the state to RA_READY so that in-flight I/O will drain
587         // and any new I/O will start reading from RA buffer
588         blk_state->merge_state_ = MERGE_GROUP_STATE::GROUP_MERGE_RA_READY;
589 
590         // Wait if there are any in-flight I/O's - we cannot merge at this point
591         while (!(blk_state->num_ios_in_progress == 0)) {
592             blk_state->m_cv.wait(lock);
593         }
594 
595         blk_state->merge_state_ = MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS;
596     }
597 }
598 
599 // Invoked by Merge thread on failure
SetMergeFailed(size_t ra_index)600 void SnapshotHandler::SetMergeFailed(size_t ra_index) {
601     MergeGroupState* blk_state = merge_blk_state_[ra_index].get();
602     {
603         std::unique_lock<std::mutex> lock(blk_state->m_lock);
604 
605         blk_state->merge_state_ = MERGE_GROUP_STATE::GROUP_MERGE_FAILED;
606     }
607 }
608 
609 // Invoked by worker threads when I/O is complete on a "MERGE_PENDING"
610 // Block. If there are no more in-flight I/Os, wake up merge thread
611 // to resume merging.
NotifyIOCompletion(uint64_t new_block)612 void SnapshotHandler::NotifyIOCompletion(uint64_t new_block) {
613     auto it = block_to_ra_index_.find(new_block);
614     CHECK(it != block_to_ra_index_.end()) << " invalid block: " << new_block;
615 
616     bool pending_ios = true;
617 
618     int ra_index = it->second;
619     MergeGroupState* blk_state = merge_blk_state_[ra_index].get();
620     {
621         std::unique_lock<std::mutex> lock(blk_state->m_lock);
622 
623         blk_state->num_ios_in_progress -= 1;
624         if (blk_state->num_ios_in_progress == 0) {
625             pending_ios = false;
626         }
627     }
628 
629     // Give a chance to merge-thread to resume merge
630     // as there are no pending I/O.
631     if (!pending_ios) {
632         blk_state->m_cv.notify_all();
633     }
634 }
635 
GetRABuffer(std::unique_lock<std::mutex> * lock,uint64_t block,void * buffer)636 bool SnapshotHandler::GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block,
637                                   void* buffer) {
638     if (!lock->owns_lock()) {
639         SNAP_LOG(ERROR) << "GetRABuffer - Lock not held";
640         return false;
641     }
642     std::unordered_map<uint64_t, void*>::iterator it = read_ahead_buffer_map_.find(block);
643 
644     if (it == read_ahead_buffer_map_.end()) {
645         return false;
646     }
647 
648     memcpy(buffer, it->second, BLOCK_SZ);
649     return true;
650 }
651 
652 // Invoked by worker threads in the I/O path. This is called when a sector
653 // is mapped to a COPY/XOR COW op.
ProcessMergingBlock(uint64_t new_block,void * buffer)654 MERGE_GROUP_STATE SnapshotHandler::ProcessMergingBlock(uint64_t new_block, void* buffer) {
655     auto it = block_to_ra_index_.find(new_block);
656     if (it == block_to_ra_index_.end()) {
657         return MERGE_GROUP_STATE::GROUP_INVALID;
658     }
659 
660     int ra_index = it->second;
661     MergeGroupState* blk_state = merge_blk_state_[ra_index].get();
662     {
663         std::unique_lock<std::mutex> lock(blk_state->m_lock);
664 
665         MERGE_GROUP_STATE state = blk_state->merge_state_;
666         switch (state) {
667             case MERGE_GROUP_STATE::GROUP_MERGE_PENDING: {
668                 // If this is a merge-resume path, check if the data is
669                 // available from scratch space. Data from scratch space takes
670                 // higher precedence than from source device for overlapping
671                 // blocks.
672                 if (resume_merge_ && GetRABuffer(&lock, new_block, buffer)) {
673                     return (MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS);
674                 }
675                 blk_state->num_ios_in_progress += 1;  // ref count
676                 [[fallthrough]];
677             }
678             case MERGE_GROUP_STATE::GROUP_MERGE_COMPLETED: {
679                 [[fallthrough]];
680             }
681             case MERGE_GROUP_STATE::GROUP_MERGE_FAILED: {
682                 return state;
683             }
684             // Fetch the data from RA buffer.
685             case MERGE_GROUP_STATE::GROUP_MERGE_RA_READY: {
686                 [[fallthrough]];
687             }
688             case MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS: {
689                 if (!GetRABuffer(&lock, new_block, buffer)) {
690                     return MERGE_GROUP_STATE::GROUP_INVALID;
691                 }
692                 return state;
693             }
694             default: {
695                 return MERGE_GROUP_STATE::GROUP_INVALID;
696             }
697         }
698     }
699 }
700 
// Streams the symbolic name of |value| into |os|. Values without a matching
// case are printed as "unknown".
std::ostream& operator<<(std::ostream& os, MERGE_IO_TRANSITION value) {
    const char* name = "unknown";
    switch (value) {
        case MERGE_IO_TRANSITION::INVALID:
            name = "INVALID";
            break;
        case MERGE_IO_TRANSITION::MERGE_READY:
            name = "MERGE_READY";
            break;
        case MERGE_IO_TRANSITION::MERGE_BEGIN:
            name = "MERGE_BEGIN";
            break;
        case MERGE_IO_TRANSITION::MERGE_FAILED:
            name = "MERGE_FAILED";
            break;
        case MERGE_IO_TRANSITION::MERGE_COMPLETE:
            name = "MERGE_COMPLETE";
            break;
        case MERGE_IO_TRANSITION::IO_TERMINATED:
            name = "IO_TERMINATED";
            break;
        case MERGE_IO_TRANSITION::READ_AHEAD_FAILURE:
            name = "READ_AHEAD_FAILURE";
            break;
        default:
            break;
    }
    return os << name;
}
721 
722 }  // namespace snapshot
723 }  // namespace android
724