1 // Copyright (C) 2023 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 #include "parser_v2.h"
15 
16 #include <unistd.h>
17 
18 #include <android-base/file.h>
19 #include <android-base/logging.h>
20 
21 #include <libsnapshot/cow_format.h>
22 
23 namespace android {
24 namespace snapshot {
25 
26 using android::base::borrowed_fd;
27 
Parse(borrowed_fd fd,const CowHeaderV3 & header,std::optional<uint64_t> label)28 bool CowParserV2::Parse(borrowed_fd fd, const CowHeaderV3& header, std::optional<uint64_t> label) {
29     auto pos = lseek(fd.get(), 0, SEEK_END);
30     if (pos < 0) {
31         PLOG(ERROR) << "lseek end failed";
32         return false;
33     }
34     fd_size_ = pos;
35     header_ = header;
36 
37     if (header_.footer_size != sizeof(CowFooter)) {
38         LOG(ERROR) << "Footer size unknown, read " << header_.footer_size << ", expected "
39                    << sizeof(CowFooter);
40         return false;
41     }
42     if (header_.op_size != sizeof(CowOperationV2)) {
43         LOG(ERROR) << "Operation size unknown, read " << header_.op_size << ", expected "
44                    << sizeof(CowOperationV2);
45         return false;
46     }
47     if (header_.cluster_ops == 1) {
48         LOG(ERROR) << "Clusters must contain at least two operations to function.";
49         return false;
50     }
51 
52     if (header_.prefix.major_version > 2 || header_.prefix.minor_version != 0) {
53         LOG(ERROR) << "Header version mismatch, "
54                    << "major version: " << header_.prefix.major_version
55                    << ", expected: " << kCowVersionMajor
56                    << ", minor version: " << header_.prefix.minor_version
57                    << ", expected: " << kCowVersionMinor;
58         return false;
59     }
60 
61     return ParseOps(fd, label);
62 }
63 
ParseOps(borrowed_fd fd,std::optional<uint64_t> label)64 bool CowParserV2::ParseOps(borrowed_fd fd, std::optional<uint64_t> label) {
65     uint64_t pos;
66     auto xor_data_loc = std::make_shared<std::unordered_map<uint64_t, uint64_t>>();
67 
68     // Skip the scratch space
69     if (header_.prefix.major_version >= 2 && (header_.buffer_size > 0)) {
70         LOG(DEBUG) << " Scratch space found of size: " << header_.buffer_size;
71         size_t init_offset = header_.prefix.header_size + header_.buffer_size;
72         pos = lseek(fd.get(), init_offset, SEEK_SET);
73         if (pos != init_offset) {
74             PLOG(ERROR) << "lseek ops failed";
75             return false;
76         }
77     } else {
78         pos = lseek(fd.get(), header_.prefix.header_size, SEEK_SET);
79         if (pos != header_.prefix.header_size) {
80             PLOG(ERROR) << "lseek ops failed";
81             return false;
82         }
83         // Reading a v1 version of COW which doesn't have buffer_size.
84         header_.buffer_size = 0;
85     }
86     uint64_t data_pos = 0;
87 
88     if (header_.cluster_ops) {
89         data_pos = pos + header_.cluster_ops * sizeof(CowOperationV2);
90     } else {
91         data_pos = pos + sizeof(CowOperationV2);
92     }
93 
94     auto ops_buffer = std::make_shared<std::vector<CowOperationV2>>();
95     uint64_t current_op_num = 0;
96     uint64_t cluster_ops = header_.cluster_ops ?: 1;
97     bool done = false;
98 
99     // Alternating op clusters and data
100     while (!done) {
101         uint64_t to_add = std::min(cluster_ops, (fd_size_ - pos) / sizeof(CowOperationV2));
102         if (to_add == 0) break;
103         ops_buffer->resize(current_op_num + to_add);
104         if (!android::base::ReadFully(fd, &ops_buffer->data()[current_op_num],
105                                       to_add * sizeof(CowOperationV2))) {
106             PLOG(ERROR) << "read op failed";
107             return false;
108         }
109         // Parse current cluster to find start of next cluster
110         while (current_op_num < ops_buffer->size()) {
111             auto& current_op = ops_buffer->data()[current_op_num];
112             current_op_num++;
113             if (current_op.type == kCowXorOp) {
114                 xor_data_loc->insert({current_op.new_block, data_pos});
115             }
116             pos += sizeof(CowOperationV2) + GetNextOpOffset(current_op, header_.cluster_ops);
117             data_pos += current_op.data_length + GetNextDataOffset(current_op, header_.cluster_ops);
118 
119             if (current_op.type == kCowClusterOp) {
120                 break;
121             } else if (current_op.type == kCowLabelOp) {
122                 last_label_ = {current_op.source};
123 
124                 // If we reach the requested label, stop reading.
125                 if (label && label.value() == current_op.source) {
126                     done = true;
127                     break;
128                 }
129             } else if (current_op.type == kCowFooterOp) {
130                 footer_.emplace();
131                 CowFooter* footer = &footer_.value();
132                 memcpy(&footer_->op, &current_op, sizeof(footer->op));
133                 off_t offs = lseek(fd.get(), pos, SEEK_SET);
134                 if (offs < 0 || pos != static_cast<uint64_t>(offs)) {
135                     PLOG(ERROR) << "lseek next op failed " << offs;
136                     return false;
137                 }
138                 if (!android::base::ReadFully(fd, &footer->unused, sizeof(footer->unused))) {
139                     LOG(ERROR) << "Could not read COW footer";
140                     return false;
141                 }
142 
143                 // Drop the footer from the op stream.
144                 current_op_num--;
145                 done = true;
146                 break;
147             }
148         }
149 
150         // Position for next cluster read
151         off_t offs = lseek(fd.get(), pos, SEEK_SET);
152         if (offs < 0 || pos != static_cast<uint64_t>(offs)) {
153             PLOG(ERROR) << "lseek next op failed " << offs;
154             return false;
155         }
156         ops_buffer->resize(current_op_num);
157     }
158 
159     LOG(DEBUG) << "COW file read complete. Total ops: " << ops_buffer->size();
160     // To successfully parse a COW file, we need either:
161     //  (1) a label to read up to, and for that label to be found, or
162     //  (2) a valid footer.
163     if (label) {
164         if (!last_label_) {
165             LOG(ERROR) << "Did not find label " << label.value()
166                        << " while reading COW (no labels found)";
167             return false;
168         }
169         if (last_label_.value() != label.value()) {
170             LOG(ERROR) << "Did not find label " << label.value()
171                        << ", last label=" << last_label_.value();
172             return false;
173         }
174     } else if (!footer_) {
175         LOG(ERROR) << "No COW footer found";
176         return false;
177     }
178 
179     uint8_t csum[32];
180     memset(csum, 0, sizeof(uint8_t) * 32);
181 
182     if (footer_) {
183         if (ops_buffer->size() != footer_->op.num_ops) {
184             LOG(ERROR) << "num ops does not match, expected " << footer_->op.num_ops << ", found "
185                        << ops_buffer->size();
186             return false;
187         }
188         if (ops_buffer->size() * sizeof(CowOperationV2) != footer_->op.ops_size) {
189             LOG(ERROR) << "ops size does not match ";
190             return false;
191         }
192     }
193 
194     v2_ops_ = ops_buffer;
195     v2_ops_->shrink_to_fit();
196     xor_data_loc_ = xor_data_loc;
197     return true;
198 }
199 
Translate(TranslatedCowOps * out)200 bool CowParserV2::Translate(TranslatedCowOps* out) {
201     out->ops = std::make_shared<std::vector<CowOperationV3>>(v2_ops_->size());
202 
203     // Translate the operation buffer from on disk to in memory
204     for (size_t i = 0; i < out->ops->size(); i++) {
205         const auto& v2_op = v2_ops_->at(i);
206 
207         auto& new_op = out->ops->at(i);
208         new_op.set_type(v2_op.type);
209         // v2 ops always have 4k compression
210         new_op.set_compression_bits(0);
211         new_op.data_length = v2_op.data_length;
212 
213         if (v2_op.new_block > std::numeric_limits<uint32_t>::max()) {
214             LOG(ERROR) << "Out-of-range new block in COW op: " << v2_op;
215             return false;
216         }
217         new_op.new_block = v2_op.new_block;
218 
219         uint64_t source_info = v2_op.source;
220         if (new_op.type() != kCowLabelOp) {
221             source_info &= kCowOpSourceInfoDataMask;
222             if (source_info != v2_op.source) {
223                 LOG(ERROR) << "Out-of-range source value in COW op: " << v2_op;
224                 return false;
225             }
226         }
227         if (v2_op.compression != kCowCompressNone) {
228             if (header_.compression_algorithm == kCowCompressNone) {
229                 header_.compression_algorithm = v2_op.compression;
230             } else if (header_.compression_algorithm != v2_op.compression) {
231                 LOG(ERROR) << "COW has mixed compression types which is not supported;"
232                            << " previously saw " << header_.compression_algorithm << ", got "
233                            << v2_op.compression << ", op: " << v2_op;
234                 return false;
235             }
236         }
237         new_op.set_source(source_info);
238     }
239 
240     out->header = header_;
241     return true;
242 }
243 
244 }  // namespace snapshot
245 }  // namespace android
246