1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 /*
20  * Read-only access to Zip archives, with minimal heap allocation.
21  */
22 
23 #include <stdint.h>
24 #include <string.h>
25 #include <sys/cdefs.h>
26 #include <sys/types.h>
27 
28 #include <functional>
29 #include <string>
30 #include <string_view>
31 #include <utility>
32 
33 #include "android-base/macros.h"
34 #include "android-base/off64_t.h"
35 
/*
 * Zip compression methods we support. These are the values stored in
 * `ZipEntryCommon::method`.
 */
enum {
  kCompressStored = 0,    // no compression
  kCompressDeflated = 8,  // standard deflate
};
41 
// This struct holds the common information of a zip entry other than the
// entry size. The compressed and uncompressed lengths are handled
// separately in the derived classes.
//
// Every field has a default member initializer, so a default-constructed
// entry starts out zeroed rather than holding indeterminate values.
struct ZipEntryCommon {
  // Compression method. One of kCompressStored or kCompressDeflated.
  // See also `gpbf` for deflate subtypes.
  uint16_t method{0};

  // Modification time. The zipfile format specifies
  // that the first two little endian bytes contain the time
  // and the last two little endian bytes contain the date.
  // See `GetModificationTime`. Use signed integer to avoid the
  // sub-overflow.
  // TODO: should be overridden by extra time field, if present.
  int32_t mod_time{0};

  // Returns `mod_time` as a broken-down struct tm.
  struct tm GetModificationTime() const;

  // Suggested Unix mode for this entry, from the zip archive if created on
  // Unix, or a default otherwise. See also `external_file_attributes`.
  mode_t unix_mode{0};

  // 1 if this entry contains a data descriptor segment, 0
  // otherwise.
  uint8_t has_data_descriptor{0};

  // Crc32 value of this ZipEntry. This information might
  // either be stored in the local file header or in a special
  // Data descriptor footer at the end of the file entry.
  uint32_t crc32{0};

  // True if the values of the uncompressed length and compressed length are
  // stored in the zip64 extended info of the extra field.
  bool zip64_format_size{false};

  // The offset to the start of data for this ZipEntry.
  off64_t offset{0};

  // The version of zip and the host file system this came from (for zipinfo).
  uint16_t version_made_by{0};

  // The raw attributes, whose interpretation depends on the host
  // file system in `version_made_by` (for zipinfo). See also `unix_mode`.
  uint32_t external_file_attributes{0};

  // Specifics about the deflation (for zipinfo).
  uint16_t gpbf{0};

  // Whether this entry is believed to be text or binary (for zipinfo).
  bool is_text{false};

  // Size of the extra field.
  uint16_t extra_field_size{0};
};
96 
97 struct ZipEntry64;
98 // Many users of the library assume the entry size is capped at UNIT32_MAX. So we keep
99 // the interface for the old ZipEntry here; and we could switch them over to the new
100 // ZipEntry64 later.
101 struct ZipEntry : public ZipEntryCommon {
102   // Compressed length of this ZipEntry. The maximum value is UNIT32_MAX.
103   // Might be present either in the local file header or in the data
104   // descriptor footer.
105   uint32_t compressed_length{0};
106 
107   // Uncompressed length of this ZipEntry. The maximum value is UNIT32_MAX.
108   // Might be present either in the local file header or in the data
109   // descriptor footer.
110   uint32_t uncompressed_length{0};
111 
112   // Copies the contents of a ZipEntry64 object to a 32 bits ZipEntry. Returns 0 if the
113   // size of the entry fits into uint32_t, returns a negative error code
114   // (kUnsupportedEntrySize) otherwise.
115   static int32_t CopyFromZipEntry64(ZipEntry* dst, const ZipEntry64* src);
116 
117  private:
118   ZipEntry& operator=(const ZipEntryCommon& other) {
119     ZipEntryCommon::operator=(other);
120     return *this;
121   }
122 };
123 
124 // Represents information about a zip entry in a zip file.
125 struct ZipEntry64 : public ZipEntryCommon {
126   // Compressed length of this ZipEntry. The maximum value is UNIT64_MAX.
127   // Might be present either in the local file header, the zip64 extended field,
128   // or in the data descriptor footer.
129   uint64_t compressed_length{0};
130 
131   // Uncompressed length of this ZipEntry. The maximum value is UNIT64_MAX.
132   // Might be present either in the local file header, the zip64 extended field,
133   // or in the data descriptor footer.
134   uint64_t uncompressed_length{0};
135 
136   explicit ZipEntry64() = default;
ZipEntry64ZipEntry64137   explicit ZipEntry64(const ZipEntry& zip_entry) : ZipEntryCommon(zip_entry) {
138     compressed_length = zip_entry.compressed_length;
139     uncompressed_length = zip_entry.uncompressed_length;
140   }
141 };
142 
// ZipArchive is only forward-declared here; callers refer to an archive
// through a ZipArchiveHandle pointer.
struct ZipArchive;
using ZipArchiveHandle = ZipArchive*;
145 
/*
 * Opens the Zip archive named |fileName| and sets |handle| to the value of
 * the opaque handle for the file. This handle must be released by calling
 * CloseArchive with this handle.
 *
 * Returns 0 on success, and negative values on failure.
 */
int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle);
154 
155 /*
156  * Like OpenArchive, but takes a file descriptor open for reading
157  * at the start of the file.  The descriptor must be mappable (this does
158  * not allow access to a stream).
159  *
160  * Sets handle to the value of the opaque handle for this file descriptor.
161  * This handle must be released by calling CloseArchive with this handle.
162  *
163  * If assume_ownership parameter is 'true' calling CloseArchive will close
164  * the file.
165  *
166  * This function maps and scans the central directory and builds a table
167  * of entries for future lookups.
168  *
169  * "debugFileName" will appear in error messages, but is not otherwise used.
170  *
171  * Returns 0 on success, and negative values on failure.
172  */
173 int32_t OpenArchiveFd(const int fd, const char* debugFileName, ZipArchiveHandle* handle,
174                       bool assume_ownership = true);
175 
176 int32_t OpenArchiveFdRange(const int fd, const char* debugFileName, ZipArchiveHandle* handle,
177                            off64_t length, off64_t offset, bool assume_ownership = true);
178 
/*
 * Variant of OpenArchive that takes a memory region (|address|, |length|)
 * instead of a file name.
 *
 * NOTE(review): presumably the region must stay valid until CloseArchive is
 * called, |debugFileName| is only used in error messages, and the return
 * value follows the other OpenArchive variants (0 on success, negative on
 * failure) - confirm against the implementation.
 */
int32_t OpenArchiveFromMemory(const void* address, size_t length, const char* debugFileName,
                              ZipArchiveHandle* handle);
/*
 * Close |archive|, releasing resources associated with it. This will
 * unmap the central directory of the zipfile and free all internal
 * data structures associated with the file. It is an error to use
 * this handle for any further operations without an intervening
 * call to one of the OpenArchive variants.
 */
void CloseArchive(ZipArchiveHandle archive);
189 
/**
 * Summary of an archive, as returned by GetArchiveInfo().
 *
 * Both fields have default member initializers, so a default-constructed
 * ZipArchiveInfo is zeroed rather than indeterminate.
 */
struct ZipArchiveInfo {
  /** The size in bytes of the archive itself. Used by zipinfo. */
  off64_t archive_size{0};
  /** The number of entries in the archive. */
  uint64_t entry_count{0};
};
197 
/**
 * Returns information about the given archive: its size in bytes and its
 * number of entries (see ZipArchiveInfo).
 */
ZipArchiveInfo GetArchiveInfo(ZipArchiveHandle archive);
202 
203 /*
204  * Find an entry in the Zip archive, by name. |data| must be non-null.
205  *
206  * Returns 0 if an entry is found, and populates |data| with information
207  * about this entry. Returns negative values otherwise.
208  *
209  * It's important to note that |data->crc32|, |data->compLen| and
210  * |data->uncompLen| might be set to values from the central directory
211  * if this file entry contains a data descriptor footer. To verify crc32s
212  * and length, a call to VerifyCrcAndLengths must be made after entry data
213  * has been processed.
214  *
215  * On non-Windows platforms this method does not modify internal state and
216  * can be called concurrently.
217  */
218 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName,
219                   ZipEntry64* data);
220 
221 /*
222  * Start iterating over all entries of a zip file. The order of iteration
223  * is not guaranteed to be the same as the order of elements
224  * in the central directory but is stable for a given zip file. |cookie| will
225  * contain the value of an opaque cookie which can be used to make one or more
226  * calls to Next. All calls to StartIteration must be matched by a call to
227  * EndIteration to free any allocated memory.
228  *
229  * This method also accepts optional prefix and suffix to restrict iteration to
230  * entry names that start with |optional_prefix| or end with |optional_suffix|.
231  *
232  * Returns 0 on success and negative values on failure.
233  */
234 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
235                        const std::string_view optional_prefix = "",
236                        const std::string_view optional_suffix = "");
237 
/*
 * Start iterating over all entries of a zip file. Use the |matcher| functor to
 * restrict iteration to entry names that make the functor return true.
 *
 * Returns 0 on success and negative values on failure.
 */
int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
                       std::function<bool(std::string_view entry_name)> matcher);
246 
/*
 * Advance to the next element in the zipfile in iteration order. |cookie| is
 * the opaque cookie produced by StartIteration.
 *
 * Returns 0 on success, -1 if there are no more elements in this
 * archive and lower negative values on failure.
 *
 * The two overloads differ only in the type of |name| (std::string_view vs.
 * std::string).
 */
int32_t Next(void* cookie, ZipEntry64* data, std::string_view* name);
int32_t Next(void* cookie, ZipEntry64* data, std::string* name);
255 
/*
 * Ends iteration over all entries of a zip file and frees the memory allocated
 * in StartIteration. |cookie| is the opaque cookie produced by StartIteration.
 */
void EndIteration(void* cookie);
261 
/*
 * Uncompress |entry| and write it to an open file identified by |fd|.
 * |entry->uncompressed_length| bytes will be written to the file at
 * its current offset, and the file will be truncated at the end of
 * the uncompressed data (no truncation if |fd| references a block
 * device).
 *
 * Returns 0 on success and negative values on failure.
 */
int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry64* entry, int fd);
272 
/**
 * Uncompress the zip entry |entry| to the memory region starting at |begin|
 * and of size |size|. This size is expected to be the same as the *declared*
 * uncompressed length of the zip entry. It is an error if the *actual*
 * number of uncompressed bytes differs from this number.
 *
 * Returns 0 on success and negative values on failure.
 */
int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry64* entry, uint8_t* begin,
                        size_t size);
283 
284 int GetFileDescriptor(const ZipArchiveHandle archive);
285 
286 /**
287  * Returns the offset of the zip archive in the backing file descriptor, or 0 if the zip archive is
288  * not backed by a file descriptor.
289  */
290 off64_t GetFileDescriptorOffset(const ZipArchiveHandle archive);
291 
// NOTE(review): presumably returns a human-readable description of
// |error_code|, where |error_code| is one of the negative values returned by
// the functions in this header - confirm against the implementation.
const char* ErrorCodeString(int32_t error_code);
293 
294 // Many users of libziparchive assume the entry size to be 32 bits long. So we keep these
295 // interfaces that use 32 bit ZipEntry to make old code work. TODO(xunchang) Remove the 32 bit
296 // wrapper functions once we switch all users to recognize ZipEntry64.
297 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName, ZipEntry* data);
298 int32_t Next(void* cookie, ZipEntry* data, std::string* name);
299 int32_t Next(void* cookie, ZipEntry* data, std::string_view* name);
300 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry* entry, int fd);
301 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry* entry, uint8_t* begin,
302                         size_t size);
303 
304 //
305 // This gets defined for the version of the library that need to control all
306 // code accessing the zip file. Details in incfs_support/signal_handling.h
307 //
308 #if !ZIPARCHIVE_DISABLE_CALLBACK_API
309 
310 #if !defined(_WIN32)
311 typedef bool (*ProcessZipEntryFunction)(const uint8_t* buf, size_t buf_size, void* cookie);
312 
313 /*
314  * Stream the uncompressed data through the supplied function,
315  * passing cookie to it each time it gets called.
316  */
317 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry* entry,
318                                 ProcessZipEntryFunction func, void* cookie);
319 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry64* entry,
320                                 ProcessZipEntryFunction func, void* cookie);
321 #endif  // !defined(_WIN32)
322 
323 #endif  // !ZIPARCHIVE_DISABLE_CALLBACK_API
324 
325 namespace zip_archive {
326 
// Abstract destination for the data produced by ExtractToWriter and Inflate.
// The constructor and (non-virtual) destructor are protected, so a Writer can
// only be created and destroyed as part of a derived class.
class Writer {
 public:
  // Appends |buf_size| bytes starting at |buf|.
  // NOTE(review): the bool result presumably signals success - confirm against
  // the implementations.
  virtual bool Append(uint8_t* buf, size_t buf_size) = 0;

  // Returns the internal buffer that can be written into directly.
  // NOTE(review): the pair presumably holds the buffer start and its size -
  // confirm against the implementations.
  using Buffer = std::pair<uint8_t*, size_t>;
  virtual Buffer GetBuffer(size_t length);

 protected:
  Writer() = default;
  ~Writer() = default;

 private:
  DISALLOW_COPY_AND_ASSIGN(Writer);
};
342 
// Abstract source of bytes addressed by offset. The constructor and
// (non-virtual) destructor are protected, so a LowLevelReader can only be
// created and destroyed as part of a derived class.
class LowLevelReader {
 public:
  // Get |len| bytes of data starting at |offset|, either by copying them into the supplied |buf|,
  // or returning an internal buffer directly.
  // Returns a pointer to the data (which can be different from |buf|), or |nullptr| on error.
  virtual const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const = 0;

  // Returns |true| if the reader doesn't need an external buffer but instead returns its own one.
  virtual bool IsZeroCopy() const = 0;

 protected:
  LowLevelReader() = default;
  ~LowLevelReader() = default;

 private:
  DISALLOW_COPY_AND_ASSIGN(LowLevelReader);
};
360 
// Reader interface built on top of LowLevelReader. Subclasses implement
// ReadAtOffset; the LowLevelReader methods are overridden here.
class Reader : public LowLevelReader {
 public:
  // NOTE(review): presumably reads |len| bytes starting at |offset| into |buf|
  // and returns true on success - confirm against the implementations.
  virtual bool ReadAtOffset(uint8_t* buf, size_t len, off64_t offset) const = 0;

  // Ensure the existing classes implementing Reader don't need to bother with
  // the new methods: both LowLevelReader methods are overridden here.
  const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const override;
  bool IsZeroCopy() const override;

 protected:
  Reader() = default;
  ~Reader() = default;

 private:
  DISALLOW_COPY_AND_ASSIGN(Reader);
};
377 
//
// ZIPARCHIVE_DISABLE_CALLBACK_API gets defined for the version of the library
// that needs to control all code accessing the zip file. Details in
// incfs_support/signal_handling.h
//
#if !ZIPARCHIVE_DISABLE_CALLBACK_API

/**
 * Uncompress the zip entry |entry| of the archive |handle| to the given
 * |writer|.
 *
 * Returns 0 on success and negative values on failure.
 */
int32_t ExtractToWriter(ZipArchiveHandle handle, const ZipEntry64* entry,
                        zip_archive::Writer* writer);

#endif  // !ZIPARCHIVE_DISABLE_CALLBACK_API
393 
394 /*
395  * Inflates the first |compressed_length| bytes of |reader| to a given |writer|.
396  * |crc_out| is set to the CRC32 checksum of the uncompressed data.
397  *
398  * Returns 0 on success and negative values on failure, for example if |reader|
399  * cannot supply the right amount of data, or if the number of bytes written to
400  * data does not match |uncompressed_length|.
401  *
402  * If |crc_out| is not nullptr, it is set to the crc32 checksum of the
403  * uncompressed data.
404  *
405  * NOTE: in the IncFS version of the library this function remains
406  * unprotected, because the data |reader| is supplying is under the full reader's
407  * control; it's the reader's duty to ensure it is available and OK to access.
408  */
409 int32_t Inflate(const Reader& reader, const uint64_t compressed_length,
410                 const uint64_t uncompressed_length, Writer* writer, uint64_t* crc_out);
411 
412 }  // namespace zip_archive
413