1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *    http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <stdint.h>
20 
21 #include <map>
22 #include <memory>
23 #include <string_view>
24 #include <utility>
25 
26 #include <android-base/logging.h>
27 #include <log/log.h>
28 
29 #include "zip_error.h"
30 
31 /*
32  * Round up to the next highest power of 2.
33  *
34  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
35  *
36  * TODO: could switch to use std::bit_ceil() once ready
37  */
RoundUpPower2(uint32_t val)38 static constexpr uint32_t RoundUpPower2(uint32_t val) {
39   val--;
40   val |= val >> 1;
41   val |= val >> 2;
42   val |= val >> 4;
43   val |= val >> 8;
44   val |= val >> 16;
45   val++;
46 
47   return val;
48 }
49 
50 // This class is the interface of the central directory entries map. The map
51 // helps to locate a particular cd entry based on the filename.
52 class CdEntryMapInterface {
53  public:
54   virtual ~CdEntryMapInterface() = default;
55   // Adds an entry to the map. The |name| should internally points to the
56   // filename field of a cd entry. And |start| points to the beginning of the
57   // central directory. Returns 0 on success.
58   virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0;
59   // For the zip entry |entryName|, finds the offset of its filename field in
60   // the central directory. Returns a pair of [status, offset]. The value of
61   // the status is 0 on success.
62   virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
63                                                          const uint8_t* cd_start) const = 0;
64   // Resets the iterator to the beginning of the map.
65   virtual void ResetIteration() = 0;
66   // Returns the [name, cd offset] of the current element. Also increments the
67   // iterator to points to the next element. Returns an empty pair we have read
68   // past boundary.
69   virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0;
70 
71   static std::unique_ptr<CdEntryMapInterface> Create(uint64_t num_entries,
72           size_t cd_length, uint16_t max_file_name_length);
73 };
74 
75 /**
76  * More space efficient string representation of strings in an mmaped zipped
77  * file than std::string_view. Using std::string_view as an entry in the
78  * ZipArchive hash table wastes space. std::string_view stores a pointer to a
79  * string (on 64 bit, 8 bytes) and the length to read from that pointer,
80  * 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting
81  * 6 bytes.
82  */
83 
84 /**
85  * ZipStringOffset20 stores 20-bit for offset from a fixed location in the memory
86  * mapped file instead of the entire address, with 12-bit for filename length (i.e.
87  * typical PATH_MAX), consuming 4 bytes in total
88  */
89 struct ZipStringOffset20 {
90   static constexpr size_t offset_max = (1u << 20) - 1;
91   static constexpr size_t length_max = (1u << 12) - 1;
92   uint32_t name_offset : 20;
93   uint16_t name_length : 12;
94 };
95 
96 static_assert(sizeof(struct ZipStringOffset20) == 4);
97 
98 /**
99  * ZipStringOffset32 stores a 4 byte offset from a fixed location in the memory
100  * mapped file instead of the entire address, consuming 8 bytes with alignment.
101  */
102 struct ZipStringOffset32 {
103   uint32_t name_offset;
104   uint16_t name_length;
105 };
106 
107 // This implementation of CdEntryMap uses an array hash table. It uses less
108 // memory than std::map; and it's used as the default implementation for zip
109 // archives without zip64 extension.
110 template <typename ZipStringOffset>
111 class CdEntryMapZip32 : public CdEntryMapInterface {
112  public:
113   ZipError AddToMap(std::string_view name, const uint8_t* start) override;
114   std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
115                                                  const uint8_t* cd_start) const override;
116   void ResetIteration() override;
117   std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
118 
CdEntryMapZip32(uint16_t num_entries)119   explicit CdEntryMapZip32(uint16_t num_entries) {
120     /*
121      * Create hash table.  We have a minimum 75% load factor, possibly as
122      * low as 50% after we round off to a power of 2.  There must be at
123      * least one unused entry to avoid an infinite loop during creation.
124      */
125     hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3);
126     hash_table_.reset(static_cast<ZipStringOffset*>(
127         calloc(hash_table_size_, sizeof(ZipStringOffset))));
128 
129     CHECK(hash_table_ != nullptr)
130       << "Zip: unable to allocate the " << hash_table_size_
131       << " entry hash_table, entry size: " << sizeof(ZipStringOffset);
132   }
133 
134   // We know how many entries are in the Zip archive, so we can have a
135   // fixed-size hash table. We define a load factor of 0.75 and over
136   // allocate so the maximum number entries can never be higher than
137   // ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
138   struct FreeDeleter {
operatorFreeDeleter139     void operator()(void* ptr) const { ::free(ptr); }
140   };
141   std::unique_ptr<ZipStringOffset[], FreeDeleter> hash_table_;
142   uint32_t hash_table_size_{0};
143 
144  private:
145   // The position of element for the current iteration.
146   uint32_t current_position_{0};
147 };
148 
149 // This implementation of CdEntryMap uses a std::map
150 class CdEntryMapZip64 : public CdEntryMapInterface {
151  public:
152   ZipError AddToMap(std::string_view name, const uint8_t* start) override;
153   std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
154                                                  const uint8_t* cd_start) const override;
155   void ResetIteration() override;
156   std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
157 
158   CdEntryMapZip64() = default;
159  private:
160 
161   std::map<std::string_view, uint64_t> entry_table_;
162 
163   std::map<std::string_view, uint64_t>::iterator iterator_;
164 };
165