/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define _POSIX_THREAD_SAFE_FUNCTIONS  // For mingw localtime_r().

#include "ziparchive/zip_writer.h"

#include <sys/param.h>
#include <sys/stat.h>
#include <zlib.h>
#include <cstdio>
#define DEF_MEM_LEVEL 8  // normally in zutil.h?

#include <memory>
#include <vector>

#include "android-base/logging.h"

#include "entry_name_utils-inl.h"
#include "zip_archive_common.h"

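// Local replacement for the powerof2() macro from <sys/param.h> that avoids the undefined
// behavior of (x - 1) overflowing. Like the macro it replaces, it reports 0 as a power of
// two, which lets an alignment of 0 mean "no alignment" below.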
#undef powerof2
#define powerof2(x)                                               \
  ({                                                              \
    __typeof__(x) _x = (x);                                       \
    __typeof__(x) _x2;                                            \
    __builtin_add_overflow(_x, -1, &_x2) ? 1 : ((_x2 & _x) == 0); \
  })

/* Zip compression methods we support */
enum {
  kCompressStored = 0,    // no compression
  kCompressDeflated = 8,  // standard deflate
};

// Size of the output buffer used for compression.
static const size_t kBufSize = 32768u;

// No error, operation completed successfully.
static const int32_t kNoError = 0;

// The ZipWriter is in a bad state.
static const int32_t kInvalidState = -1;

// There was an IO error while writing to disk.
static const int32_t kIoError = -2;

// The zip entry name was invalid.
static const int32_t kInvalidEntryName = -3;

// An error occurred in zlib.
static const int32_t kZlibError = -4;

// A StartAlignedEntry function was called with the kAlign32 flag set.
static const int32_t kInvalidAlign32Flag = -5;

// The alignment parameter is not a power of 2.
static const int32_t kInvalidAlignment = -6;

static const char* sErrorCodes[] = {
    "Invalid state", "IO error", "Invalid entry name", "Zlib error",
};

// Error codes are negative; map code -N to sErrorCodes[N - 1].
const char* ZipWriter::ErrorCodeString(int32_t error_code) {
  if (error_code < 0 && (-error_code) <= static_cast<int32_t>(arraysize(sErrorCodes))) {
    return sErrorCodes[-error_code - 1];
  }
  return nullptr;
}

static void DeleteZStream(z_stream* stream) {
  deflateEnd(stream);
  delete stream;
}

ZipWriter::ZipWriter(FILE* f)
    : file_(f),
      seekable_(false),
      current_offset_(0),
      state_(State::kWritingZip),
      z_stream_(nullptr, DeleteZStream),
      buffer_(kBufSize) {
  // Check if the file is seekable (regular file). If fstat fails, that's fine, subsequent calls
  // will fail as well.
  struct stat file_stats;
  if (fstat(fileno(f), &file_stats) == 0) {
    seekable_ = S_ISREG(file_stats.st_mode);
  }
}
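
// A minimal usage sketch (illustrative only; error checking omitted): write one compressed
// entry and finish the archive.
//
//   FILE* fp = fopen("out.zip", "w+b");
//   ZipWriter writer(fp);
//   writer.StartEntry("hello.txt", ZipWriter::kCompress);
//   writer.WriteBytes("hello", 5);
//   writer.FinishEntry();
//   writer.Finish();
//   fclose(fp);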

ZipWriter::ZipWriter(ZipWriter&& writer) noexcept
    : file_(writer.file_),
      seekable_(writer.seekable_),
      current_offset_(writer.current_offset_),
      state_(writer.state_),
      files_(std::move(writer.files_)),
      z_stream_(std::move(writer.z_stream_)),
      buffer_(std::move(writer.buffer_)) {
  writer.file_ = nullptr;
  writer.state_ = State::kError;
}

ZipWriter& ZipWriter::operator=(ZipWriter&& writer) noexcept {
  file_ = writer.file_;
  seekable_ = writer.seekable_;
  current_offset_ = writer.current_offset_;
  state_ = writer.state_;
  files_ = std::move(writer.files_);
  z_stream_ = std::move(writer.z_stream_);
  buffer_ = std::move(writer.buffer_);
  writer.file_ = nullptr;
  writer.state_ = State::kError;
  return *this;
}

int32_t ZipWriter::HandleError(int32_t error_code) {
  state_ = State::kError;
  z_stream_.reset();
  return error_code;
}

int32_t ZipWriter::StartEntry(std::string_view path, size_t flags) {
  uint32_t alignment = 0;
  if (flags & kAlign32) {
    flags &= ~kAlign32;
    alignment = 4;
  }
  return StartAlignedEntryWithTime(path, flags, time_t(), alignment);
}

int32_t ZipWriter::StartAlignedEntry(std::string_view path, size_t flags, uint32_t alignment) {
  return StartAlignedEntryWithTime(path, flags, time_t(), alignment);
}

int32_t ZipWriter::StartEntryWithTime(std::string_view path, size_t flags, time_t time) {
  uint32_t alignment = 0;
  if (flags & kAlign32) {
    flags &= ~kAlign32;
    alignment = 4;
  }
  return StartAlignedEntryWithTime(path, flags, time, alignment);
}

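// Converts a Unix time_t into the 16-bit MS-DOS time and date fields stored in ZIP local
// file headers and central directory records.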
static void ExtractTimeAndDate(time_t when, uint16_t* out_time, uint16_t* out_date) {
  /* round up to an even number of seconds */
  when = static_cast<time_t>((static_cast<unsigned long>(when) + 1) & (~1));

  struct tm tm_result;
  struct tm* ptm = localtime_r(&when, &tm_result);

  // The earliest valid time for ZIP file entries is 1980-01-01. See:
  // https://users.cs.jmu.edu/buchhofp/forensics/formats/pkzip.html.
  // Set any time before 1980 to 1980-01-01.
  if (ptm->tm_year < 80) {
    ptm->tm_year = 80;
    ptm->tm_mon = 0;
    ptm->tm_mday = 1;
    ptm->tm_hour = 0;
    ptm->tm_min = 0;
    ptm->tm_sec = 0;
  }

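  // Pack into the MS-DOS format: the date word holds (year - 1980) in bits 9-15, month in
  // bits 5-8 and day in bits 0-4; the time word holds hours in bits 11-15, minutes in
  // bits 5-10 and seconds/2 in bits 0-4.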
  *out_date =
      static_cast<uint16_t>((ptm->tm_year - 80) << 9 | (ptm->tm_mon + 1) << 5 | ptm->tm_mday);
  *out_time = static_cast<uint16_t>(ptm->tm_hour << 11 | ptm->tm_min << 5 | ptm->tm_sec >> 1);
}

static void CopyFromFileEntry(const ZipWriter::FileEntry& src, bool use_data_descriptor,
                              LocalFileHeader* dst) {
  dst->lfh_signature = LocalFileHeader::kSignature;
  if (use_data_descriptor) {
    // Set this flag to denote that a DataDescriptor struct will appear after the data,
    // containing the crc and size fields.
    dst->gpb_flags |= kGPBDDFlagMask;

    // The size and crc fields must be 0.
    dst->compressed_size = 0u;
    dst->uncompressed_size = 0u;
    dst->crc32 = 0u;
  } else {
    dst->compressed_size = src.compressed_size;
    dst->uncompressed_size = src.uncompressed_size;
    dst->crc32 = src.crc32;
  }
  dst->compression_method = src.compression_method;
  dst->last_mod_time = src.last_mod_time;
  dst->last_mod_date = src.last_mod_date;
  DCHECK_LE(src.path.size(), std::numeric_limits<uint16_t>::max());
  dst->file_name_length = static_cast<uint16_t>(src.path.size());
  dst->extra_field_length = src.padding_length;
}

int32_t ZipWriter::StartAlignedEntryWithTime(std::string_view path, size_t flags, time_t time,
                                             uint32_t alignment) {
  if (state_ != State::kWritingZip) {
    return kInvalidState;
  }

  // Can only have 65535 entries because the relevant zip record fields are 16 bits.
  if (files_.size() == std::numeric_limits<uint16_t>::max()) {
    return HandleError(kIoError);
  }

  if (flags & kAlign32) {
    return kInvalidAlign32Flag;
  }

  if (powerof2(alignment) == 0) {
    return kInvalidAlignment;
  }
  if (alignment > std::numeric_limits<uint16_t>::max()) {
    return kInvalidAlignment;
  }

  FileEntry file_entry = {};
  file_entry.local_file_header_offset = current_offset_;
  file_entry.path = path;
  // No support for larger than 4GB files.
  if (file_entry.local_file_header_offset > std::numeric_limits<uint32_t>::max()) {
    return HandleError(kIoError);
  }

  if (!IsValidEntryName(reinterpret_cast<const uint8_t*>(file_entry.path.data()),
                        file_entry.path.size())) {
    return kInvalidEntryName;
  }

  if (flags & ZipWriter::kCompress) {
    file_entry.compression_method = kCompressDeflated;

    int compression_level = (flags & ZipWriter::kDefaultCompression) ? 6 : 9;
    int32_t result = PrepareDeflate(compression_level);
    if (result != kNoError) {
      return result;
    }
  } else {
    file_entry.compression_method = kCompressStored;
  }

  ExtractTimeAndDate(time, &file_entry.last_mod_time, &file_entry.last_mod_date);

  off_t offset = current_offset_ + sizeof(LocalFileHeader) + file_entry.path.size();
  // Prepare a pre-zeroed 4K block in case we need to pad the data to an alignment boundary.
  static constexpr char kSmallZeroPadding[4096] = {};
  // Use this larger buffer if the preallocated one is too small.
  std::vector<char> zero_padding_big;
  const char* zero_padding = nullptr;

  if (alignment != 0 && (offset & (alignment - 1))) {
    // Pad the extra field so the data will be aligned.
    uint16_t padding = static_cast<uint16_t>(alignment - (offset % alignment));
    file_entry.padding_length = padding;
    offset += padding;
    if (padding <= std::size(kSmallZeroPadding)) {
        zero_padding = kSmallZeroPadding;
    } else {
        zero_padding_big.resize(padding, 0);
        zero_padding = zero_padding_big.data();
    }
  }

  LocalFileHeader header = {};
  // Always start out expecting a data descriptor. Once the data has been written, if the
  // output is seekable, the GPB flag will be cleared and the real sizes written into the header.
  CopyFromFileEntry(file_entry, true /*use_data_descriptor*/, &header);

  if (fwrite(&header, sizeof(header), 1, file_) != 1) {
    return HandleError(kIoError);
  }

  if (fwrite(path.data(), 1, path.size(), file_) != path.size()) {
    return HandleError(kIoError);
  }

  if (file_entry.padding_length != 0 && fwrite(zero_padding, 1, file_entry.padding_length,
                                               file_) != file_entry.padding_length) {
    return HandleError(kIoError);
  }

  current_file_entry_ = std::move(file_entry);
  current_offset_ = offset;
  state_ = State::kWritingEntry;
  return kNoError;
}

int32_t ZipWriter::DiscardLastEntry() {
  if (state_ != State::kWritingZip || files_.empty()) {
    return kInvalidState;
  }

  FileEntry& last_entry = files_.back();
  current_offset_ = last_entry.local_file_header_offset;
  if (fseeko(file_, current_offset_, SEEK_SET) != 0) {
    return HandleError(kIoError);
  }
  files_.pop_back();
  return kNoError;
}

int32_t ZipWriter::GetLastEntry(FileEntry* out_entry) {
  CHECK(out_entry != nullptr);

  if (files_.empty()) {
    return kInvalidState;
  }
  *out_entry = files_.back();
  return kNoError;
}

int32_t ZipWriter::PrepareDeflate(int compression_level) {
  CHECK(state_ == State::kWritingZip);

  // Initialize the z_stream for compression.
  z_stream_ = std::unique_ptr<z_stream, void (*)(z_stream*)>(new z_stream(), DeleteZStream);

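  // Note: the negative windowBits value asks zlib for a raw deflate stream (no zlib header
  // or checksum trailer), which is the form ZIP entries must be stored in.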
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
  int zerr = deflateInit2(z_stream_.get(), compression_level, Z_DEFLATED,
                          -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
#pragma GCC diagnostic pop

  if (zerr != Z_OK) {
    if (zerr == Z_VERSION_ERROR) {
      LOG(ERROR) << "Installed zlib is not compatible with linked version (" << ZLIB_VERSION << ")";
      return HandleError(kZlibError);
    } else {
      LOG(ERROR) << "deflateInit2 failed (zerr=" << zerr << ")";
      return HandleError(kZlibError);
    }
  }

  z_stream_->next_out = buffer_.data();
  DCHECK_EQ(buffer_.size(), kBufSize);
  z_stream_->avail_out = static_cast<uint32_t>(buffer_.size());
  return kNoError;
}

int32_t ZipWriter::WriteBytes(const void* data, size_t len) {
  if (state_ != State::kWritingEntry) {
    return HandleError(kInvalidState);
  }
  // The final uncompressed size has to fit in the 32-bit size field.
  if (len + static_cast<uint64_t>(current_file_entry_.uncompressed_size) >
      std::numeric_limits<uint32_t>::max()) {
    return HandleError(kIoError);
  }
  uint32_t len32 = static_cast<uint32_t>(len);

  int32_t result = kNoError;
  if (current_file_entry_.compression_method & kCompressDeflated) {
    result = CompressBytes(&current_file_entry_, data, len32);
  } else {
    result = StoreBytes(&current_file_entry_, data, len32);
  }

  if (result != kNoError) {
    return result;
  }

  current_file_entry_.crc32 = static_cast<uint32_t>(
      crc32(current_file_entry_.crc32, reinterpret_cast<const Bytef*>(data), len32));
  current_file_entry_.uncompressed_size += len32;
  return kNoError;
}

int32_t ZipWriter::StoreBytes(FileEntry* file, const void* data, uint32_t len) {
  CHECK(state_ == State::kWritingEntry);

  if (fwrite(data, 1, len, file_) != len) {
    return HandleError(kIoError);
  }
  file->compressed_size += len;
  current_offset_ += len;
  return kNoError;
}

int32_t ZipWriter::CompressBytes(FileEntry* file, const void* data, uint32_t len) {
  CHECK(state_ == State::kWritingEntry);
  CHECK(z_stream_);
  CHECK(z_stream_->next_out != nullptr);
  CHECK(z_stream_->avail_out != 0);

  // Prepare the input.
  z_stream_->next_in = reinterpret_cast<const uint8_t*>(data);
  z_stream_->avail_in = len;

  while (z_stream_->avail_in > 0) {
    // We have more data to compress.
    int zerr = deflate(z_stream_.get(), Z_NO_FLUSH);
    if (zerr != Z_OK) {
      return HandleError(kZlibError);
    }

    if (z_stream_->avail_out == 0) {
      // The output is full, let's write it to disk.
      size_t write_bytes = z_stream_->next_out - buffer_.data();
      if (fwrite(buffer_.data(), 1, write_bytes, file_) != write_bytes) {
        return HandleError(kIoError);
      }
      file->compressed_size += write_bytes;
      current_offset_ += write_bytes;

      // Reset the output buffer for the next input.
      z_stream_->next_out = buffer_.data();
      DCHECK_EQ(buffer_.size(), kBufSize);
      z_stream_->avail_out = static_cast<uint32_t>(buffer_.size());
    }
  }
  return kNoError;
}

int32_t ZipWriter::FlushCompressedBytes(FileEntry* file) {
  CHECK(state_ == State::kWritingEntry);
  CHECK(z_stream_);
  CHECK(z_stream_->next_out != nullptr);
  CHECK(z_stream_->avail_out != 0);

  // Keep deflating while there isn't enough space in the output buffer to
  // finish compressing the stream.
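  // deflate() with Z_FINISH returns Z_OK while it still has pending output and Z_STREAM_END
  // once the stream has been fully flushed.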
  int zerr;
  while ((zerr = deflate(z_stream_.get(), Z_FINISH)) == Z_OK) {
    CHECK(z_stream_->avail_out == 0);
    size_t write_bytes = z_stream_->next_out - buffer_.data();
    if (fwrite(buffer_.data(), 1, write_bytes, file_) != write_bytes) {
      return HandleError(kIoError);
    }
    file->compressed_size += write_bytes;
    current_offset_ += write_bytes;

    z_stream_->next_out = buffer_.data();
    DCHECK_EQ(buffer_.size(), kBufSize);
    z_stream_->avail_out = static_cast<uint32_t>(buffer_.size());
  }
  if (zerr != Z_STREAM_END) {
    return HandleError(kZlibError);
  }

  size_t write_bytes = z_stream_->next_out - buffer_.data();
  if (write_bytes != 0) {
    if (fwrite(buffer_.data(), 1, write_bytes, file_) != write_bytes) {
      return HandleError(kIoError);
    }
    file->compressed_size += write_bytes;
    current_offset_ += write_bytes;
  }
  z_stream_.reset();
  return kNoError;
}

bool ZipWriter::ShouldUseDataDescriptor() const {
  // Only use a trailing "data descriptor" if the output isn't seekable.
  return !seekable_;
}

int32_t ZipWriter::FinishEntry() {
  if (state_ != State::kWritingEntry) {
    return kInvalidState;
  }

  if (current_file_entry_.compression_method & kCompressDeflated) {
    int32_t result = FlushCompressedBytes(&current_file_entry_);
    if (result != kNoError) {
      return result;
    }
  }

  if (ShouldUseDataDescriptor()) {
    // Some versions of ZIP don't allow STORED data to have a trailing DataDescriptor.
    // If this file is not seekable, or if the data is compressed, write a DataDescriptor.
    // We don't support the zip64 format yet, so write both the uncompressed and compressed
    // sizes as uint32_t.
    std::vector<uint32_t> dataDescriptor = {
        DataDescriptor::kOptSignature, current_file_entry_.crc32,
        current_file_entry_.compressed_size, current_file_entry_.uncompressed_size};
    if (fwrite(dataDescriptor.data(), dataDescriptor.size() * sizeof(uint32_t), 1, file_) != 1) {
      return HandleError(kIoError);
    }

    current_offset_ += sizeof(uint32_t) * dataDescriptor.size();
  } else {
    // Seek back to the header and rewrite to include the size.
    if (fseeko(file_, current_file_entry_.local_file_header_offset, SEEK_SET) != 0) {
      return HandleError(kIoError);
    }

    LocalFileHeader header = {};
    CopyFromFileEntry(current_file_entry_, false /*use_data_descriptor*/, &header);

    if (fwrite(&header, sizeof(header), 1, file_) != 1) {
      return HandleError(kIoError);
    }

    if (fseeko(file_, current_offset_, SEEK_SET) != 0) {
      return HandleError(kIoError);
    }
  }

  files_.emplace_back(std::move(current_file_entry_));
  state_ = State::kWritingZip;
  return kNoError;
}

int32_t ZipWriter::Finish() {
  if (state_ != State::kWritingZip) {
    return kInvalidState;
  }

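  // Write the central directory: one CentralDirectoryRecord (plus the entry name) per file,
  // followed by the end of central directory (EOCD) record.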
  off_t startOfCdr = current_offset_;
  for (FileEntry& file : files_) {
    CentralDirectoryRecord cdr = {};
    cdr.record_signature = CentralDirectoryRecord::kSignature;
    if (ShouldUseDataDescriptor()) {
      cdr.gpb_flags |= kGPBDDFlagMask;
    }
    cdr.compression_method = file.compression_method;
    cdr.last_mod_time = file.last_mod_time;
    cdr.last_mod_date = file.last_mod_date;
    cdr.crc32 = file.crc32;
    cdr.compressed_size = file.compressed_size;
    cdr.uncompressed_size = file.uncompressed_size;
    // Checked in IsValidEntryName.
    DCHECK_LE(file.path.size(), std::numeric_limits<uint16_t>::max());
    cdr.file_name_length = static_cast<uint16_t>(file.path.size());
    // Checked in StartAlignedEntryWithTime.
    DCHECK_LE(file.local_file_header_offset, std::numeric_limits<uint32_t>::max());
    cdr.local_file_header_offset = static_cast<uint32_t>(file.local_file_header_offset);
    if (fwrite(&cdr, sizeof(cdr), 1, file_) != 1) {
      return HandleError(kIoError);
    }

    if (fwrite(file.path.data(), 1, file.path.size(), file_) != file.path.size()) {
      return HandleError(kIoError);
    }

    current_offset_ += sizeof(cdr) + file.path.size();
  }

  EocdRecord er = {};
  er.eocd_signature = EocdRecord::kSignature;
  er.disk_num = 0;
  er.cd_start_disk = 0;
  // Checked when adding entries.
  DCHECK_LE(files_.size(), std::numeric_limits<uint16_t>::max());
  er.num_records_on_disk = static_cast<uint16_t>(files_.size());
  er.num_records = static_cast<uint16_t>(files_.size());
  if (current_offset_ > std::numeric_limits<uint32_t>::max()) {
    return HandleError(kIoError);
  }
  er.cd_size = static_cast<uint32_t>(current_offset_ - startOfCdr);
  er.cd_start_offset = static_cast<uint32_t>(startOfCdr);

  if (fwrite(&er, sizeof(er), 1, file_) != 1) {
    return HandleError(kIoError);
  }

  current_offset_ += sizeof(er);

  // Since we can BackUp() and potentially finish writing at an offset earlier than one we had
  // already written to, we must truncate the file.

  if (ftruncate(fileno(file_), current_offset_) != 0) {
    return HandleError(kIoError);
  }

  if (fflush(file_) != 0) {
    return HandleError(kIoError);
  }

  state_ = State::kDone;
  return kNoError;
}