# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Functions to process archive files."""

import os
import tempfile
import tarfile
import urllib.parse
import urllib.request
import zipfile


class ZipFileWithPermission(zipfile.ZipFile):
    """Subclassing Zipfile to preserve file permission.

    See https://bugs.python.org/issue15795
    """
    def _extract_member(self, member, targetpath, pwd):
        """Extracts one member, then re-applies its recorded mode bits.

        Args:
            member: A zipfile.ZipInfo or the name of a member.
            targetpath: Path to extract the member to.
            pwd: Password passed through to the base implementation.

        Returns:
            Whatever the base class returns for the extracted member (used
            here as the path handed to os.chmod).
        """
        ret_val = super()._extract_member(member, targetpath, pwd)

        if not isinstance(member, zipfile.ZipInfo):
            member = self.getinfo(member)
        # The high 16 bits of external_attr are used as the file mode. A
        # value of zero is treated as "no mode recorded" and left alone.
        attr = member.external_attr >> 16
        if attr != 0:
            os.chmod(ret_val, attr)
        return ret_val


def unzip(archive_path, target_path):
    """Extracts zip file to a path.

    File permissions recorded in the archive are preserved.

    Args:
        archive_path: Path to the zip file.
        target_path: Path to extract files to.
    """

    with ZipFileWithPermission(archive_path) as zfile:
        zfile.extractall(target_path)


def untar(archive_path, target_path):
    """Extracts tar file to a path.

    Args:
        archive_path: Path to the tar file.
        target_path: Path to extract files to.
    """

    # NOTE(review): archives handled here may come from the network (see
    # download_and_extract). extractall() is called without an extraction
    # filter, so on Python versions whose default is not restrictive a
    # malicious archive could write outside target_path. Consider passing
    # filter='data' once the minimum supported Python version allows it.
    with tarfile.open(archive_path, mode='r') as tfile:
        tfile.extractall(target_path)


# Maps a supported file name suffix to the function that extracts it.
ARCHIVE_TYPES = {
    '.zip': unzip,
    '.tar.gz': untar,
    '.tar.bz2': untar,
    '.tar.xz': untar,
}


def is_supported_archive(url):
    """Checks whether the url points to a supported archive.

    Args:
        url: The url to the archive file.

    Returns:
        True if get_extract_func() knows how to extract it, False otherwise.
    """
    return get_extract_func(url) is not None


def get_extract_func(url):
    """Gets the function to extract an archive.

    The decision is based on the file name suffix of the url path, with a
    special case for crates.io download urls which carry no suffix.

    Args:
        url: The url to the archive file.

    Returns:
        A function to extract the archive. None if not found.
    """

    parsed_url = urllib.parse.urlparse(url)
    filename = os.path.basename(parsed_url.path)
    for ext, func in ARCHIVE_TYPES.items():
        if filename.endswith(ext):
            return func
    # crates.io download url does not have file suffix
    # e.g., https://crates.io/api/v1/crates/syn/1.0.16/download
    # Use substring membership rather than str.find(): find() returns -1
    # (which is truthy) when the substring is absent, which previously made
    # every unrecognized url look like a crates.io tarball.
    if '/crates.io/api/' in url or '/static.crates.io/crates/' in url:
        return untar
    return None


def download_and_extract(url):
    """Downloads and extracts an archive file to a temporary directory.

    Args:
        url: Url to download.

    Returns:
        Path to the temporary directory.

    Raises:
        ValueError: If the url does not point to a supported archive type.
    """

    # Resolve the extractor first so an unsupported url fails fast, before
    # anything is downloaded, instead of failing on a call to None.
    extract_func = get_extract_func(url)
    if extract_func is None:
        raise ValueError(f'Unsupported archive url: {url}')

    print(f'Downloading {url}')
    archive_file, _headers = urllib.request.urlretrieve(url)

    temporary_dir = tempfile.mkdtemp()
    print(f'Extracting {archive_file} to {temporary_dir}')
    extract_func(archive_file, temporary_dir)

    return temporary_dir


def find_archive_root(path):
    """Finds the real root of an extracted archive.

    Sometimes archives have additional layers of directories. This function
    tries to guess the right 'root' path by entering all single
    subdirectories.

    Args:
        path: Path to the extracted archive.

    Returns:
        The first directory found, walking down from path, that contains
        any file or more than one subdirectory. Falls back to path itself
        if no such directory exists.
    """
    for root, dirs, files in os.walk(path):
        if files or len(dirs) > 1:
            return root
    return path