1# Copyright (C) 2018 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Functions to process archive files."""
15
import os
import tarfile
import tempfile
import urllib.parse
import urllib.request
import zipfile
21
22
class ZipFileWithPermission(zipfile.ZipFile):
    """ZipFile variant that re-applies the archived file mode on extraction.

    Plain zipfile.ZipFile does not restore permissions, see
    https://bugs.python.org/issue15795
    """
    def _extract_member(self, member, targetpath, pwd):
        """Extracts one member and then applies its recorded mode.

        Args:
            member: A zipfile.ZipInfo, or the name of a member.
            targetpath: Directory the member is extracted into.
            pwd: Password forwarded to the base implementation.

        Returns:
            The path of the extracted file, as returned by the base class.
        """
        extracted_path = super()._extract_member(member, targetpath, pwd)

        # Callers may pass either a ZipInfo or just the member name.
        info = (member if isinstance(member, zipfile.ZipInfo)
                else self.getinfo(member))
        # The high 16 bits of external_attr are used as the file mode; a zero
        # value is treated as "no mode recorded" and the file is left as is.
        mode = info.external_attr >> 16
        if mode:
            os.chmod(extracted_path, mode)
        return extracted_path
37
38
def unzip(archive_path, target_path):
    """Unpacks a zip archive into a directory, keeping file permissions.

    Args:
        archive_path: Path to the zip file.
        target_path: Path to extract files to.
    """
    zip_archive = ZipFileWithPermission(archive_path)
    # The context manager closes the archive even if extraction fails.
    with zip_archive:
        zip_archive.extractall(target_path)
49
50
def untar(archive_path, target_path):
    """Extracts tar file to a path.

    The archive is opened with transparent compression detection. When the
    running Python provides tar extraction filters, the 'data' filter is used
    so that members which would end up outside target_path, links that escape
    it, and device files are refused instead of being written.

    Args:
        archive_path: Path to the tar file.
        target_path: Path to extract files to.

    Raises:
        tarfile.TarError: If the archive cannot be read, or a member is
            rejected by the extraction filter.
    """

    with tarfile.open(archive_path, mode='r') as tfile:
        # Archives come from the network, so never trust member names.
        if hasattr(tarfile, 'data_filter'):
            tfile.extractall(target_path, filter='data')
        else:
            # Older interpreters have no filter support; fall back to the
            # only behaviour they offer.
            tfile.extractall(target_path)
61
62
# Maps a file-name suffix to the function that extracts that kind of archive.
# All tar flavours share one extractor; lookups iterate in the order below.
ARCHIVE_TYPES = dict((
    ('.zip', unzip),
    ('.tar.gz', untar),
    ('.tar.bz2', untar),
    ('.tar.xz', untar),
))
69
70
def is_supported_archive(url):
    """Tells whether an extract function can be found for the given url."""
    extractor = get_extract_func(url)
    return extractor is not None
74
75
def get_extract_func(url):
    """Gets the function to extract an archive.

    The extractor is chosen from the suffix of the file name in the url path.
    crates.io download urls carry no suffix, so they are recognised by their
    location instead and handled as tar archives.

    Args:
        url: The url to the archive file.

    Returns:
        A function to extract the archive. None if not found.
    """

    parsed_url = urllib.parse.urlparse(url)
    filename = os.path.basename(parsed_url.path)
    for ext, func in ARCHIVE_TYPES.items():
        if filename.endswith(ext):
            return func
    # crates.io download url does not have file suffix
    # e.g., https://crates.io/api/v1/crates/syn/1.0.16/download
    # Test with `in`, not the raw result of str.find(): find() returns -1 on
    # a miss, and -1 is truthy, which would match every url.
    if '/crates.io/api/' in url or '/static.crates.io/crates/' in url:
        return untar
    return None
96
97
def download_and_extract(url):
    """Downloads and extracts an archive file to a temporary directory.

    The extractor is resolved before anything is downloaded, so an
    unsupported url fails fast with a clear error instead of a
    "'NoneType' object is not callable" after the download has finished.

    Args:
        url: Url to download.

    Returns:
        Path to the temporary directory.

    Raises:
        ValueError: If no extract function is known for the url.
    """

    extract = get_extract_func(url)
    if extract is None:
        raise ValueError(f'Unsupported archive url: {url}')

    print(f'Downloading {url}')
    # urlretrieve stores the download in a temporary file and returns its path.
    archive_file, _headers = urllib.request.urlretrieve(url)

    temporary_dir = tempfile.mkdtemp()
    print(f'Extracting {archive_file} to {temporary_dir}')
    extract(archive_file, temporary_dir)

    return temporary_dir
116
117
def find_archive_root(path):
    """Finds the real root of an extracted archive.

    Archives sometimes wrap their content in extra layers of directories.
    This function guesses the real 'root' by descending for as long as a
    directory holds nothing but a single subdirectory.

    Args:
        path: Path to the extracted archive.

    Returns:
        The first directory, walking top-down from path, that contains a file
        or more than one subdirectory. Falls back to path itself when no such
        directory exists.
    """
    candidates = (
        current
        for current, subdirs, filenames in os.walk(path)
        if filenames or len(subdirs) > 1
    )
    return next(candidates, path)
134