# dpkg_parser.py
# Forked from GoogleContainerTools/distroless.
# Copyright 2017 Google Inc. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import gzip
import json
import os
import io
from six.moves import urllib
from package_manager.parse_metadata import parse_package_metadata
from package_manager import util
# Keys looked up in a single package's metadata entry.
FILENAME_KEY = "Filename"
SHA256_KEY = "SHA256"
VERSION_KEY = "Version"

# Directories used for generated artifacts.
OUT_FOLDER = "file"
OS_RELEASE_PATH = "etc"

# Paths of the individual files produced or consumed by this tool.
PACKAGES_FILE_NAME = os.path.join(OUT_FOLDER, "Packages.json")
PACKAGE_MAP_FILE_NAME = os.path.join(OUT_FOLDER, "packages.bzl")
DEB_FILE_NAME = os.path.join(OUT_FOLDER, "pkg.deb")
OS_RELEASE_TAR_FILE_NAME = os.path.join(OUT_FOLDER, "os_release.tar")
OS_RELEASE_FILE_NAME = os.path.join(OS_RELEASE_PATH, "os-release")
# Command-line interface. Every option stores a single string value and is
# left as None when it is not given on the command line.
parser = argparse.ArgumentParser(
    description="Downloads a deb package from a package source file"
)

# (flag, help text) pairs, registered in this order.
_OPTION_HELP = (
    ("--package-files",
     'A list of Packages.gz files to use'),
    ("--packages",
     'A comma delimited list of packages to search for and download'),
    ("--workspace-name",
     'The name of the current bazel workspace'),
    ("--download-and-extract-only",
     'If True, download Packages.gz and make urls absolute from mirror url'),
    ("--mirror-url",
     'The base url for the package list mirror'),
    ("--arch",
     'The target architecture for the package list'),
    ("--distro",
     'The target distribution for the package list'),
    ("--snapshot",
     'The snapshot date to download'),
    ("--sha256",
     'The sha256 checksum to validate for the Packages.gz file'),
    ("--packages-gz-url",
     'The full url for the Packages.gz file'),
    ("--package-prefix",
     'The prefix to prepend to the value of Filename key in the Packages.gz file.'),
)
for _flag, _help_text in _OPTION_HELP:
    parser.add_argument(_flag, action='store', help=_help_text)
del _flag, _help_text
def _flag_enabled(value):
    """Interpret a string-valued command-line option as a boolean.

    The option is declared with action='store', so it arrives as a string
    (or None when absent). Any non-empty string used to count as "on",
    including "False"; explicit negative spellings are now treated as off.
    """
    if value is None:
        return False
    return value.strip().lower() not in ("", "0", "false", "no", "off")


def main():
    """A tool for downloading debian packages and package metadata.

    Parses the command line and runs one of two modes:

    * with --download-and-extract-only enabled, downloads the package list
      and builds the os-release tar;
    * otherwise, downloads the requested .deb packages.
    """
    args = parser.parse_args()
    if _flag_enabled(args.download_and_extract_only):
        download_package_list(args.mirror_url, args.distro, args.arch, args.snapshot, args.sha256,
                              args.packages_gz_url, args.package_prefix)
        util.build_os_release_tar(args.distro, OS_RELEASE_FILE_NAME, OS_RELEASE_PATH, OS_RELEASE_TAR_FILE_NAME)
    else:
        download_dpkg(args.package_files, args.packages, args.workspace_name)
def download_dpkg(package_files, packages, workspace_name):
    """Download the requested .deb packages and write the package map file.

    Using unzipped, json package files with full urls, downloads each
    requested .deb package. The 'Filename' key of a package's metadata is
    used as the download location and the 'SHA256' key to verify the result.

    Args:
        package_files: comma delimited list of paths to json package files,
            searched in the order given.
        packages: comma delimited list of package specs, each either
            ``name`` or ``name=version``.
        workspace_name: name of the current bazel workspace, passed on to
            util.package_to_rule.

    Raises:
        Exception: if a package (at the requested version, when one is
            given) is in none of the package files, or if a downloaded
            file's sha256 differs from the one in its metadata.
    """
    pkg_vals_to_package_file_and_sha256 = {}
    package_to_rule_map = {}
    package_to_version_map = {}
    package_file_to_metadata = {}
    package_file_list = package_files.split(",")

    # Iterate in sorted order: a bare set of strings has no stable order
    # between runs, which made the download order (and the key order of the
    # generated file) non-reproducible.
    for pkg_vals in sorted(set(packages.split(","))):
        pkg_split = pkg_vals.split("=")
        if len(pkg_split) == 2:
            pkg_name, pkg_version = pkg_split
        else:
            # No "=version" suffix: accept whatever version is found first.
            pkg_name, pkg_version = pkg_vals, ""

        for package_file in package_file_list:
            # Parse each package file at most once.
            if package_file not in package_file_to_metadata:
                with open(package_file, 'rb') as f:
                    data = f.read()
                package_file_to_metadata[package_file] = json.loads(data.decode('utf-8'))
            metadata = package_file_to_metadata[package_file]

            if pkg_name not in metadata:
                continue
            pkg = metadata[pkg_name]
            if pkg_version != "" and pkg_version != pkg[VERSION_KEY]:
                continue

            out_file = os.path.join(OUT_FOLDER, util.encode_package_name(pkg_name))
            download_and_save(pkg_name, pkg[FILENAME_KEY], out_file)
            package_to_rule_map[pkg_name] = util.package_to_rule(workspace_name, pkg_name)
            package_to_version_map[pkg_name] = pkg[VERSION_KEY]

            actual_checksum = util.sha256_checksum(out_file)
            expected_checksum = pkg[SHA256_KEY]
            if actual_checksum != expected_checksum:
                raise Exception("Wrong checksum for package %s (%s). Expected: %s, Actual: %s" %(pkg_name, pkg[FILENAME_KEY], expected_checksum, actual_checksum))

            if pkg_version == "":
                break

            # NOTE(review): every spec is visited once and the search stops at
            # the first match, so this conflict check does not look reachable
            # as written; it is kept unchanged pending confirmation.
            if (pkg_vals in pkg_vals_to_package_file_and_sha256 and
                    pkg_vals_to_package_file_and_sha256[pkg_vals][1] != actual_checksum):
                raise Exception("Conflicting checksums for package %s, version %s. Conflicting checksums: %s:%s, %s:%s" %
                                (pkg_name, pkg_version,
                                 pkg_vals_to_package_file_and_sha256[pkg_vals][0], pkg_vals_to_package_file_and_sha256[pkg_vals][1],
                                 package_file, actual_checksum))
            pkg_vals_to_package_file_and_sha256[pkg_vals] = [package_file, actual_checksum]
            break
        else:
            # The inner loop finished without a match in any package file.
            raise Exception("Package: %s, Version: %s not found in any of the sources" % (pkg_name, pkg_version))

    with open(PACKAGE_MAP_FILE_NAME, 'w') as f:
        f.write("packages = " + json.dumps(package_to_rule_map, sort_keys=True))
        f.write("\nversions = " + json.dumps(package_to_version_map, sort_keys=True))
def download_and_save(pkg_key, url, out_file, retry_count=20):
    """Download ``url`` into ``out_file``, resuming with Range requests if short.

    The number of bytes read from the first response is compared with its
    Content-Length header. If they differ, the remainder is fetched with
    ``Range`` requests (guarded by ``If-Range`` when the server sent an
    ETag), at most ``retry_count`` times. When the server sends no
    Content-Length there is nothing to compare against, so the first read is
    taken as the complete body.

    Args:
        pkg_key: package name, used only in error messages.
        url: location of the file to download.
        out_file: path the downloaded bytes are written to.
        retry_count: maximum number of follow-up Range requests.

    Raises:
        Exception: if the body is incomplete and the server does not
            advertise byte ranges, if a Range request is answered with a
            status other than 206, or if the body is still incomplete after
            ``retry_count`` Range requests.
    """
    # NOTE(review): on Python 3 a truncated body may surface as
    # http.client.IncompleteRead from read() rather than as a short read;
    # that case is not handled here -- confirm whether it needs to be.
    res = urllib.request.urlopen(url)
    try:
        headers = res.info()
        content_length = headers.get("Content-Length")
        # Missing headers come back as None; normalise so the checks below
        # do not fail with a TypeError.
        accept_ranges = headers.get("Accept-Ranges") or ""
        etag = headers.get("ETag")
        downloaded = res.read()
    finally:
        res.close()

    contents = [downloaded]
    offset = len(downloaded)
    if content_length is None:
        remaining_bytes = 0
    else:
        remaining_bytes = int(content_length) - offset

    if remaining_bytes != 0:
        if "bytes" not in accept_ranges:
            raise Exception("Fail to download %s (%s). Server returned partial contents." %(pkg_key, url))
        while retry_count > 0:
            retry_count -= 1
            range_headers = {"Range": "bytes=%d-" % offset}
            if etag:
                # Only resume if the resource is unchanged since the first request.
                range_headers["If-Range"] = etag
            req = urllib.request.Request(url, headers=range_headers)
            res = urllib.request.urlopen(req)
            try:
                if res.getcode() != 206:
                    raise Exception("Fail to download %s (%s). Server did not return '206 Partial Content'" %(pkg_key, url))
                downloaded = res.read()
            finally:
                res.close()
            contents.append(downloaded)
            remaining_bytes -= len(downloaded)
            offset += len(downloaded)
            if remaining_bytes == 0:
                break
        if remaining_bytes != 0:
            raise Exception("Fail to download %s (%s). Too many Range request retries." %(pkg_key, url))

    with io.open(out_file, 'wb') as f:
        for c in contents:
            f.write(c)
def download_package_list(mirror_url, distro, arch, snapshot, sha256, packages_gz_url, package_prefix):
    """Downloads a debian package list, expands the relative urls,
    and saves the metadata as a json file

    A debian package list is a gzipped, newline delimited, colon separated
    file with metadata about all the packages available in that repository.
    Multiline keys are indented with spaces.

    An example package looks like:

    Package: newmail
    Version: 0.5-2
    Installed-Size: 76
    Maintainer: Martin Schulze <[email protected]>
    Architecture: amd64
    Depends: libc6 (>= 2.7-1)
    Description: Notificator for incoming mail
    Homepage: http://www.infodrom.org/projects/newmail/
    Description-md5: 49b0168ce625e668ce3031036ad2f541
    Tag: interface::commandline, mail::notification, role::program,
     scope::utility, works-with::mail
    Section: mail
    Priority: optional
    Filename: pool/main/n/newmail/newmail_0.5-2_amd64.deb
    Size: 14154
    MD5sum: 5cd31aab55877339145517fb6d5646cb
    SHA1: 869934a25a8bb3def0f17fef9221bed2d3a460f9
    SHA256: 52ec3ac93cf8ba038fbcefe1e78f26ca1d59356cdc95e60f987c3f52b3f5e7ef

    Args:
        mirror_url: base url of the package list mirror.
        distro: target distribution, used in the default Packages.gz url.
        arch: target architecture, used in the default Packages.gz url.
        snapshot: snapshot date, used in the default Packages.gz url.
        sha256: expected sha256 checksum of the downloaded Packages.gz.
        packages_gz_url: full url of Packages.gz; when empty the url is
            built from mirror_url, snapshot, distro and arch.
        package_prefix: prefix handed to parse_package_metadata; must be
            given exactly when packages_gz_url is given.

    Raises:
        Exception: if the arguments are inconsistent or the checksum of the
            downloaded Packages.gz does not match ``sha256``.
    """
    if bool(packages_gz_url) != bool(package_prefix):
        raise Exception("packages_gz_url and package_prefix must be specified or skipped at the same time.")

    if (not packages_gz_url) and (not mirror_url or not snapshot or not distro or not arch):
        raise Exception("If packages_gz_url is not specified, all of mirror_url, snapshot, "
                        "distro and arch must be specified.")

    url = packages_gz_url
    if not url:
        url = "%s/debian/%s/dists/%s/main/binary-%s/Packages.gz" % (
            mirror_url,
            snapshot,
            distro,
            arch
        )

    buf = urllib.request.urlopen(url)
    try:
        with io.open("Packages.gz", 'wb') as f:
            f.write(buf.read())
    finally:
        # Release the connection even if the read or the write fails.
        buf.close()

    # Verify the download before trusting its contents.
    actual_sha256 = util.sha256_checksum("Packages.gz")
    if sha256 != actual_sha256:
        raise Exception("sha256 of Packages.gz don't match: Expected: %s, Actual:%s" %(sha256, actual_sha256))

    with gzip.open("Packages.gz", 'rb') as f:
        data = f.read()
    metadata = parse_package_metadata(data, mirror_url, snapshot, package_prefix)

    with open(PACKAGES_FILE_NAME, 'w') as f:
        json.dump(metadata, f)
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()