Subject: Re: pkg/35199: mk/bulk/upload shouldn't upload restricted binaries
To: None <rillig@NetBSD.org, gnats-admin@netbsd.org,>
From: Joerg Sonnenberger <joerg@britannica.bec.de>
List: pkgsrc-bugs
Date: 12/07/2006 14:30:05
The following reply was made to PR pkg/35199; it has been noted by GNATS.

From: Joerg Sonnenberger <joerg@britannica.bec.de>
To: gnats-bugs@NetBSD.org
Cc: 
Subject: Re: pkg/35199: mk/bulk/upload shouldn't upload restricted binaries
Date: Thu, 07 Dec 2006 15:25:31 +0100

 --Boundary_(ID_3EMgGMFn06fpjVjJ2YRdwA)
 Content-type: text/plain; charset="us-ascii"
 Content-transfer-encoding: 7BIT
 Content-disposition: inline
 
 On Thu, Dec 07, 2006 at 01:35:02PM +0000, Quentin Garnier wrote:
 >  I'm pretty sure this happens because of the way --include-from and
 >  --exclude-from are used.  Replacing --include-from with --files-from
 >  *might* be what is intended.  rsync's manual is not very clear.
 
 I've attached the upload script I'm using in the rewritten version,
 which works. It should illustrate how it can work.
 
 Joerg
 
 --Boundary_(ID_3EMgGMFn06fpjVjJ2YRdwA)
 Content-type: text/plain; charset=us-ascii; NAME="upload.py"
 Content-transfer-encoding: 7BIT
 Content-disposition: attachment; filename="upload.py"
 
 #! /usr/pkg_bulk/bin/python2.4
 
 import cPickle
 import md5
 import sha
 import os
 import sys
 import bulk.config
 import bulk.pmatch
 
 def _load_pickle(path):
 	"""Unpickle and return the single object stored in the file at path.
 
 	The file is closed explicitly, even if unpickling raises, instead of
 	relying on the garbage collector to release the handle.
 	"""
 	# NOTE(review): unpickling executes whatever the file's author chose.
 	# Presumably these files are written by an earlier bulk build stage;
 	# confirm they are never read from an untrusted location.
 	# The mode stays "r" to match the original reader, since the writer
 	# of these files is not visible here.
 	pickle_file = open(path, "r")
 	try:
 		return cPickle.load(pickle_file)
 	finally:
 		pickle_file.close()
 
 # Mapping of package name -> package object; used further down through
 # tree[pkg].can_upload() and tree[pkg].categories.
 tree = _load_pickle(bulk.config.bulk_resolved_pkgtree)
 
 # Mapping of package name -> iterable of the package names it depends on.
 pkg_dependencies = _load_pickle(bulk.config.bulk_dependencies_dict)
 
 # Names of the binary packages present in All/, without the ".tgz" suffix.
 # Only *.tgz entries are considered: the checksum and upload steps further
 # down build "<name>.tgz" paths, so stripping four characters from any other
 # file (pkg_summary.gz, *.tbz, ...) would yield names that cannot be opened.
 packages = set(name[:-4]
     for name in os.listdir("%s/All" % bulk.config.package_root)
     if name.endswith(".tgz"))
 # Memoisation sets for the upload check: packages already found to be
 # uploadable and not uploadable, respectively.
 unrestricted = set()
 restricted = set()
 
 # Keep only the packages listed (one name per line) in the
 # bulk_report_finished file.  If that file cannot be read, nothing is
 # considered finished and the package set becomes empty.
 try:
 	status_file = open(bulk.config.bulk_report_finished, "r")
 	try:
 		already_done = set([line.strip() for line in status_file.readlines()])
 	finally:
 		# Release the handle on every path, including read errors.
 		status_file.close()
 except IOError:
 	already_done = set()
 packages.intersection_update(already_done)
 del already_done
 
 def recursive_can_upload(pkg):
 	"""Return True if pkg and everything it depends on may be uploaded.
 
 	A package is uploadable when tree[pkg].can_upload() is true and the
 	same holds, recursively, for every entry of pkg_dependencies[pkg].
 	Verdicts are cached in the module-level sets `unrestricted` and
 	`restricted`, so each package is evaluated at most once.
 
 	Raises KeyError if pkg is missing from `tree` or `pkg_dependencies`.
 	A package is only cached after its dependencies have been examined,
 	so a dependency cycle would recurse without bound.
 	"""
 	# Serve the answer from the caches when it is already known.
 	if pkg in unrestricted:
 		return True
 	if pkg in restricted:
 		return False
 
 	verdict = True
 	if not tree[pkg].can_upload():
 		# The package itself is restricted; dependencies are not examined.
 		verdict = False
 	else:
 		for dep in pkg_dependencies[pkg]:
 			if not recursive_can_upload(dep):
 				# One restricted dependency taints this package; stop early.
 				verdict = False
 				break
 
 	if verdict:
 		unrestricted.add(pkg)
 	else:
 		restricted.add(pkg)
 	return verdict
 
 print "Computing restricted packages... ",
 sys.stdout.flush()
 
 for pkg in packages:
 	if pkg not in tree:
 		continue
 	recursive_can_upload(pkg)
 
 print "done"
 
 print "Computing full category tree... ",
 sys.stdout.flush()
 
 full_tree = {'All': set()}
 for pkg in unrestricted:
 	if pkg not in packages:
 		continue
 	full_tree['All'].add(pkg)
 	for cat in tree[pkg].categories:
 		if cat not in full_tree: full_tree[cat] = set()
 		full_tree[cat].add(pkg)
 
 print "done"
 
 print "Computing vulnerable packages... ",
 sys.stdout.flush()
 
 vul_pkgs = set()
 vul_patterns = set()
 for line in open("%s/pkg-vulnerabilities" % bulk.config.distfiles_root).readlines():
 	if line.startswith("#"):
 		continue
 	vul_patterns.add(line.split(None, 1)[0])
 
 pattern_hash = {}
 for pattern in vul_patterns:
 	if pattern[0].isalnum() and pattern[1].isalnum() and pattern[2].isalnum():
 		key = pattern[:3]
 		pattern_hash[key] = pattern_hash.get(key, []) + [pattern]
 
 for pkg in full_tree['All']:
 	if pkg[0].isalnum() and pkg[1].isalnum() and pkg[2].isalnum():
 		search_list = pattern_hash.get(pkg[:3], [])
 	else:
 		search_list = vul_patterns
 	for pattern in search_list:
 		if bulk.pmatch.pmatch(pattern, pkg):
 			vul_pkgs.add(pkg)
 			break
 full_tree['All'].difference_update(vul_pkgs)
 
 print "done"
 
 print "Computing summary file... ",
 sys.stdout.flush()
 os.system("""cd %s/All && ls -t | grep "\.t[gb]z\$" | while read n ; do %s -X "$n"; done | gzip > pkg_summary.gz""" % (bulk.config.package_root, bulk.config.pkg_info))
 print "done"
 
 print "Computing checksums... ",
 sys.stdout.flush()
 md5file = open("%s/MD5" % bulk.config.package_root, "w")
 sha1file = open("%s/SHA1" % bulk.config.package_root, "w")
 for pkg in full_tree['All']:
 	m = md5.new()
 	f = open("%s/All/%s.tgz" % (bulk.config.package_root, pkg))
 	content = f.read()
 	f.close()
 	m.update(content)
 	md5file.write("MD5 (All/%s.tgz) = %s\n" % (pkg, m.hexdigest()))
 	m = sha.new()
 	m.update(content)
 	sha1file.write("SHA1 (All/%s.tgz) = %s\n" % (pkg, m.hexdigest()))
 for pkg in vul_pkgs:
 	m = md5.new()
 	f = open("%s/All/%s.tgz" % (bulk.config.package_root, pkg))
 	content = f.read()
 	f.close()
 	m.update(content)
 	md5file.write("MD5 (vulnerable/%s.tgz) = %s\n" % (pkg, m.hexdigest()))
 	m = sha.new()
 	m.update(content)
 	sha1file.write("SHA1 (vulnerable/%s.tgz) = %s\n" % (pkg, m.hexdigest()))
 print "done"
 
 print "Signing checksums... ",
 sys.stdout.flush()
 os.system("%s --clearsign %s/MD5" % (bulk.config.gpg_cmd, bulk.config.package_root))
 os.system("%s --clearsign %s/SHA1" % (bulk.config.gpg_cmd, bulk.config.package_root))
 print "done"
 
 print "Uploading packages... ",
 cmd = "cd %s && %s %s --exclude-from=- . %s" % (bulk.config.package_root, bulk.config.rsync_cmd, bulk.config.rsync_args, bulk.config.rsync_target)
 f = os.popen(cmd, "w")
 for file in ('MD5', 'MD5.asc', 'SHA1', 'SHA1.asc', 'All/pkg_summary.gz'):
 	f.write("+ /%s\n" % file)
 # /vulnerable cleaned up later...
 f.write("+ /vulnerable\n")
 for cat in full_tree:
 	f.write("+ %s\n" % cat)
 for cat in full_tree:
 	for pkg in full_tree[cat]:
 		f.write("+ %s/%s.tgz\n" % (cat, pkg))
 for cat in full_tree:
 	f.write("- %s/*\n" % cat)
 f.close()
 
 cmd = "cd %s/All && %s %s --exclude-from=- . %s/vulnerable" % (bulk.config.package_root, bulk.config.rsync_cmd, bulk.config.rsync_args, bulk.config.rsync_target)
 f = os.popen(cmd, "w")
 for pkg in vul_pkgs:
 		f.write("+ /%s.tgz\n" % pkg)
 f.write("- /*\n")
 f.close()
 sys.stdout.flush()
 print "done"
 
 --Boundary_(ID_3EMgGMFn06fpjVjJ2YRdwA)--