diff options
Diffstat (limited to 'default/steps/data/helpers/netinstall_iso_finder.py')
-rw-r--r-- | default/steps/data/helpers/netinstall_iso_finder.py | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/default/steps/data/helpers/netinstall_iso_finder.py b/default/steps/data/helpers/netinstall_iso_finder.py new file mode 100644 index 0000000..b4a135b --- /dev/null +++ b/default/steps/data/helpers/netinstall_iso_finder.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +"""Find the latest netinstall iso for a Debian version and system architecture.""" + +from html.parser import HTMLParser +from urllib2 import urlopen +from urlparse import urljoin +import re +import sys +import argparse +import logging + +logger = logging.getLogger(__name__) + +class LinkParser(HTMLParser): + """Retrieve links (a hrefs) from a text/html document""" + def __init__(self, url): + HTMLParser.__init__(self) + self.url = url + self.links = set() + response = urlopen(url) + contentType = response.info().get('Content-Type') + if not contentType: + return + logger.debug("url = " + url ); + logger.debug("contentType = " + contentType ); + if ';' in contentType: + (mediaType,charset) = contentType.split(";") + charset = charset.split("=")[1] + else: + mediaType = contentType + # ISO-8859-1 is no longer the default charset, see https://tools.ietf.org/html/rfc7231#appendix-B + # Let's use UTF-8. + charset = "utf-8" + if mediaType =='text/html': + htmlBytes = response.read() + htmlString = htmlBytes.decode(charset) + self.feed(htmlString) + + def handle_starttag(self, tag, attrs): + if tag == 'a': + for (key, value) in attrs: + if key == 'href': + new_url = urljoin(self.url,value) + if re.match("^"+self.url, new_url): + self.links.add(new_url) + + def get_links(self): + """Returns all the collected links""" + return self.links + + +def url_find(to_visit_url_set,visited_url_set,found_url_set): + """Recursively look for urls given a regex, a set of urls to visit, a set of already visited urls, a set of already found urls. Returns the set of found urls""" + logger.debug("Progress: to_visit:{} visited:{} found:{}".format(len(to_visit_url_set),len(visited_url_set),len(found_url_set))) + assert(len(to_visit_url_set.intersection(visited_url_set)) == 0) + assert(len(to_visit_url_set.intersection(found_url_set)) == 0) + if (len(to_visit_url_set) == 0): + return [visited_url_set,found_url_set] + else: + url = to_visit_url_set.pop() + visited_url_set.add(url) + if target_regex.match(url): + found_url_set.add(url) + return url_find(to_visit_url_set, visited_url_set, found_url_set) + else: + new_url_set = set([url for url in LinkParser(url).get_links() if (logger.debug(url) or True) and url_regex.match(url)]) + new_url_set.difference_update(visited_url_set) + to_visit_url_set.update(new_url_set) + return url_find(to_visit_url_set, visited_url_set, found_url_set) + +def key_normalize(version_string): + """" + In order to perform a natural sorting, we normalize the version (X.Y.Z) as a unique integer with the following formula: X*100 + Y*10 + Z + For instance, it solves situations where "9.9.0" is greater than "9.9.11" + """ + splitted_string = version_string.split('.') + assert(len(splitted_string) == 3) + return int(splitted_string[0])*100+int(splitted_string[1])*10+int(splitted_string[2]) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=sys.modules[__name__].__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("distrib", metavar="DISTRIB", help="distribution") + parser.add_argument("version", metavar="VERSION", help="version") + parser.add_argument("arch", metavar="ARCH", help="architecture") + parser.add_argument("mirror", metavar="MIRROR", help="mirror", nargs="?") + parser.add_argument('--info', action="store_true", default=False, help='print info messages') + parser.add_argument('--debug', action="store_true", default=False, help='print debug messages') + args = parser.parse_args() + + handler = logging.StreamHandler() + if args.debug: + logger.setLevel(logging.DEBUG) + handler.setLevel(logging.DEBUG) + elif args.info: + logger.setLevel(logging.INFO) + handler.setLevel(logging.INFO) + else: + logger.setLevel(logging.WARNING) + handler.setLevel(logging.WARNING) + handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s')) + logger.addHandler(handler) + + try: + visited = set([]) + found = set([]) + if (args.distrib.lower() == "debian"): + if args.mirror == None: + args.mirror = "http://cdimage.debian.org/" + if not re.match("^\d+$",args.version): + raise Exception("please give the Debian release number (e.g. 8 for Jessie)") + if args.version == '10': + url_regex = re.compile("^"+args.mirror+"cdimage/release/(?:"+args.version+"\.\d+\.\d+/(?:"+args.arch+"/(?:iso-cd/(?:debian-"+args.version+"\.\d+\.\d+-"+args.arch+"-netinst\.iso)?)?)?)?$") + else: + url_regex = re.compile("^"+args.mirror+"cdimage/archive/(?:"+args.version+"\.\d+\.\d+/(?:"+args.arch+"/(?:iso-cd/(?:debian-"+args.version+"\.\d+\.\d+-"+args.arch+"-netinst\.iso)?)?)?)?$") + target_regex = re.compile("^.*-netinst\.iso$") + [visited,found] = url_find(set([args.mirror+"cdimage/"+v+"/" for v in ["release","archive"]]), set(), set()) + elif (args.distrib.lower() == "ubuntu"): + if args.mirror == None: + args.mirror = "http://(?:archive|old-releases).ubuntu.com/" + servers = set(["http://"+s+".ubuntu.com/ubuntu/" for s in ["old-releases","archive"]]) + else: + servers = set([args.mirror]) + if not re.match("^\w+$",args.version): + raise Exception("please give the Ubuntu release name") + url_regex = re.compile("^"+args.mirror+"ubuntu/dists/(?:"+args.version+"(?:-updates)?/(?:main/(?:installer-"+args.arch+"/(?:current/(?:(?:legacy-)?images/(?:netboot/(?:mini\.iso)?)?)?)?)?)?)?$") + target_regex = re.compile("^.*/mini\.iso$") + [visited,found] = url_find(servers, set(), set()) + elif (args.distrib.lower() == "centos"): + if args.mirror == None: + args.mirror = "http://mirror.in2p3.fr/linux/CentOS/" + if not re.match("^\d+$",args.version): + raise Exception("please give the CentOS release number (e.g. 7 for CentOS-7)") + if args.version == '6': + url_regex = re.compile("^"+args.mirror+"(?:"+args.version+"/(?:isos/(?:"+args.arch+"/(?:CentOS-"+args.version+"(?:\.\d+)?-"+args.arch+"-netinstall\.iso)?)?)?)?$") + target_regex = re.compile("^.*CentOS-\d+(?:\.\d+)?-\w+-netinstall\.iso$") + elif args.version == '7': + url_regex = re.compile("^"+args.mirror+"(?:"+args.version+"/(?:isos/(?:"+args.arch+"/(?:CentOS-"+args.version+"-"+args.arch+"-NetInstall-\d+\.iso)?)?)?)?$") + target_regex = re.compile("^.*CentOS-\d+-\w+-NetInstall-\d+\.iso$") + else: + url_regex = re.compile("^"+args.mirror+"(?:"+args.version+"/(?:isos/(?:"+args.arch+"/(?:CentOS-"+args.version+"\.\d+\.\d+-"+args.arch+"-boot\.iso)?)?)?)?$") + target_regex = re.compile("^.*CentOS-\d+\.\d+\.\d+-\w+-boot\.iso$") + [visited,found] = url_find(set([args.mirror]), set(), set()) + else: + raise Exception("this distribution is not supported") + logger.info("URL regex: "+url_regex.pattern) + logger.info("Target regex: "+target_regex.pattern) + logger.debug("Visited URLs:") + for url in visited: + logger.debug(url) + logger.info("Found URLs:") + for url in found: + logger.info(url) + if len(found) > 0: + if (args.distrib.lower() == "debian"): + print(sorted(found,key=lambda x:key_normalize(re.sub(r".*/debian-(\d+).(\d+).(\d+)-"+args.arch+"-netinst\.iso$",r"\1.\2.\3",x)),reverse=True)[0]) + else: + print(sorted(found, reverse=False)[0]) + else: + raise Exception("no url found") + except Exception as exc: + sys.stderr.write(u"Error: %s\n" % exc) + sys.exit(1) |