From c4dcbe1567307ea3e587ddf184c62cda59f66da8 Mon Sep 17 00:00:00 2001
From: Bernhard Ehlers <be@bernhard-ehlers.de>
Date: Thu, 4 Feb 2016 17:03:56 +0100
Subject: [PATCH 1/2] use GET requests instead of HEAD for checking URLs

---
 appliances/microsoft-windows+ie.gns3a |  2 +-
 check_urls.py                         | 13 ++++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/appliances/microsoft-windows+ie.gns3a b/appliances/microsoft-windows+ie.gns3a
index 16bc2c6..ee78f59 100644
--- a/appliances/microsoft-windows+ie.gns3a
+++ b/appliances/microsoft-windows+ie.gns3a
@@ -3,7 +3,7 @@
     "category": "guest",
     "description": "Microsoft Windows (or simply Windows) is a graphical operating system developed, marketed, and sold by Microsoft.\n\nMicrosoft releases time limited VMs for testing Internet Explorer.\n\nOn the download site select the VM, as platform select VirtualBox, then download the zip file, afterwards unzip it.",
     "vendor_name": "Microsoft",
-    "vendor_url": "https://dev.microsoft.com/",
+    "vendor_url": "http://www.microsoft.com",
     "product_name": "Windows",
     "registry_version": 1,
     "status": "experimental",
diff --git a/check_urls.py b/check_urls.py
index 84db421..64def69 100755
--- a/check_urls.py
+++ b/check_urls.py
@@ -22,6 +22,11 @@ import pycurl
 
 err_list = []
 
+
+def data_abort(data):
+    return -1
+
+
 def check_url(url, appliance):
     print(" " + url)
 
@@ -30,15 +35,17 @@ def check_url(url, appliance):
         c = pycurl.Curl()
         c.setopt(c.URL, url)
         c.setopt(c.USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)')
-        c.setopt(c.NOBODY, True)
         c.setopt(c.FOLLOWLOCATION, True)
+        c.setopt(c.WRITEFUNCTION, data_abort)
         c.perform()
         http_status = c.getinfo(c.RESPONSE_CODE)
         if http_status >= 400:
             error = 'HTTP status {}'.format(http_status)
         c.close()
-    except pycurl.error:
-        error = c.errstr()
+    except pycurl.error as err:
+        errno, errstr = err.args
+        if errno != pycurl.E_WRITE_ERROR:
+            error = errstr
 
     if error:
         print(" " + error)

From 380454b081892ba2965441fb62c189f65c21b4b3 Mon Sep 17 00:00:00 2001
From: Bernhard Ehlers <be@bernhard-ehlers.de>
Date: Thu, 4 Feb 2016 18:42:09 +0100
Subject: [PATCH 2/2] fix processing of HTTP error code

---
 check_urls.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/check_urls.py b/check_urls.py
index 64def69..c17ef46 100755
--- a/check_urls.py
+++ b/check_urls.py
@@ -31,26 +31,30 @@ def check_url(url, appliance):
     print(" " + url)
 
     error = None
+    c = pycurl.Curl()
     try:
-        c = pycurl.Curl()
         c.setopt(c.URL, url)
         c.setopt(c.USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)')
+        c.setopt(c.HTTPHEADER, ['Accept-Language: en-us'])
         c.setopt(c.FOLLOWLOCATION, True)
         c.setopt(c.WRITEFUNCTION, data_abort)
         c.perform()
-        http_status = c.getinfo(c.RESPONSE_CODE)
-        if http_status >= 400:
-            error = 'HTTP status {}'.format(http_status)
-        c.close()
     except pycurl.error as err:
         errno, errstr = err.args
         if errno != pycurl.E_WRITE_ERROR:
             error = errstr
 
+    if not error:
+        http_status = c.getinfo(c.RESPONSE_CODE)
+        if http_status >= 400:
+            error = 'HTTP status {}'.format(http_status)
+
     if error:
         print(" " + error)
         err_list.append("{}: {} - {}".format(appliance, url, error))
 
+    c.close()
+
 
 def check_urls(appliance):
     try: