Use Bernhard's script to check urls

Julien Duponchelle 2016-02-04 13:48:25 +01:00
parent 011dbbc474
commit 593ba5b8ad
No known key found for this signature in database
GPG Key ID: F1E2485547D4595D
3 changed files with 58 additions and 167 deletions

check_urls.py | 127 changed lines | Normal file → Executable file

@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Copyright (C) 2015 GNS3 Technologies Inc.
 #
@@ -18,94 +18,81 @@
 import os
 import json
 import sys
-import socket
-import time
-import urllib.request
-import http.client
-from multiprocessing import Pool
-
-
-class CheckError(Exception):
-    def __init__(self, m):
-        self.message = m
-
-    def __str__(self):
-        return self.message
-
-
-class MyHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
-    def redirect_request(self, req, fp, code, msg, hdrs, newurl):
-        return None
-
-urllib.request.install_opener(urllib.request.build_opener(MyHTTPRedirectHandler))
-
-
-def check_url(args):
-    url, appliance = args
-    print("Check " + url)
-    remaining_failure = 5
-    error = None
-    while remaining_failure != 0:
-        try:
-            req = urllib.request.Request(url, method='HEAD')
-            req.add_header
-            urllib.request.urlopen(req, timeout=45) #Yeah a big big timeout for broken websites...
-        except urllib.error.HTTPError as err:
-            if err.getcode() >= 400:
-                error = CheckError('Error with url {} ({})'.format(url, str(err)))
-            else:
-                # We allow error code like 302
-                return
-        except http.client.BadStatusLine as err:
-            error = CheckError('Bad status line {} ({})'.format(url, str(err)))
-        except urllib.error.URLError as err:
-            error = CheckError('Invalid URL {} ({})'.format(url, str(err)))
-        except socket.timeout as err:
-            error = CheckError('Timeout URL {} ({})'.format(url, str(err)))
-        else:
-            return
-        remaining_failure -= 1
-        time.sleep(5)
-    raise error
-
-
-def check_urls(pool, appliance):
-    with open(os.path.join('appliances', appliance)) as f:
-        appliance_json = json.load(f)
-
-    calls = []
+import pycurl
+
+err_list = []
+
+
+def check_url(url, appliance):
+    print(" " + url)
+    error = None
+    try:
+        c = pycurl.Curl()
+        c.setopt(c.URL, url)
+        c.setopt(c.USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)')
+        c.setopt(c.NOBODY, True)
+        c.setopt(c.FOLLOWLOCATION, True)
+        c.perform()
+        http_status = c.getinfo(c.RESPONSE_CODE)
+        if http_status >= 400:
+            error = 'HTTP status {}'.format(http_status)
+        c.close()
+    except pycurl.error:
+        error = c.errstr()
+
+    if error:
+        print(" " + error)
+        err_list.append("{}: {} - {}".format(appliance, url, error))
+
+
+def check_urls(appliance):
+    try:
+        with open(os.path.join('appliances', appliance)) as f:
+            appliance_json = json.load(f)
+    except Exception as err:
+        print(" " + str(err))
+        err_list.append("{}: {}".format(appliance, err))
+        return []
+
+    urls = set()
     for image in appliance_json['images']:
         if 'direct_download_url' in image:
-            calls.append((image['direct_download_url'], appliance))
+            urls.add(image['direct_download_url'])
         if 'download_url' in image:
-            calls.append((image['download_url'], appliance))
+            urls.add(image['download_url'])
     if 'vendor_url' in appliance_json:
-        calls.append((appliance_json['vendor_url'], appliance))
+        urls.add(appliance_json['vendor_url'])
     if 'documentation_url' in appliance_json:
-        calls.append((appliance_json['documentation_url'], appliance))
+        urls.add(appliance_json['documentation_url'])
     if 'product_url' in appliance_json:
-        calls.append((appliance_json['product_url'], appliance))
-    return calls
+        urls.add(appliance_json['product_url'])
+    return list(urls)
 
 
 def main():
-    pool = Pool(processes=8)
-    calls_check_url = []
-    for appliance in os.listdir('appliances'):
-        calls_check_url += check_urls(pool, appliance)
-
     print("=> Check URL in appliances")
-    try:
-        pool.map_async(check_url, calls_check_url).get()
-    except CheckError as e:
-        print(e)
-        sys.exit(1)
-    pool.close()
-    pool.join()
-    print("Everything is ok!")
+    if len(sys.argv) >= 2:
+        appliance_list = sys.argv[1:]
+    else:
+        appliance_list = os.listdir('appliances')
+        appliance_list.sort()
+
+    for appliance in appliance_list:
+        if not appliance.endswith('.gns3a'):
+            appliance += '.gns3a'
+        print("-> {}".format(appliance))
+        for url in check_urls(appliance):
+            check_url(url, appliance)
+        print()
+
+    if len(err_list) == 0:
+        print("Everything is ok!")
+    else:
+        print("{} error(s):".format(len(err_list)))
+        for error in err_list:
+            print(error)
 
 
 if __name__ == '__main__':
     main()
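For reference, here is a minimal standalone sketch of the pycurl HEAD-check pattern that the new check_url() relies on: send a HEAD request, follow redirects, and report the final HTTP status. The head_status helper and the example URL are illustrative only and are not part of the commit.

#!/usr/bin/env python3
import pycurl


def head_status(url):
    # Same options as the checker: HEAD only, browser-like user agent, follow redirects.
    c = pycurl.Curl()
    c.setopt(c.URL, url)
    c.setopt(c.USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)')
    c.setopt(c.NOBODY, True)          # HEAD request: fetch headers, skip the body
    c.setopt(c.FOLLOWLOCATION, True)  # follow 3xx redirects to the final target
    try:
        c.perform()
        return c.getinfo(c.RESPONSE_CODE)
    except pycurl.error:
        print(c.errstr())
        return None
    finally:
        c.close()


if __name__ == '__main__':
    status = head_status('http://www.gns3.com/')  # example URL, not taken from the commit
    if status is None or status >= 400:
        print('broken link (status {})'.format(status))
    else:
        print('ok (status {})'.format(status))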

(deleted file)

@@ -1,97 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright (C) 2015 GNS3 Technologies Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import os
-import json
-import sys
-import pycurl
-
-err_list = []
-
-
-def check_url(url, appliance):
-    print(" " + url)
-    error = None
-    try:
-        c = pycurl.Curl()
-        c.setopt(c.URL, url)
-        c.setopt(c.USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)')
-        c.setopt(c.NOBODY, True)
-        c.perform()
-        http_status = c.getinfo(c.RESPONSE_CODE)
-        if http_status >= 400:
-            error = 'HTTP status {}'.format(http_status)
-        c.close()
-    except pycurl.error:
-        error = c.errstr()
-
-    if error:
-        print(" " + error)
-        err_list.append("{}: {} - {}".format(appliance, url, error))
-
-
-def check_urls(appliance):
-    try:
-        with open(os.path.join('appliances', appliance)) as f:
-            appliance_json = json.load(f)
-    except Exception as err:
-        print(" " + str(err))
-        err_list.append("{}: {}".format(appliance, err))
-        return []
-
-    urls = set()
-    for image in appliance_json['images']:
-        if 'direct_download_url' in image:
-            urls.add(image['direct_download_url'])
-        if 'download_url' in image:
-            urls.add(image['download_url'])
-    if 'vendor_url' in appliance_json:
-        urls.add(appliance_json['vendor_url'])
-    if 'documentation_url' in appliance_json:
-        urls.add(appliance_json['documentation_url'])
-    if 'product_url' in appliance_json:
-        urls.add(appliance_json['product_url'])
-    return list(urls)
-
-
-def main():
-    print("=> Check URL in appliances")
-    if len(sys.argv) >= 2:
-        appliance_list = sys.argv[1:]
-    else:
-        appliance_list = os.listdir('appliances')
-        appliance_list.sort()
-
-    for appliance in appliance_list:
-        if not appliance.endswith('.gns3a'):
-            appliance += '.gns3a'
-        print("-> {}".format(appliance))
-        for url in check_urls(appliance):
-            check_url(url, appliance)
-        print()
-
-    if len(err_list) == 0:
-        print("Everything is ok!")
-    else:
-        print("{} error(s):".format(len(err_list)))
-        for error in err_list:
-            print(error)
-
-
-if __name__ == '__main__':
-    main()


@@ -1 +1,2 @@
 jsonschema==2.5.1
+pycurl==7.21.5
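Assuming the pinned pycurl builds against the local libcurl, the new dependency can be sanity-checked from a Python prompt (a minimal sketch; the exact version string varies by platform):

import pycurl
# pycurl.version is a human-readable build string, e.g. 'PycURL/7.21.5 libcurl/...'
print(pycurl.version)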