#!/usr/bin/env python3
# Copyright (C) 2022 Bernhard Ehlers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
docker_build - (re)build outdated docker images
usage: docker_build [--help] [--all] [--dir DIR] [--dry-run] [--file FILE]
[image ...]
positional arguments:
image images to build additionally
optional arguments:
--help, -h, -? prints this screen
--all, -a build all images
--dir DIR, -C DIR change directory before building
--dry-run, -n do not build images, just print them
--file FILE, -f FILE use FILE as image config (default: 'docker_images')
The docker images and their properties are configured in a
file, by default 'docker_images' in the current directory.
Format of the lines in the configuration file:
Name <tab> Directory [<tab> Base Image] [<tab> Build Options]
or
Global Build Options
When running without an image arg, it checks all images,
if the directory containing its Dockerfile has changed or
its base image has been updated.
In some special cases a docker image needs a forced rebuild.
For that add the list of images or base images, to be rebuilt,
to the arguments. When using the option -a/--all, all images are
forcibly rebuilt, except those specified on the command line.
The environment variable DOCKER_REPOSITORY must be set to the
Docker repository to use for name-only targets.
"""
import os
import sys
import argparse
import datetime
import json
import re
import shlex
import subprocess
import dxf
import requests.exceptions
import dateutil.parser
# module state, filled in while the script runs
docker_login = {}   # registry -> [user, password]
image_info = {}     # image reference -> registry information (or None)
images = []         # entries read from the image configuration file

# Command line options as (flags, keyword arguments) pairs.
# The built-in help of argparse is switched off, so that '-?'
# can be registered as an additional help flag.
_CLI_ARGUMENTS = (
    (('--help', '-h', '-?'),
     {"action": 'help',
      "help": 'prints this screen'}),
    (('--all', '-a'),
     {"action": 'store_true',
      "help": 'build all images'}),
    (('--dir', '-C'),
     {"action": 'append',
      "help": 'change directory before building'}),
    (('--dry-run', '-n'),
     {"action": 'store_true',
      "help": 'do not build images, just print them'}),
    (('--file', '-f'),
     {"default": 'docker_images',
      "help": "use FILE as image config (default: '%(default)s')"}),
    (('image',),
     {"nargs": "*",
      "help": 'images to build additionally'}),
)

parser = argparse.ArgumentParser(
    add_help=False,
    description='%(prog)s - (re)build outdated docker images')
for _flags, _settings in _CLI_ARGUMENTS:
    parser.add_argument(*_flags, **_settings)
# regex for repository: [host[:port]/]repo[:tag][@digest]
# The named groups 'host', 'repo', 'tag' and 'digest' are the ones
# read by parse_repository().
RE_REPOSITORY = re.compile(r'''
(?:(?P<host>[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?    # host name
   (?:                                          # followed by ...
      (?:\.[a-zA-Z0-9]                          # domain
         (?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)+
      (?::[0-9]+)?                              # and optional port
   |                                            # or ...
      :[0-9]+)                                  # port
)/)?                                            # finally a /
(?P<repo>[a-z0-9]+(?:(?:\.|__?|-+)[a-z0-9]+)*   # repo component
   (?:/[a-z0-9]+(?:(?:\.|__?|-+)[a-z0-9]+)*)*   # more components
)
(?::(?P<tag>[a-zA-Z0-9_][a-zA-Z0-9_.-]{,127}))? # optional tag
(?:@(?P<digest>[a-z0-9]+(?:[.+_-][a-z0-9]+)*    # optional digest,
   :[0-9a-f]{32,}))?                            # at most one
''', re.VERBOSE)
def parse_repository(repository):
    """
    split an image reference into registry, repo and tag

    Returns the tuple (registry, repo, tag). A digest takes precedence
    over a tag, without both the tag is "latest".
    Raises ValueError, if the reference is malformed or too long.
    """
    parsed = RE_REPOSITORY.fullmatch(repository)
    if parsed is None:
        raise ValueError("invalid reference format")

    host = parsed.group('host')
    registry = host.lower() if host else "docker.io"
    repo = parsed.group('repo')
    tag = parsed.group('digest') or parsed.group('tag') or "latest"

    # the length check uses the registry name before docker.io
    # gets replaced by its API host
    len_registry = len(registry)

    if registry == "docker.io":
        # docker.io is served by registry-1.docker.io,
        # single component repos live below library/
        registry = "registry-1.docker.io"
        if "/" not in repo:
            repo = "library/" + repo

    # registry + "/" + repo must fit into 255 characters
    if len_registry + len(repo) > 254:
        raise ValueError("repository name must not be more than 255 characters")

    return registry, repo, tag
def docker_auth(docker, response):
    """
    authenticate docker access

    Callback passed to dxf.DXF() in get_time_layers(). The credentials
    are taken from the registry_auth attribute stored on the client.
    """
    user = docker.registry_auth[0]
    password = docker.registry_auth[1]
    docker.authenticate(user, password, response=response)
def get_time_layers(repository):
    """
    get created time and layer info from the docker registry

    Returns a dictionary with the keys "created" (datetime) and
    "layers" (list of diff ids), or None, if the digest lookup is
    answered with "unauthorized" or HTTP status 401, 403 or 404.
    All other errors terminate the program with an error message.

    To retrieve this information the Docker registry client dxf is used.
    https://github.com/davedoesdev/dxf
    """
    # The order of the outer handlers matters: JSONDecodeError is a
    # ValueError, so it has to be listed before the ValueError handler.
    try:
        registry, repo, tag = parse_repository(repository)

        # open docker connection
        with dxf.DXF(registry, repo, docker_auth, timeout=30) as client:
            # credentials for docker_auth()
            client.registry_auth = docker_login.get(registry, [None, None])

            # get config digest
            try:
                config_digest = client.get_digest(tag, platform="linux/amd64")
            except dxf.exceptions.DXFUnauthorizedError:
                return None
            except requests.exceptions.HTTPError as err:
                if err.response.status_code in (401, 403, 404):
                    return None
                raise

            # get config: pull_blob(digest)
            raw_config = b''.join(client.pull_blob(config_digest))
            config = json.loads(raw_config)
            created = dateutil.parser.parse(config["created"])
            return {"created": created,
                    "layers": config["rootfs"]["diff_ids"]}

    except json.JSONDecodeError:
        sys.exit(f"{repository}: Invalid JSON")
    except (dxf.exceptions.DXFError, ValueError) as err:
        sys.exit(f"{repository}: {err}")
    except requests.exceptions.RequestException as err:
        # shorten nested "(Caused by ...)" messages to their last part
        msg = str(err)
        cause = re.search(
            r"\(Caused by ([a-zA-Z0-9_]+)\('?[^:']*[:'] *(.*)'\)", msg)
        if cause is not None:
            msg = cause.group(2)
        sys.exit(f"{repository}: {msg}")
    except KeyError:
        sys.exit(f"{repository}: missing information from registry")
def expand_base_image(base_name, target):
    """
    expand base image

    A base image starting with $DOCKER_REPOSITORY/ or
    ${DOCKER_REPOSITORY}/ is relocated into the repository of the
    target image (everything before the last "/" of target).

    Returns the tuple (base image name, additional build options).
    Raises ValueError, if the target contains no "/".
    """
    prefix, slash, remainder = base_name.partition("/")
    if not slash or \
       prefix not in ("$DOCKER_REPOSITORY", "${DOCKER_REPOSITORY}"):
        # nothing to expand
        return (base_name, [])

    try:
        target_base = target[:target.rindex("/")]
    except ValueError as err:
        raise ValueError(f"{base_name}: "
                         f"Invalid target repository {target}") from err

    expanded = "/".join((target_base, remainder))
    return (expanded, ["--build-arg", "DOCKER_REPOSITORY=" + target_base])
def full_image_name(image_name, default_repository):
    """
    get full image name

    Names containing a "/" are returned unchanged, name-only images
    get the default repository prepended.
    Raises ValueError, if a name-only image has no default repository.
    """
    if "/" not in image_name:
        if not default_repository:
            raise ValueError(f"{image_name}: Missing default repository")
        image_name = "/".join((default_repository, image_name))
    return image_name
def image_add_tag(image_name):
    """
    return image name including tag

    ":latest" is appended, unless the last path component already
    contains a tag or digest, or the name is "scratch" or "NONE".
    """
    if image_name in ("scratch", "NONE"):
        return image_name
    # only the part after the last "/" matters, a ":" further left
    # belongs to the port of the registry
    last_component = image_name.rpartition("/")[2]
    if ":" in last_component or "@" in last_component:
        return image_name
    return image_name + ":latest"
def dockerfile_base(directory):
    """
    get base repository from Dockerfile

    Returns the image of the first FROM instruction found in
    <directory>/Dockerfile. Option flags between FROM and the image,
    like --platform=linux/amd64, are skipped.

    Raises ValueError, if the Dockerfile can't be read or
    if it doesn't contain a FROM instruction.
    """
    # FROM [--platform=<platform>] <image> [AS <name>]
    # Without skipping the flags, "--platform=..." would be
    # returned as the base image.
    re_from = re.compile(r'\s*FROM\s+(?:--\S+\s+)*(\S+)', re.IGNORECASE)
    base = None
    try:
        with open(os.path.join(directory, "Dockerfile"), "r",
                  encoding="utf-8") as dockerfile:
            for dockerline in dockerfile:
                match = re_from.match(dockerline)
                if match:
                    base = match.group(1)
                    break
    except OSError as err:
        raise ValueError(f"Dockerfile: {err}") from err
    if not base:
        raise ValueError("Dockerfile: Missing FROM instruction")
    return base
# regex for a line of the image configuration file, fields are
# separated by tabs. The named groups 'gbl_opt', 'name', 'dir',
# 'base' and 'opt' are the ones read by get_images().
RE_CONF_LINE = re.compile(r'''
(?:|                                    # empty line or...
 (?P<gbl_opt>[^\t\#][^\t]*)|            # global option or...
 (?P<name>[^\t\#][^\t]*)                # name +
 \t+(?P<dir>[^\t\#][^\t]*)              # directory +
 (?:\t+(?P<base>[^\t\#'"-][^\t]*))?     # optional base +
 (?:\t+(?P<opt>['"-][^\t]*))?           # optional option
)
(?:[\t ]*\#.*)?                         # followed by optional comment
''', re.VERBOSE)
def get_images(image_file):
    """ read images configuration file

    Format of the lines in the configuration file:
    Name <tab> Directory [<tab> Base Image] [<tab> Build Options]
    or
    Global Build Options

    If the base image is not given, it is extracted from
    <directory>/Dockerfile.

    The parsed entries are appended to the global list "images".
    Any error terminates the program with an error message.
    """
    gbl_options = []        # global options, valid for the following images
    seen_images = set()     # tagged image names, to detect duplicates
    lineno = 0
    try:
        with open(image_file, "r", encoding="utf-8") as img_file:
            for lineno, line in enumerate(img_file, start=1):
                fields = RE_CONF_LINE.fullmatch(line.strip())
                if fields is None:
                    sys.exit(f"{image_file} line {lineno}: "
                             "invalid number of fields")

                if fields.group('gbl_opt'):
                    gbl_options = shlex.split(fields.group('gbl_opt'))

                name, directory = fields.group('name', 'dir')
                if not (name and directory):
                    continue    # no image definition on this line

                # validate the name, the repository is just a placeholder
                try:
                    parse_repository(full_image_name(name, "test.io/test"))
                except ValueError:
                    sys.exit(f"{image_file} line {lineno}: "
                             f"invalid image name '{name}'")
                if name == "scratch":
                    sys.exit(f"{image_file} line {lineno}: "
                             "Reserved image name 'scratch'")

                image_name = image_add_tag(name)
                if image_name in seen_images:
                    sys.exit(f"{image_file}: "
                             f"multiple entries for {name}")
                seen_images.add(image_name)

                if not os.path.isdir(directory):
                    sys.exit(f"{image_file} line {lineno}: "
                             f"unknown directory '{directory}'")

                # without base image extract it from the Dockerfile
                base = fields.group('base') or dockerfile_base(directory)

                options = list(gbl_options)
                if fields.group('opt'):
                    options.extend(shlex.split(fields.group('opt')))

                images.append({"name": name, "image": image_name,
                               "dir": directory,
                               "base": image_add_tag(base),
                               "options": options})
    except OSError as err:
        sys.exit(f"Can't read images file: {err}")
    except ValueError as err:
        sys.exit(f"{image_file} line {lineno}: {err}")

    if not images:
        sys.exit("Empty image configuration")
def init_image_info():
    """
    initialize image info structure

    Registers the pseudo images "scratch" and "NONE" in image_info,
    created at the epoch and with None as their only layer.
    """
    epoch = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)
    placeholder = {"created": epoch, "layers": [None]}
    image_info["scratch"] = placeholder
    image_info["NONE"] = dict(placeholder)      # shallow copy
def mtime_tree(directory):
    """
    get modification time of a directory tree

    Returns the most recent st_mtime of the directory, its
    sub-directories and all files in them, or 0 if os.walk()
    doesn't yield anything.
    """
    newest = 0
    for root, _, filenames in os.walk(directory):
        # the directory itself, followed by its files
        paths = [root] + [os.path.join(root, fname) for fname in filenames]
        newest = max([newest] + [os.stat(path).st_mtime for path in paths])
    return newest
def needs_rebuild(image, default_repository=None):
    """
    check if an image needs rebuilding

    Returns a string with the reason for the rebuild or None, if the
    image is up-to-date. Registry information is cached in image_info.
    A missing base image or a failing git call terminates the program.
    """
    full_name = full_image_name(image["image"], default_repository)
    base_name, _ = expand_base_image(image["base"], full_name)

    # get information of base image, if unknown
    if base_name not in image_info:
        image_info[base_name] = get_time_layers(base_name)
    base_info = image_info[base_name]
    if not base_info:
        sys.exit(f"Missing base image: {base_name}")

    # get information of image, if unknown
    if full_name not in image_info:
        image_info[full_name] = get_time_layers(full_name)
    current = image_info[full_name]
    if not current:
        return "Image missing in repository"

    # check if base image has changed:
    # its last layer must be part of the image
    last_base_layer = base_info["layers"][-1]
    if last_base_layer and last_base_layer not in current["layers"]:
        return "Base image has changed"

    # check if build directory has changed, needs full git history
    git_env = dict(os.environ, LC_ALL="C")  # stable, untranslated messages
    git = ["git", "-C", image["dir"]]
    try:
        # check if git repository is up-to-date
        status = subprocess.run(git + ["status", "--porcelain", "--", "."],
                                capture_output=True,
                                check=False,
                                env=git_env,
                                text=True)
        status_failed = status.returncode != 0
        if status_failed and "not a git repository" not in status.stderr:
            # Fatal error
            git_message = status.stderr.rstrip('\r\n')
            sys.exit(f"{image['name']}: Can't get git status: {git_message}")

        if status_failed or status.stdout.rstrip('\r\n'):
            # Non-fatal error or changes: use modification date of the files
            mtime = mtime_tree(image["dir"])
            rebuild_reason = "Files in docker context more recent than image"
        else:
            # clean git repository: use "git log" to get commit time
            log = subprocess.run(git + ["log", "-n", "1",
                                        "--pretty=tformat:%ct", "--", "."],
                                 capture_output=True,
                                 check=True,
                                 env=git_env,
                                 text=True)
            mtime = int(log.stdout.strip())
            rebuild_reason = "Git change more recent than image"
    except OSError as err:
        sys.exit(f"Can't run git: {err}")
    except subprocess.CalledProcessError as err:
        git_message = err.stderr.rstrip('\r\n')
        sys.exit(f"{image['name']}: Can't get commit date: {git_message}")
    except ValueError as err:
        sys.exit(f"{image['name']}: Can't get commit date: {err}")

    image_created = current["created"].timestamp()
    return rebuild_reason if mtime > image_created else None
def build(image, default_repository=None):
    """
    build image

    Runs "docker buildx build ... --push" for the image. A failing
    build terminates the program with the exit code of docker.
    """
    full_name = full_image_name(image["image"], default_repository)
    _, base_options = expand_base_image(image["base"], full_name)
    command = ["docker", "buildx", "build",
               *base_options, *image["options"],
               "--push", "--tag", full_name, image["dir"]]
    try:
        subprocess.run(command, check=True)
    except OSError as err:
        sys.exit(f"Can't run docker: {err}")
    except subprocess.CalledProcessError as err:
        sys.exit(err.returncode)
    print()

    # remove outdated image information
    image_info.pop(full_name, None)
def fill_login_table():
    """
    fill login table from DOCKER_LOGIN* environment variables

    Each variable contains "registry user password", the password
    may include whitespace. Processed variables are removed from
    the environment.

    Returns a dictionary, mapping the registry to [user, password].
    """
    # snapshot of the variables, as the environment is modified below
    login_vars = [(key, val) for key, val in os.environ.items()
                  if key.startswith("DOCKER_LOGIN")]

    login_table = {}
    for key, val in login_vars:
        fields = val.strip().split(maxsplit=2)
        if len(fields) != 3:
            sys.exit(f"{key} requires 3 fields: registry user password")
        registry, user, password = fields
        registry = registry.lower()
        if registry == "docker.io":
            registry = "registry-1.docker.io"
        if registry in login_table:
            sys.exit(f"DOCKER_LOGIN: {registry} defined multiple times")
        login_table[registry] = [user, password]
        del os.environ[key]
    return login_table
def rebuild_images(dry_run, all_flag, forced_images):
    """
    rebuild images

    An image is rebuilt, if it is forced by the command line or if
    needs_rebuild() reports a reason for one of its repositories.
    With all_flag the meaning of forced_images is inverted.
    With dry_run the images are only printed.
    """
    for image in images:
        # full image names are built once, name-only images
        # are built for all repositories of DOCKER_REPOSITORY
        base_repos = [None] if "/" in image["image"] else docker_repositories

        forced = image["image"] in forced_images or \
                 image["base"] in forced_images
        if bool(all_flag) != bool(forced):      # logical xor
            reason = "Rebuild triggered by command line"
        else:
            # first reason found in any of the repositories
            reasons = (needs_rebuild(image, repo) for repo in base_repos)
            reason = next((why for why in reasons if why), False)

        if not reason:
            continue

        # rebuild image
        for repo in base_repos:
            print(f"*** {full_image_name(image['name'], repo)}\n"
                  f"Reason: {reason}\n")
            if not dry_run:
                build(image, repo)
def xor(*params):
    """ logical xor: True, if an odd number of arguments is true """
    truthy = [arg for arg in params if arg]
    return len(truthy) % 2 == 1
# main
args = parser.parse_args()
sys.stdout.reconfigure(line_buffering=True)

# DOCKER_REPOSITORY environment: whitespace separated list of repositories
docker_repositories = os.environ.get("DOCKER_REPOSITORY", "").lower().split()
for docker_repo in docker_repositories:
    try:
        parse_repository(docker_repo)
    except ValueError as err_info:
        sys.exit(f"DOCKER_REPOSITORY: {docker_repo}: {err_info}")

# fill user/password table
docker_login = fill_login_table()

# several --dir options are joined to one path
if args.dir:
    try:
        os.chdir(os.path.join(*args.dir))
    except OSError as err_info:
        sys.exit(f"Can't change directory: {err_info}")

get_images(args.file)
init_image_info()

# check arguments: every image given on the command line must be
# configured as image or as base image
all_inames = {entry[key] for entry in images for key in ("image", "base")}
tagged_args = []
for iname in args.image:
    iname_tag = image_add_tag(iname)
    if iname_tag not in all_inames:
        sys.exit(f"Image {iname} not found in '{args.file}' configuration file")
    tagged_args.append(iname_tag)
args.image = tagged_args

# name-only images need a repository from DOCKER_REPOSITORY
if not docker_repositories:
    for img in images:
        if "/" not in img["image"]:
            sys.exit(f"{img['name']}: "
                     "Environment variable DOCKER_REPOSITORY is not defined")

# rebuild images
rebuild_images(args.dry_run, args.all, args.image)