From c80dd65dfc7a8d601ed66c4fa24b6eb377382dc9 Mon Sep 17 00:00:00 2001 From: Bernhard Ehlers Date: Sat, 27 May 2023 16:15:39 +0200 Subject: [PATCH] Docker Build System Docker images are rebuilt, when the base image or the build context has changed. --- .github/bin/docker_build | 453 +++++++++++++++++++++++++++++++++++ .github/bin/requirements.txt | 2 + .github/docker_build.md | 118 +++++++++ .github/workflows/build.yml | 59 +++++ docker/docker_images | 43 ++++ 5 files changed, 675 insertions(+) create mode 100755 .github/bin/docker_build create mode 100644 .github/bin/requirements.txt create mode 100644 .github/docker_build.md create mode 100644 .github/workflows/build.yml create mode 100644 docker/docker_images diff --git a/.github/bin/docker_build b/.github/bin/docker_build new file mode 100755 index 0000000..0c2bacd --- /dev/null +++ b/.github/bin/docker_build @@ -0,0 +1,453 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022 Bernhard Ehlers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +docker_build - (re)build outdated docker images + +usage: docker_build [--help] [--all] [--dir DIR] [--dry-run] [--file FILE] + [image ...] + +positional arguments: + image images to build additionally + +optional arguments: + --help, -h, -? prints this screen + --all, -a build all images + --dir DIR, -C DIR change directory before building + --dry-run, -n do not build images, just print them + --file FILE, -f FILE use FILE as image config (default: 'docker_images') + +The docker images and their properties are configured in a +file, by default 'docker_images' in the current directory. + +Format of the lines in the configuration file: + Name Directory [ Base Image] [ Build Options] + or + Global Build Options + +When running without an image arg, it checks all images, +if the directory containing its Dockerfile has changed or +its base image has been updated. + +In some special cases a docker image needs a forced rebuild. +For that add the list of images or base images, to be rebuild, +to the arguments. When using the option -a/--all, all images are +forcibly rebuild, except those specified on the command line. + +The environment variable DOCKER_ACCOUNT must be set to the +registry/user of the Docker account to use. +""" + +import os +import sys +import argparse +import json +import re +import shlex +import subprocess +import dxf +import requests.exceptions +import dateutil.parser + +base_images = {} +images = [] + + +parser = argparse.ArgumentParser(add_help=False, \ + description='%(prog)s - (re)build outdated docker images') +parser.add_argument('--help', '-h', '-?', action='help', + help='prints this screen') +parser.add_argument('--all', '-a', action='store_true', + help='build all images') +parser.add_argument('--dir', '-C', action='append', + help='change directory before building') +parser.add_argument('--dry-run', '-n', action='store_true', + help='do not build images, just print them') +parser.add_argument('--file', '-f', default='docker_images', + help="use FILE as image config (default: '%(default)s')") +parser.add_argument('image', nargs="*", + help='images to build additionally') + + +# regex for repository +RE_REPOSITORY = re.compile(r''' +(?:(?P[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])? # host name + (?: # followed by ... + (?:\.[a-zA-Z0-9] # domain + (?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)+ + (?::[0-9]+)? # and optional port + | # or ... + :[0-9]+) # port +)/)? # finally a / + +(?P[a-z0-9]+(?:(?:\.|__?|-+)[a-z0-9]+)* # repo component + (?:/[a-z0-9]+(?:(?:\.|__?|-+)[a-z0-9]+)*)* # more components +) +(?::(?P[a-zA-Z0-9_][a-zA-Z0-9_.-]{,127}))? # optional tag +(?:@(?P[a-z0-9]+(?:[.+_-][a-z0-9]+)* # optional digest + :[0-9a-f]{32,}))* +''', re.VERBOSE) + +def parse_repository(repository): + """ extract registry, user, repo and tag from repository """ + # verify repository format and extract components + match = RE_REPOSITORY.fullmatch(repository) + if not match: + raise ValueError("invalid reference format") + registry = match.group('host') or "docker.io" + repo = match.group('repo') + tag = match.group('digest') or match.group('tag') or "latest" + len_registry = len(registry) + # user is first component of repo, if repo has more than one part + user = repo.split("/", 1)[0] if "/" in repo else None + # special handling for docker.io + if registry == "docker.io": + registry = "registry-1.docker.io" + if not user: + repo = "library/" + repo + # check length of repository string (without tag) + if len_registry + len(repo) > 254: + raise ValueError("repository name must not be more than 255 characters") + return registry, user, repo, tag + + +def docker_auth_user(docker, response): + """ authenticate with user/password """ + docker.authenticate(docker_login["account"], docker_login["password"], + response=response) + + +def docker_auth_none(docker, response): + """ public access """ + docker.authenticate(None, None, response=response) + + +def get_time_layers(repository): + """ + get created time and layer info from the docker registry + + To retrieve this information the Docker registry client dxf is used. + https://github.com/davedoesdev/dxf + """ + + try: + registry, _, repo, tag = parse_repository(repository) + if registry == docker_login["registry"] and \ + docker_login["user"] and docker_login["password"]: + docker_auth = docker_auth_user + else: + docker_auth = docker_auth_none + + # open docker connection + with dxf.DXF(registry, repo, docker_auth, timeout=30) as docker: + # get config digest + try: + digest = docker.get_digest(tag, platform="linux/amd64") + except dxf.exceptions.DXFUnauthorizedError: + return (None, []) + except requests.exceptions.HTTPError as err: + if err.response.status_code not in (401, 403, 404): + raise + return (None, []) + + # get config: pull_blob(digest) + data = json.loads(b''.join(docker.pull_blob(digest))) + + return (dateutil.parser.parse(data["created"]), + data["rootfs"]["diff_ids"]) + + except json.JSONDecodeError: + sys.exit(f"{repository}: Invalid JSON") + except (dxf.exceptions.DXFError, ValueError) as err: + sys.exit(f"{repository}: {err}") + except requests.exceptions.RequestException as err: + msg = str(err) + match = re.search(r"\(Caused by ([a-zA-Z0-9_]+)\('?[^:']*[:'] *(.*)'\)", + msg) + if match: + msg = match.group(2) + sys.exit(f"{repository}: {msg}") + except KeyError: + sys.exit(f"{repository}: missing information from registry") + + +def expand_base_image(base_name): + """ expand base image """ + match = re.match(r"\$\{?DOCKER_ACCOUNT\}?/(.+)", base_name) + if not match: + return (base_name, []) + if not docker_login["account"]: + raise ValueError("Environment variable DOCKER_ACCOUNT " + "is not defined or is empty") + base_name = docker_login["account"] + "/" + match.group(1) + options = ["--build-arg", "DOCKER_ACCOUNT=" + docker_login["account"]] + return (base_name, options) + + +def full_image_name(image_name): + """ get full image name """ + if "/" in image_name: + return image_name + if not docker_login["account"]: + raise ValueError("Environment variable DOCKER_ACCOUNT " + "is not defined or is empty") + return docker_login["account"] + "/" + image_name + + +def dockerfile_base(directory): + """ get base repository from Dockerfile """ + base = None + re_from = re.compile(r'\s*FROM\s+(\S+)', re.IGNORECASE) + try: + with open(os.path.join(directory, "Dockerfile"), "r", + encoding="utf-8") as dockerfile: + for dockerline in dockerfile: + match = re_from.match(dockerline) + if match: + base = match.group(1) + break + except OSError as err: + raise ValueError(f"Dockerfile: {err}") from err + if not base: + raise ValueError("Dockerfile: Missing FROM instruction") + return base + + +RE_CONF_LINE = re.compile(r''' +(?:| # empty line or... + (?P[^\t\#][^\t]*)| # global option or... + (?P[^\t\#][^\t]*) # name + + \t+(?P[^\t\#][^\t]*) # directory + + (?:\t+(?P[^\t\#'"-][^\t]*))? # optional base + + (?:\t+(?P['"-][^\t]*))? # optional option +) +(?:[\t ]*\#.*)? # followed by optional comment +''', re.VERBOSE) + +def get_images(image_file): + """ read images configuration file + + Format of the lines in the configuration file: + Name Directory [ Base Image] [ Build Options] + or + Global Build Options + + If the base image is not given, it is extracted from /Dockerfile. + """ + gbl_options = [] + name_set = set() + try: + lineno = 0 + with open(image_file, "r", encoding="utf-8") as img_file: + for line in img_file: + lineno += 1 + match = RE_CONF_LINE.fullmatch(line.strip()) + if not match: + sys.exit(f"{image_file} line {lineno}: " + "invalid number of fields") + if match.group('gbl_opt'): + gbl_options = shlex.split(match.group('gbl_opt')) + if match.group('name') and match.group('dir'): + name = match.group('name') + full_name = full_image_name(name) + try: + parse_repository(full_name) + except ValueError: + sys.exit(f"{image_file} line {lineno}: " + f"invalid image name '{full_name}'") + if full_name in name_set: + sys.exit(f"{image_file}: " + f"multiple entries for {full_name}") + name_set.add(full_name) + directory = match.group('dir') + if not os.path.isdir(directory): + sys.exit(f"{image_file} line {lineno}: " + f"unknown directory '{directory}'") + base = match.group('base') + if not base: # extract base repo from Dockerfile + base = dockerfile_base(directory) + (base, options) = expand_base_image(base) + options += gbl_options + if match.group('opt'): + options += shlex.split(match.group('opt')) + images.append({"name": name, "dir": directory, + "base": base, "options": options}) + except OSError as err: + sys.exit(f"Can't read images file: {err}") + except ValueError as err: + sys.exit(f"{image_file} line {lineno}: {err}") + if not images: + sys.exit("Empty image configuration") + + +def init_base_images(): + """ initialize base image data structure """ + for image in images: + base_name = image["base"] + if base_name not in base_images: + base_images[base_name] = {"layer": False} + base_images["scratch"] = {"layer": None} + base_images["NONE"] = {"layer": None} + + +def mtime_tree(directory): + """ get modification time of a directory tree """ + mtime = 0 + for root, _, filenames in os.walk(directory): + mtime = max(mtime, os.stat(root).st_mtime) + for fname in filenames: + mtime = max(mtime, os.stat(os.path.join(root, fname)).st_mtime) + return mtime + + +def needs_rebuild(image): + """ check if an image needs rebuilding """ + # update base_image layer, if empty + base_img = base_images[image["base"]] + if base_img["layer"] is False: + _, layers = get_time_layers(image["base"]) + # store last layer + if layers: + base_img["layer"] = layers[-1] + else: + sys.exit(f"Missing base image: {image['base']}") + + # get image data + full_name = full_image_name(image["name"]) + itime, layers = get_time_layers(full_name) + if layers and full_name in base_images: # image is a base image + base_images[full_name]["layer"] = layers[-1] + + # check if base image has changed + if not layers: + return "Image missing in repository" + if base_img["layer"] and base_img["layer"] not in layers: + return "Base image has changed" + + # check if build directory has changed, needs full git history + env = os.environ.copy() + env["LC_ALL"] = "C" + try: + # check if git repository is up-to-date + proc = subprocess.run(["git", "-C", image["dir"], "status", + "--porcelain", "--", "."], + capture_output=True, + check=False, + env=env, + universal_newlines=True) + if proc.returncode != 0 and "not a git repository" not in proc.stderr: + # Fatal error + sys.exit(f"{image['name']}: Can't get git status: " + \ + proc.stderr.rstrip('\r\n')) + if proc.returncode != 0 or proc.stdout.rstrip('\r\n'): + # Non-fatal error or changes: use modification date of the files + mtime = mtime_tree(image["dir"]) + rebuild_reason = "Files in docker context more recent than image" + else: + # clean git repository: use "git log" to get commit time + proc = subprocess.run(["git", "-C", image["dir"], "log", "-n", "1", + "--pretty=tformat:%ct", "--", "."], + capture_output=True, + check=True, + env=env, + universal_newlines=True) + mtime = int(proc.stdout.strip()) + rebuild_reason = "Git change more recent than image" + except OSError as err: + sys.exit(f"Can't run git: {err}") + except subprocess.CalledProcessError as err: + sys.exit(f"{image['name']}: Can't get commit date: " + \ + err.stderr.rstrip('\r\n')) + except ValueError as err: + sys.exit(f"{image['name']}: Can't get commit date: {err}") + + return rebuild_reason if mtime > itime.timestamp() else None + + +def build(image): + """ build image """ + full_name = full_image_name(image["name"]) + try: + subprocess.run(["docker", "buildx", "build"] + image["options"] + \ + ["--push", "--tag", full_name, image["dir"]], + check=True) + except OSError as err: + sys.exit(f"Can't run docker: {err}") + except subprocess.CalledProcessError as err: + sys.exit(err.returncode) + print() + + if full_name in base_images: # just modified a base image + _, layers = get_time_layers(full_name) + # store last layer + if layers: + base_images[full_name]["layer"] = layers[-1] + else: + sys.exit(f"{image['name']}: Can't get image layers") + + +def xor(*params): + """ logical xor """ + result = False + for arg in params: + result = result != bool(arg) + return result + + +# main +args = parser.parse_args() +sys.stdout.reconfigure(line_buffering=True) + +docker_login = {"account": os.environ.get("DOCKER_ACCOUNT"), + "password": os.environ.pop("DOCKER_PASSWORD", None)} +if docker_login["account"]: + docker_login["account"] = docker_login["account"].rstrip("/") + try: + docker_login["registry"], docker_login["user"], *_ = \ + parse_repository(docker_login["account"] + "/dummy") + except ValueError as err_info: + sys.exit(f"DOCKER_ACCOUNT={docker_login['account']}: {err_info}") + if not docker_login["user"]: + sys.exit(f"DOCKER_ACCOUNT={docker_login['account']}: missing user") +else: + docker_login["registry"] = docker_login["user"] = None + +if args.dir: + try: + os.chdir(os.path.join(*args.dir)) + except OSError as err_info: + sys.exit(f"Can't change directory: {err_info}") +get_images(args.file) +init_base_images() + +# check arguments +all_inames = {img["name"] for img in images}.union(base_images.keys()) +for iname in args.image: + if iname not in all_inames: + sys.exit(f"Image {iname} not found in '{args.file}' configuration file") + +# rebuild images +for img in images: + if xor(args.all, img["name"] in args.image or img["base"] in args.image): + # pragma pylint: disable=invalid-name + reason = "Rebuild triggered by command line" + else: + reason = needs_rebuild(img) + if reason: + print(f"*** {img['name']}\nReason: {reason}\n") + if not args.dry_run: + build(img) diff --git a/.github/bin/requirements.txt b/.github/bin/requirements.txt new file mode 100644 index 0000000..01c8180 --- /dev/null +++ b/.github/bin/requirements.txt @@ -0,0 +1,2 @@ +python-dateutil +python-dxf>=11 diff --git a/.github/docker_build.md b/.github/docker_build.md new file mode 100644 index 0000000..3c9ad0d --- /dev/null +++ b/.github/docker_build.md @@ -0,0 +1,118 @@ +# Docker Build System + +The regular Docker build system has the disadvantage, +that only the cache system prevents a repetitive rebuild. +But when running the build in a VM this cache is initially +empty, resulting in a rebuild of all images on every run. +This not only increases the CPU load of the VM, but also +creates a lot of updated Docker images, that differ only +in the timestamps of the files. + +This Docker build system wants to improve this situation. +It rebuilds only, when the base image or the build context +has changed. There are some other situations, where an +image needs to be recreated, that are not detected. +This mainly affects installation packages and external +files, that got an update. Then a manual trigger is needed. + + +## Build Tool + +The `docker_build` tool reads a configuration file and +then starts building images with `docker buildx build`. + +If `docker_build` is launched without arguments, it checks +all configured images for an update of the base image. +Additionally it checks, if `git` shows an update of +the directory containing the docker build context. +When at least one of these conditions is met, the tool +starts a rebuild of that image. + +When `docker_build` is run with some image names as +arguments, then these images are additionally built. +When using a base image name as an argument, then all +images with that base image are rebuilt. + +When using the option -a/--all, all images are forcibly +rebuild, except those specified on the command line. + + +## Configuration File + +The build tool reads a configuration file, by default +'docker_images', located in the current directory. For each +target image, it contains its name, its context directory +and optionally the base image and some build options. + +The fields are separated by one or more \ characters. +Comments start with a `#` as its first field character. +An empty line or an all-comments line is ignored. + +A line with only one field is used for build options, +which are effective from that point until they are redefined. +This global build option and an optional image specific option +are combined and sent to the `docker buildx` command. + +Here an example: + +``` +# Name Directory [Base Image] [Build Options] + +--platform=linux/arm64,linux/amd64 # global options + +alpine-1 alpine-1 alpine --image-specific-option +alpine-1:test alpine-1a --another-image-specific-option +``` + +If the base image is `NONE` (all caps), the target +image is not checked against its real base image. +Changes to the base image will then not cause a rebuild. + +The target image may contain the full name, in which +case it will contain one or more '/' characters. + +Another option is to specify only the last part of the +image name. Then `docker_build` uses the `DOCKER_ACCOUNT` +environment variable as its initial part. For example, an +DOCKER_ACCOUNT value of "ghcr.io/b-ehlers" plus the image +name of "alpine-1" results in "ghcr.io/b-ehlers/alpine-1". + +This method is not applied to the base images, they always +have to contain the complete name. + +But there is a workaround. + +If the base image name starts with `$DOCKER_ACCOUNT` +or `${DOCKER_ACCOUNT}` the variable DOCKER_ACCOUNT +gets replaced by its value from the environment. +In the Dockerfile the variable must be declared by a +`ARG DOCKER_ACCOUNT` instruction. A Dockerfile would +then start with: + +``` +ARG DOCKER_ACCOUNT +FROM $DOCKER_ACCOUNT/base-image +``` + + +## Workflow Definition + +[GitHub Actions](https://docs.github.com/en/actions) +uses YAML files in the .github/workflows directory +to define, which tasks should be run. + +Before `docker_build` can be run the following steps +need to be done: + +* Check out the repository code +* Set up QEMU (for multi-arch building) +* Set up Docker Buildx +* Login to the Container Registry +* Install python requirements + +Then `docker_build` can be executed, +normally without any arguments. + +But what, when an image build needs to be forced? +For that, run the action manually and enter the list +of images, separated by spaces, into the input field. diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..d5f1192 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,59 @@ +name: Build Docker images and upload to ghcr.io +on: + push: + branches: + - main + - master + schedule: + - cron: '37 7 * * 3' + workflow_dispatch: + inputs: + images: + description: 'List of images to be built' + required: false + type: string + +jobs: + docker-images: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + # https://github.com/marketplace/actions/checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up QEMU + # https://github.com/marketplace/actions/docker-setup-qemu + uses: docker/setup-qemu-action@v2 + - name: Set up Docker Buildx + # https://github.com/marketplace/actions/docker-setup-buildx + uses: docker/setup-buildx-action@v2 + - name: Login to GitHub Container Registry + # https://github.com/marketplace/actions/docker-login + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + # DockerHub: + # with: + # username: ${{ secrets.DOCKERHUB_USERNAME }} + # password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Install python requirements + run: python3 -m pip install --requirement .github/bin/requirements.txt + - name: Build and push images + env: + DOCKER_ACCOUNT: ghcr.io/${{ github.repository_owner }} + # DOCKER_PASSWORD is optional, only needed for private repositories + # DOCKER_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + # DockerHub: + # DOCKER_ACCOUNT: ${{ secrets.DOCKERHUB_USERNAME }} + # DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + # + IMAGES: ${{ inputs.images }} + run: | + set -f + set -- $IMAGES + set +f + # use option --dir to use a subdirectory as a docker base dir + python3 "$GITHUB_WORKSPACE/.github/bin/docker_build" --dir "docker" "$@" diff --git a/docker/docker_images b/docker/docker_images new file mode 100644 index 0000000..f3d59ea --- /dev/null +++ b/docker/docker_images @@ -0,0 +1,43 @@ +# Name Directory [Base Image] [Build Options] + +--platform=linux/amd64 # global options + +# chromium: base image is updated too often, use manual build on base updates +chromium chromium NONE +# ipterm: base image debian:jessie too old +#ipterm-base ipterm/base +#ipterm ipterm/cli +#webterm ipterm/web +jupyter:v2 jupyter +jupyter27:v2 jupyter-2.7 +# kalilinux: base image is updated too often, use manual build on base updates +kalilinux kalilinux NONE +mikrotik-winbox mikrotik-winbox +# ostinato-wireshark: failed to build: mesa-dri-swrast - no such package +#ostinato-wireshark ostinato-wireshark +openvswitch openvswitch +ovs-snmp ovs-snmp --platform=linux/arm64 +pyats pyats +ubuntu:focal ubuntu + +# +# The following images are not used by any appliance +# + +# Replaced by https://github.com/adosztal/gns3-containers +#network_automation network_automation +#python-go-perl-php python-go-perl-php + +# AJ Nouri +#endhost endhost +#haproxy haproxy +#network_automation_pycharm network_automation_pycharm +#openvswitch28 openvswitch28 + +# GNS3 Developers +#dhcp dhcp +#snmpsimulator snmpsimulator + +# Tests +#iou iou +#xeyes xeyes