Merge pull request #767 from b-ehlers/master

Docker Build System
This commit is contained in:
Jeremy Grossmann 2023-05-29 20:25:18 +08:00 committed by GitHub
commit c33d38da00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 694 additions and 3 deletions

453
.github/bin/docker_build vendored Executable file

@@ -0,0 +1,453 @@
#!/usr/bin/env python3
# Copyright (C) 2022 Bernhard Ehlers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
docker_build - (re)build outdated docker images
usage: docker_build [--help] [--all] [--dir DIR] [--dry-run] [--file FILE]
[image ...]
positional arguments:
image images to build additionally
optional arguments:
--help, -h, -? prints this screen
--all, -a build all images
--dir DIR, -C DIR change directory before building
--dry-run, -n do not build images, just print them
--file FILE, -f FILE use FILE as image config (default: 'docker_images')
The docker images and their properties are configured in a
file, by default 'docker_images' in the current directory.
Format of the lines in the configuration file:
Name <tab> Directory [<tab> Base Image] [<tab> Build Options]
or
Global Build Options
When run without image arguments, it checks for each image
whether the directory containing its Dockerfile has changed
or whether its base image has been updated.
In some special cases a docker image needs a forced rebuild.
For that, add the images or base images to be rebuilt
to the arguments. When using the option -a/--all, all images are
forcibly rebuilt, except those specified on the command line.
The environment variable DOCKER_ACCOUNT must be set to the
registry/user of the Docker account to use.
"""
import os
import sys
import argparse
import json
import re
import shlex
import subprocess
import dxf
import requests.exceptions
import dateutil.parser
base_images = {}
images = []
parser = argparse.ArgumentParser(add_help=False, \
description='%(prog)s - (re)build outdated docker images')
parser.add_argument('--help', '-h', '-?', action='help',
help='prints this screen')
parser.add_argument('--all', '-a', action='store_true',
help='build all images')
parser.add_argument('--dir', '-C', action='append',
help='change directory before building')
parser.add_argument('--dry-run', '-n', action='store_true',
help='do not build images, just print them')
parser.add_argument('--file', '-f', default='docker_images',
help="use FILE as image config (default: '%(default)s')")
parser.add_argument('image', nargs="*",
help='images to build additionally')
# regex for repository
RE_REPOSITORY = re.compile(r'''
(?:(?P<host>[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])? # host name
(?: # followed by ...
(?:\.[a-zA-Z0-9] # domain
(?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)+
(?::[0-9]+)? # and optional port
| # or ...
:[0-9]+) # port
)/)? # finally a /
(?P<repo>[a-z0-9]+(?:(?:\.|__?|-+)[a-z0-9]+)* # repo component
(?:/[a-z0-9]+(?:(?:\.|__?|-+)[a-z0-9]+)*)* # more components
)
(?::(?P<tag>[a-zA-Z0-9_][a-zA-Z0-9_.-]{,127}))? # optional tag
(?:@(?P<digest>[a-z0-9]+(?:[.+_-][a-z0-9]+)* # optional digest
:[0-9a-f]{32,}))*
''', re.VERBOSE)
def parse_repository(repository):
""" extract registry, user, repo and tag from repository """
# verify repository format and extract components
match = RE_REPOSITORY.fullmatch(repository)
if not match:
raise ValueError("invalid reference format")
registry = match.group('host') or "docker.io"
repo = match.group('repo')
tag = match.group('digest') or match.group('tag') or "latest"
len_registry = len(registry)
# user is first component of repo, if repo has more than one part
user = repo.split("/", 1)[0] if "/" in repo else None
# special handling for docker.io
if registry == "docker.io":
registry = "registry-1.docker.io"
if not user:
repo = "library/" + repo
# check length of repository string (without tag)
if len_registry + len(repo) > 254:
raise ValueError("repository name must not be more than 255 characters")
return registry, user, repo, tag
def docker_auth_user(docker, response):
""" authenticate with user/password """
docker.authenticate(docker_login["account"], docker_login["password"],
response=response)
def docker_auth_none(docker, response):
""" public access """
docker.authenticate(None, None, response=response)
def get_time_layers(repository):
"""
get created time and layer info from the docker registry
To retrieve this information the Docker registry client dxf is used.
https://github.com/davedoesdev/dxf
"""
try:
registry, _, repo, tag = parse_repository(repository)
if registry == docker_login["registry"] and \
docker_login["user"] and docker_login["password"]:
docker_auth = docker_auth_user
else:
docker_auth = docker_auth_none
# open docker connection
with dxf.DXF(registry, repo, docker_auth, timeout=30) as docker:
# get config digest
try:
digest = docker.get_digest(tag, platform="linux/amd64")
except dxf.exceptions.DXFUnauthorizedError:
return (None, [])
except requests.exceptions.HTTPError as err:
if err.response.status_code not in (401, 403, 404):
raise
return (None, [])
# get config: pull_blob(digest)
data = json.loads(b''.join(docker.pull_blob(digest)))
return (dateutil.parser.parse(data["created"]),
data["rootfs"]["diff_ids"])
except json.JSONDecodeError:
sys.exit(f"{repository}: Invalid JSON")
except (dxf.exceptions.DXFError, ValueError) as err:
sys.exit(f"{repository}: {err}")
except requests.exceptions.RequestException as err:
msg = str(err)
match = re.search(r"\(Caused by ([a-zA-Z0-9_]+)\('?[^:']*[:'] *(.*)'\)",
msg)
if match:
msg = match.group(2)
sys.exit(f"{repository}: {msg}")
except KeyError:
sys.exit(f"{repository}: missing information from registry")
def expand_base_image(base_name):
""" expand base image """
match = re.match(r"\$\{?DOCKER_ACCOUNT\}?/(.+)", base_name)
if not match:
return (base_name, [])
if not docker_login["account"]:
raise ValueError("Environment variable DOCKER_ACCOUNT "
"is not defined or is empty")
base_name = docker_login["account"] + "/" + match.group(1)
options = ["--build-arg", "DOCKER_ACCOUNT=" + docker_login["account"]]
return (base_name, options)
def full_image_name(image_name):
""" get full image name """
if "/" in image_name:
return image_name
if not docker_login["account"]:
raise ValueError("Environment variable DOCKER_ACCOUNT "
"is not defined or is empty")
return docker_login["account"] + "/" + image_name
def dockerfile_base(directory):
""" get base repository from Dockerfile """
base = None
re_from = re.compile(r'\s*FROM\s+(\S+)', re.IGNORECASE)
try:
with open(os.path.join(directory, "Dockerfile"), "r",
encoding="utf-8") as dockerfile:
for dockerline in dockerfile:
match = re_from.match(dockerline)
if match:
base = match.group(1)
break
except OSError as err:
raise ValueError(f"Dockerfile: {err}") from err
if not base:
raise ValueError("Dockerfile: Missing FROM instruction")
return base
RE_CONF_LINE = re.compile(r'''
(?:| # empty line or...
(?P<gbl_opt>[^\t\#][^\t]*)| # global option or...
(?P<name>[^\t\#][^\t]*) # name +
\t+(?P<dir>[^\t\#][^\t]*) # directory +
(?:\t+(?P<base>[^\t\#'"-][^\t]*))? # optional base +
(?:\t+(?P<opt>['"-][^\t]*))? # optional option
)
(?:[\t ]*\#.*)? # followed by optional comment
''', re.VERBOSE)
def get_images(image_file):
""" read images configuration file
Format of the lines in the configuration file:
Name <tab> Directory [<tab> Base Image] [<tab> Build Options]
or
Global Build Options
If the base image is not given, it is extracted from <directory>/Dockerfile.
"""
gbl_options = []
name_set = set()
try:
lineno = 0
with open(image_file, "r", encoding="utf-8") as img_file:
for line in img_file:
lineno += 1
match = RE_CONF_LINE.fullmatch(line.strip())
if not match:
sys.exit(f"{image_file} line {lineno}: "
"invalid number of fields")
if match.group('gbl_opt'):
gbl_options = shlex.split(match.group('gbl_opt'))
if match.group('name') and match.group('dir'):
name = match.group('name')
full_name = full_image_name(name)
try:
parse_repository(full_name)
except ValueError:
sys.exit(f"{image_file} line {lineno}: "
f"invalid image name '{full_name}'")
if full_name in name_set:
sys.exit(f"{image_file}: "
f"multiple entries for {full_name}")
name_set.add(full_name)
directory = match.group('dir')
if not os.path.isdir(directory):
sys.exit(f"{image_file} line {lineno}: "
f"unknown directory '{directory}'")
base = match.group('base')
if not base: # extract base repo from Dockerfile
base = dockerfile_base(directory)
(base, options) = expand_base_image(base)
options += gbl_options
if match.group('opt'):
options += shlex.split(match.group('opt'))
images.append({"name": name, "dir": directory,
"base": base, "options": options})
except OSError as err:
sys.exit(f"Can't read images file: {err}")
except ValueError as err:
sys.exit(f"{image_file} line {lineno}: {err}")
if not images:
sys.exit("Empty image configuration")
def init_base_images():
""" initialize base image data structure """
for image in images:
base_name = image["base"]
if base_name not in base_images:
base_images[base_name] = {"layer": False}
base_images["scratch"] = {"layer": None}
base_images["NONE"] = {"layer": None}
def mtime_tree(directory):
""" get modification time of a directory tree """
mtime = 0
for root, _, filenames in os.walk(directory):
mtime = max(mtime, os.stat(root).st_mtime)
for fname in filenames:
mtime = max(mtime, os.stat(os.path.join(root, fname)).st_mtime)
return mtime
def needs_rebuild(image):
""" check if an image needs rebuilding """
# update base_image layer, if empty
base_img = base_images[image["base"]]
if base_img["layer"] is False:
_, layers = get_time_layers(image["base"])
# store last layer
if layers:
base_img["layer"] = layers[-1]
else:
sys.exit(f"Missing base image: {image['base']}")
# get image data
full_name = full_image_name(image["name"])
itime, layers = get_time_layers(full_name)
if layers and full_name in base_images: # image is a base image
base_images[full_name]["layer"] = layers[-1]
# check if base image has changed
if not layers:
return "Image missing in repository"
if base_img["layer"] and base_img["layer"] not in layers:
return "Base image has changed"
# check if build directory has changed, needs full git history
env = os.environ.copy()
env["LC_ALL"] = "C"
try:
# check if git repository is up-to-date
proc = subprocess.run(["git", "-C", image["dir"], "status",
"--porcelain", "--", "."],
capture_output=True,
check=False,
env=env,
universal_newlines=True)
if proc.returncode != 0 and "not a git repository" not in proc.stderr:
# Fatal error
sys.exit(f"{image['name']}: Can't get git status: " + \
proc.stderr.rstrip('\r\n'))
if proc.returncode != 0 or proc.stdout.rstrip('\r\n'):
# Non-fatal error or changes: use modification date of the files
mtime = mtime_tree(image["dir"])
rebuild_reason = "Files in docker context more recent than image"
else:
# clean git repository: use "git log" to get commit time
proc = subprocess.run(["git", "-C", image["dir"], "log", "-n", "1",
"--pretty=tformat:%ct", "--", "."],
capture_output=True,
check=True,
env=env,
universal_newlines=True)
mtime = int(proc.stdout.strip())
rebuild_reason = "Git change more recent than image"
except OSError as err:
sys.exit(f"Can't run git: {err}")
except subprocess.CalledProcessError as err:
sys.exit(f"{image['name']}: Can't get commit date: " + \
err.stderr.rstrip('\r\n'))
except ValueError as err:
sys.exit(f"{image['name']}: Can't get commit date: {err}")
return rebuild_reason if mtime > itime.timestamp() else None
def build(image):
""" build image """
full_name = full_image_name(image["name"])
try:
subprocess.run(["docker", "buildx", "build"] + image["options"] + \
["--push", "--tag", full_name, image["dir"]],
check=True)
except OSError as err:
sys.exit(f"Can't run docker: {err}")
except subprocess.CalledProcessError as err:
sys.exit(err.returncode)
print()
if full_name in base_images: # just modified a base image
_, layers = get_time_layers(full_name)
# store last layer
if layers:
base_images[full_name]["layer"] = layers[-1]
else:
sys.exit(f"{image['name']}: Can't get image layers")
def xor(*params):
""" logical xor """
result = False
for arg in params:
result = result != bool(arg)
return result
# main
args = parser.parse_args()
sys.stdout.reconfigure(line_buffering=True)
docker_login = {"account": os.environ.get("DOCKER_ACCOUNT", "").lower(),
"password": os.environ.pop("DOCKER_PASSWORD", None)}
if docker_login["account"]:
docker_login["account"] = docker_login["account"].rstrip("/")
try:
docker_login["registry"], docker_login["user"], *_ = \
parse_repository(docker_login["account"] + "/dummy")
except ValueError as err_info:
sys.exit(f"DOCKER_ACCOUNT={docker_login['account']}: {err_info}")
if not docker_login["user"]:
sys.exit(f"DOCKER_ACCOUNT={docker_login['account']}: missing user")
else:
docker_login["registry"] = docker_login["user"] = None
if args.dir:
try:
os.chdir(os.path.join(*args.dir))
except OSError as err_info:
sys.exit(f"Can't change directory: {err_info}")
get_images(args.file)
init_base_images()
# check arguments
all_inames = {img["name"] for img in images}.union(base_images.keys())
for iname in args.image:
if iname not in all_inames:
sys.exit(f"Image {iname} not found in '{args.file}' configuration file")
# rebuild images
for img in images:
if xor(args.all, img["name"] in args.image or img["base"] in args.image):
# pragma pylint: disable=invalid-name
reason = "Rebuild triggered by command line"
else:
reason = needs_rebuild(img)
if reason:
print(f"*** {img['name']}\nReason: {reason}\n")
if not args.dry_run:
build(img)

2
.github/bin/requirements.txt vendored Normal file

@@ -0,0 +1,2 @@
python-dateutil
python-dxf>=11

118
.github/docker_build.md vendored Normal file

@@ -0,0 +1,118 @@
# Docker Build System
The regular Docker build system has the disadvantage
that only the cache prevents repetitive rebuilds.
But when the build runs in a VM, this cache is initially
empty, resulting in a rebuild of all images on every run.
This not only increases the CPU load of the VM, but also
creates a lot of updated Docker images that differ only
in the timestamps of their files.
This Docker build system improves on that situation.
It rebuilds an image only when its base image or its build
context has changed. Some other situations in which an
image needs to be recreated are not detected; this mainly
affects installed packages and external files that have
received an update. In those cases a manual trigger is needed.
## Build Tool
The `docker_build` tool reads a configuration file and
then starts building images with `docker buildx build`.
If `docker_build` is launched without arguments, it checks
all configured images for an update of the base image.
Additionally it checks whether `git` shows an update of
the directory containing the docker build context.
When at least one of these conditions is met, the tool
rebuilds that image.
When `docker_build` is run with image names as arguments,
these images are additionally built.
When a base image name is used as an argument, all
images with that base image are rebuilt.
When using the option -a/--all, all images are forcibly
rebuilt, except those specified on the command line.
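A few typical invocations, run from the repository root; this is a sketch,
with `ghcr.io/b-ehlers` and the image names only serving as placeholders:
```
# rebuild only what is outdated (updated base image or changed build context)
DOCKER_ACCOUNT=ghcr.io/b-ehlers python3 .github/bin/docker_build --dir docker

# additionally force a rebuild of alpine-1 and of all images based on alpine
DOCKER_ACCOUNT=ghcr.io/b-ehlers python3 .github/bin/docker_build --dir docker alpine-1 alpine

# only print what would be rebuilt, without building anything
DOCKER_ACCOUNT=ghcr.io/b-ehlers python3 .github/bin/docker_build --dir docker --dry-run
```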
## Configuration File
The build tool reads a configuration file, by default
'docker_images', located in the current directory. For each
target image, it contains its name, its context directory
and optionally the base image and some build options.
The fields are separated by one or more \<tab\> characters.
Comments start with a `#` as the first character of a field.
An empty line or a comment-only line is ignored.
A line with only one field defines global build options,
which are effective from that point on until they are redefined.
These global build options and an optional image-specific option
are combined and passed to the `docker buildx` command;
the resulting command is sketched after the example below.
Here is an example:
```
# Name Directory [Base Image] [Build Options]
--platform=linux/arm64,linux/amd64 # global options
alpine-1 alpine-1 alpine --image-specific-option
alpine-1:test alpine-1a --another-image-specific-option
```
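With `DOCKER_ACCOUNT=ghcr.io/b-ehlers` (the account used as an example
further below), the `alpine-1` line above roughly expands into the following
call; this is a sketch derived from the `build()` function of `docker_build`,
not literal tool output:
```
docker buildx build --platform=linux/arm64,linux/amd64 --image-specific-option \
    --push --tag ghcr.io/b-ehlers/alpine-1 alpine-1
```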
If the base image is `NONE` (all caps), the target
image is not checked against its real base image.
Changes to the base image will then not cause a rebuild.
The target image name may be given in full, in which
case it contains one or more '/' characters.
Another option is to specify only the last part of the
image name. Then `docker_build` uses the `DOCKER_ACCOUNT`
environment variable as its initial part. For example, a
DOCKER_ACCOUNT value of "ghcr.io/b-ehlers" plus the image
name "alpine-1" results in "ghcr.io/b-ehlers/alpine-1".
This method is not applied to base images; they always
have to contain the complete name.
But there is a workaround.
If the base image name starts with `$DOCKER_ACCOUNT`
or `${DOCKER_ACCOUNT}`, the variable DOCKER_ACCOUNT
is replaced by its value from the environment.
In the Dockerfile the variable must be declared by an
`ARG DOCKER_ACCOUNT` instruction. A Dockerfile would
then start with:
```
ARG DOCKER_ACCOUNT
FROM $DOCKER_ACCOUNT/base-image
```
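A hypothetical `docker_images` entry for an image built on top of such a base
could then look as follows (columns tab-separated as usual); `docker_build`
replaces the variable when checking the base image and automatically passes
`--build-arg DOCKER_ACCOUNT=...` to the build:
```
# Name        Directory     Base Image
derived-1     derived-1     ${DOCKER_ACCOUNT}/base-image
```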
## Workflow Definition
[GitHub Actions](https://docs.github.com/en/actions)
uses YAML files in the .github/workflows directory
to define which tasks should be run.
Before `docker_build` can be run, the following steps
need to be done:
* Check out the repository code
* Set up QEMU (for multi-arch building)
* Set up Docker Buildx
* Login to the Container Registry
* Install python requirements
Then `docker_build` can be executed,
normally without any arguments.
But what if an image build needs to be forced?
For that, run the workflow manually and enter the list
of images, separated by spaces, into the input field.
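With the GitHub CLI installed, the same manual run can also be started from a
terminal; this is a sketch, and the workflow file name `docker.yml` is only a
placeholder, since the actual name is not shown in this commit:
```
gh workflow run docker.yml -f images="chromium kalilinux"
```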


@@ -0,0 +1,65 @@
name: Build Docker images and upload to DockerHub
on:
push:
branches:
- main
- master
schedule:
- cron: '37 7 * * 3'
workflow_dispatch:
inputs:
images:
description: 'List of images to be built'
required: false
type: string
jobs:
docker-images:
runs-on: ubuntu-latest
permissions:
packages: write
steps:
- name: Check out repository code
# https://github.com/marketplace/actions/checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up QEMU
# https://github.com/marketplace/actions/docker-setup-qemu
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
# https://github.com/marketplace/actions/docker-setup-buildx
uses: docker/setup-buildx-action@v2
- name: Login to Container Registry
# https://github.com/marketplace/actions/docker-login
uses: docker/login-action@v2
with:
# GitHub Container Registry:
# registry: ghcr.io
# username: ${{ github.repository_owner }}
# password: ${{ secrets.GITHUB_TOKEN }}
#
# DockerHub:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Install python requirements
run: python3 -m pip install --requirement .github/bin/requirements.txt
- name: Build and push images
env:
# DOCKER_PASSWORD is optional, only needed for private repositories
#
# GitHub Container Registry:
# DOCKER_ACCOUNT: ghcr.io/${{ github.repository_owner }}
# DOCKER_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
#
# DockerHub:
DOCKER_ACCOUNT: ${{ secrets.DOCKERHUB_USERNAME }}
# DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
#
IMAGES: ${{ inputs.images }}
run: |
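# disable globbing, then word-split the optional IMAGES input into positional arguments for docker_build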
set -f
set -- $IMAGES
set +f
# use option --dir to use a subdirectory as a docker base dir
python3 "$GITHUB_WORKSPACE/.github/bin/docker_build" --dir "docker" "$@"


@@ -1,4 +1,7 @@
FROM jess/chromium
MAINTAINER developers@gns3.net
# add a dummy layer to get an updated container timestamp
RUN /bin/true
VOLUME /data

43
docker/docker_images Normal file

@@ -0,0 +1,43 @@
# Name Directory [Base Image] [Build Options]
--platform=linux/amd64 # global options
# chromium: base image is updated too often, use manual build on base updates
chromium chromium NONE
# ipterm: base image debian:jessie too old
#ipterm-base ipterm/base
#ipterm ipterm/cli
#webterm ipterm/web
jupyter:v2 jupyter
jupyter27:v2 jupyter-2.7
# kalilinux: base image is updated too often, use manual build on base updates
kalilinux kalilinux NONE
mikrotik-winbox mikrotik-winbox
# ostinato-wireshark: failed to build: mesa-dri-swrast - no such package
#ostinato-wireshark ostinato-wireshark
openvswitch openvswitch
ovs-snmp ovs-snmp --platform=linux/arm64
pyats pyats
ubuntu:focal ubuntu
#
# The following images are not used by any appliance
#
# Replaced by https://github.com/adosztal/gns3-containers
#network_automation network_automation
#python-go-perl-php python-go-perl-php
# AJ Nouri
#endhost endhost
#haproxy haproxy
#network_automation_pycharm network_automation_pycharm
#openvswitch28 openvswitch28
# GNS3 Developers
#dhcp dhcp
#snmpsimulator snmpsimulator
# Tests
#iou iou
#xeyes xeyes


@@ -2,5 +2,8 @@
FROM gns3/ipterm-base
# add a dummy layer to get an updated container timestamp
RUN /bin/true
VOLUME [ "/root" ]
CMD [ "sh", "-c", "cd; exec bash -i" ]


@@ -1,6 +1,6 @@
FROM kalilinux/kali-rolling
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y tshark && apt-get install -y --no-install-recommends metasploit-framework nmap hydra sqlmap telnet openssh-client dnsutils yersinia ettercap-text-only cisco-global-exploiter cisco-auditing-tool snmp dsniff dnschef fping hping3 python-scapy\
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y tshark && apt-get install -y --no-install-recommends metasploit-framework nmap hydra sqlmap telnet openssh-client dnsutils yersinia ettercap-text-only cisco-global-exploiter cisco-auditing-tool snmp dsniff dnschef fping hping3 python3-scapy\
&& rm -rf /var/lib/apt/lists/*
CMD /bin/bash


@@ -20,7 +20,7 @@ metasploit-framework
* fping
* hping3
* tshark
* python-scapy
* python3-scapy
* yersinia
## Build and publish the Images


@@ -19,7 +19,7 @@ metasploit-framework
* fping
* hping3
* tshark
* python-scapy
* python3-scapy
* yersinia</p>
<h2 id="build-and-publish-the-images">Build and publish the Images</h2>
<p>First the base image has to be created:</p>


@@ -1,2 +1,6 @@
FROM alexhorner/winbox-dockerised
# add a dummy layer to get an updated container timestamp
RUN /bin/true
ENV VNC_BUILTIN_DISABLED=true