#!/bin/bash
# Master Control Script for Cloudron Packaging
# This script orchestrates the packaging of applications from a Git URL list.
# It runs three packaging jobs in parallel and maintains status tracking.
# Usage: ./master-control-script.sh [/path/to/git-url-list.txt]

set -e          # Exit on any error
set -u          # Exit on undefined variables
set -o pipefail # Exit on pipe failures

# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
OUTPUT_DIR="$SCRIPT_DIR"
ARTIFACTS_DIR="$OUTPUT_DIR/CloudronPackages-Artifacts"
WORKSPACES_DIR="$OUTPUT_DIR/CloudronPackages-Workspaces"
STATUS_FILE="$(dirname "$SCRIPT_DIR")/collab/STATUS.md"

# Allow the Git URL list to be passed as a command-line argument, with a default fallback
if [[ $# -gt 0 && -n "$1" && -f "$1" ]]; then
    GIT_URL_LIST="$1"
else
    GIT_URL_LIST="$(dirname "$SCRIPT_DIR")/collab/GitUrlList.txt"
fi

HUMAN_HELP_DIR="$WORKSPACES_DIR/human-help-required"
MAX_RETRIES=5
LOG_FILE="$WORKSPACES_DIR/packaging.log"

# Docker image prefix
DOCKER_PREFIX="tsysdevstack-cloudron-buildtest-"

# Source the packaging functions
source "$SCRIPT_DIR/package-functions.sh"

# Create necessary directories
mkdir -p "$ARTIFACTS_DIR" "$WORKSPACES_DIR" "$HUMAN_HELP_DIR"

# Function to log messages
log_message() {
    local level=$1
    local message=$2
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    # Sanitize message to prevent injection in logs
    local clean_message=$(printf '%s\n' "$message" | sed 's/[\`\$|&;<>]//g')
    echo "[$timestamp] [$level] $clean_message" >> "$LOG_FILE"
}

# Function to perform an audit of the packaging process
perform_audit() {
    log_message "INFO" "Starting audit process"

    # Count total, completed, failed, and in-progress applications
    # (grep exits non-zero when nothing matches, so fall back to 0 explicitly)
    local total_count completed_count failed_count in_progress_count
    total_count=$(grep -c "https://" "$GIT_URL_LIST" 2>/dev/null) || total_count=0
    completed_count=$(grep "✅ COMPLETE" "$STATUS_FILE" 2>/dev/null | grep -cv "Progress Summary") || completed_count=0
    failed_count=$(grep "🛑 FAILED" "$STATUS_FILE" 2>/dev/null | grep -cv "Human Help Required") || failed_count=0
    in_progress_count=$(grep -c "🔄 IN PROGRESS" "$STATUS_FILE" 2>/dev/null) || in_progress_count=0
    local pending_count=$((total_count - completed_count - failed_count - in_progress_count))

    log_message "INFO" "Audit Summary - Total: $total_count, Completed: $completed_count, Failed: $failed_count, In Progress: $in_progress_count, Pending: $pending_count"

    # Check artifacts directory health
    local artifact_count=$(find "$ARTIFACTS_DIR" -mindepth 1 -maxdepth 1 -type d | wc -l)
    log_message "INFO" "Found $artifact_count artifact directories in $ARTIFACTS_DIR"

    # Check workspace directory health
    local workspace_count=$(find "$WORKSPACES_DIR" -mindepth 1 -maxdepth 1 -type d | grep -v "human-help-required\|packaging.log" | wc -l)
    log_message "INFO" "Found $workspace_count workspace directories in $WORKSPACES_DIR"

    # Check for human help requests
    local help_requests=$(find "$HUMAN_HELP_DIR" -mindepth 1 -maxdepth 1 -name "STATUS-HumanHelp-*" | wc -l)
    log_message "INFO" "Found $help_requests human help requests in $HUMAN_HELP_DIR"

    # Verify Docker images
    local docker_images
    docker_images=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep -c "$DOCKER_PREFIX") || docker_images=0
    log_message "INFO" "Found $docker_images Docker images with prefix $DOCKER_PREFIX"

    log_message "INFO" "Audit process completed"
}

# Function to add a new Git URL to the list
add_git_url() {
    local new_url=$1
    local git_list_file=${2:-"$GIT_URL_LIST"}

    if [[ -z "$new_url" ]]; then
        log_message "ERROR" "No URL provided to add_git_url function"
        return 1
    fi

    # Validate URL format
    if [[ ! "$new_url" =~ ^https?:// ]]; then
        log_message "ERROR" "Invalid URL format: $new_url"
        return 1
    fi

    # Check if the URL already exists in the file
    if grep -Fxq "$new_url" "$git_list_file"; then
        log_message "INFO" "URL already exists in $git_list_file: $new_url"
        return 0
    fi

    # Add the URL to the file
    echo "$new_url" >> "$git_list_file"
    log_message "INFO" "Added new URL to $git_list_file: $new_url"

    # Also update STATUS.md to include the new application
    local repo_name=$(get_repo_name "$new_url")
    local username_repo=$(get_username_repo "$new_url")

    # Check if the application is already in STATUS.md
    if ! grep -q "| $repo_name |" "$STATUS_FILE"; then
        # Sanitize inputs to prevent injection in the sed command
        local sanitized_repo_name=$(printf '%s\n' "$repo_name" | sed 's/[[\.*^$()+?{|]/\\&/g; s/[&/]/\\&/g')
        local sanitized_url=$(printf '%s\n' "$new_url" | sed 's/[[\.*^$()+?{|]/\\&/g; s/[&/]/\\&/g')

        # Append the new application row after the table header separator in STATUS.md
        sed -i "/## Applications Status/,/|-----|-----|-----|-----|/{/|-----|-----|-----|-----|/a\\
| $sanitized_repo_name | $sanitized_url | ⏳ PENDING | |
}" "$STATUS_FILE"
        log_message "INFO" "Added $repo_name to STATUS.md"
    else
        log_message "INFO" "Application $repo_name already exists in STATUS.md"
    fi

    return 0
}

# Function to add multiple Git URLs from a file
add_git_urls_from_file() {
    local input_file=$1
    local git_list_file=${2:-"$GIT_URL_LIST"}

    if [[ ! -f "$input_file" ]]; then
        log_message "ERROR" "Input file does not exist: $input_file"
        return 1
    fi

    while IFS= read -r url; do
        # Skip empty lines and comments
        if [[ -n "$url" && ! "$url" =~ ^[[:space:]]*# ]]; then
            add_git_url "$url" "$git_list_file"
        fi
    done < "$input_file"

    log_message "INFO" "Finished processing URLs from $input_file"
}

# Function to clean up Docker resources periodically
cleanup_docker_resources() {
    log_message "INFO" "Starting Docker resource cleanup"

    # Remove unused Docker images related to our builds.
    # Use a broad pattern match since image names now include timestamps.
    docker images --format "{{.Repository}}\t{{.Tag}}\t{{.ID}}" | grep "$DOCKER_PREFIX" | awk '{print $3}' | xargs -r docker rmi -f 2>/dev/null || true

    # Alternative: remove all images with our prefix pattern (for cases the grep doesn't catch)
    docker images -q --filter "reference=$DOCKER_PREFIX*" | xargs -r docker rmi -f 2>/dev/null || true

    # Remove exited/created containers (split on tabs so multi-word statuses don't shift fields)
    docker ps -a --format "{{.Names}}\t{{.Status}}\t{{.ID}}" | awk -F'\t' '$2 ~ /Exited|Created|Removal/ {print $3}' | xargs -r docker rm -f 2>/dev/null || true

    # Also remove any smoke test containers that might still be running
    docker ps -aq --filter name="smoke-test-" | xargs -r docker rm -f 2>/dev/null || true

    # Remove unused volumes
    docker volume ls -q | xargs -r docker volume rm 2>/dev/null || true

    # Remove unused networks
    docker network ls -q | xargs -r docker network rm 2>/dev/null || true

    log_message "INFO" "Docker resource cleanup completed"
}

# Function to clean up file system resources periodically
cleanup_file_resources() {
    log_message "INFO" "Starting file system resource cleanup"

    # Clean up old error logs in workspace directories
    find "$WORKSPACES_DIR" -name "error.log" -type f -mtime +1 -delete 2>/dev/null || true

    # Remove old workspace directories that may have been left behind by failed processes.
    # Only directories that have active entries in STATUS.md should be kept.
    local active_apps=()
    while IFS= read -r -d '' app; do
        # Get the app name from the directory name
        active_apps+=("$(basename "$app")")
    done < <(find "$WORKSPACES_DIR" -mindepth 1 -maxdepth 1 -type d -print0)
    # Note: this is a simplified approach - a full implementation would compare against STATUS.md

    log_message "INFO" "File system resource cleanup completed"
}
# Function to update status in STATUS.md
update_status() {
    local app_name=$1
    local new_status=$2
    local notes=${3:-""}

    # Validate inputs to prevent injection
    if [[ -z "$app_name" ]] || [[ -z "$new_status" ]]; then
        log_message "ERROR" "Empty app_name or new_status in update_status function"
        return 1
    fi

    # Sanitize inputs to prevent injection:
    # - remove pipe characters, which would break the markdown table format
    # - escape regex special characters to prevent sed injection
    # - encode angle brackets and escape ampersands so the notes are safe as sed replacement text
    local clean_app_name=$(printf '%s\n' "$app_name" | sed 's/|//g; s/[[\.*^$()+?{|\\]/\\&/g')
    local clean_status=$(printf '%s\n' "$new_status" | sed 's/|//g; s/[[\.*^$()+?{|\\]/\\&/g')
    local clean_notes=$(printf '%s\n' "$notes" | sed 's/|//g; s/[[\.*^$()+?{|\\]/\\&/g' | sed 's/</\&lt;/g; s/>/\&gt;/g; s/&/\\\&/g')

    # Use file locking to prevent race conditions when multiple processes update the file
    local lock_file="$STATUS_FILE.lock"
    exec 200>"$lock_file"
    flock -x 200 # Exclusive lock

    # Update the status column on the line matching the app name.
    # A targeted pattern reduces the chance of unintended matches.
    sed -i "s/^| $clean_app_name | \([^|]*\) | \([^|]*\) | \([^|]*\) |$/| $clean_app_name | \1 | $clean_status | $clean_notes |/" "$STATUS_FILE"

    # Release the lock by closing the file descriptor
    exec 200>&-

    log_message "INFO" "Updated status for $app_name to $new_status"
}
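# add_git_url and update_status above assume STATUS.md contains an "## Applications Status"
# markdown table with four columns (application, URL, status, notes) followed by a
# "## Progress Summary" section. A minimal sketch of that layout (the column headers and
# sample row are illustrative assumptions, not the real file):
#
#   ## Applications Status
#   | Application | Git URL | Status | Notes |
#   |-----|-----|-----|-----|
#   | example-app | https://github.com/example/example-app | ⏳ PENDING | |
#
#   ## Progress Summary
#   ...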
# Function to get the repository name from a URL
get_repo_name() {
    local url=$1

    if [[ -z "$url" ]]; then
        log_message "ERROR" "URL is empty in get_repo_name function"
        echo "unknown-repo"
        return 1
    fi

    # Extract the basename securely using parameter expansion:
    # strip any trailing slash, take the last path component, drop a .git suffix
    local clean_url="${url%/}"
    local repo_part="${clean_url##*/}"
    repo_part="${repo_part%.git}"

    # Sanitize the repo name to contain only valid characters
    local sanitized=$(printf '%s\n' "$repo_part" | sed 's/[^a-zA-Z0-9._-]/-/g')
    # Double-check to prevent path traversal
    sanitized=$(printf '%s\n' "$sanitized" | sed 's/\.\.//g; s/\/\///g')

    # Ensure the result is not empty
    if [[ -z "$sanitized" ]] || [[ "$sanitized" == "." ]] || [[ "$sanitized" == ".." ]]; then
        sanitized="unknown-repo-$(date +%s)"
    fi

    echo "$sanitized"
}

# Function to extract username/repo from a GitHub/GitLab/other URL
get_username_repo() {
    local url=$1

    if [[ -z "$url" ]]; then
        log_message "ERROR" "URL is empty in get_username_repo function"
        echo "unknown-user/unknown-repo"
        return 1
    fi

    # Clean the URL to prevent path traversal
    local clean_url="${url#*://}" # Remove the protocol
    clean_url="${clean_url#*@}"   # Remove a potential user@ prefix

    if [[ "$clean_url" == *"github.com"* ]]; then
        # Extract username/repo from a GitHub URL
        local path=${clean_url#*github.com/}
        path=${path%.git}
        # Ensure we have a valid path
        if [[ "$path" != *"/"* ]] || [[ "$path" == "/" ]]; then
            # If there's no slash, the URL is probably malformed; fall back to the repo name
            path="unknown-user/$(get_repo_name "$url")"
        else
            # Sanitize the path to prevent directory traversal
            path=$(printf '%s\n' "$path" | sed 's/\.\.//g; s/\/\///g')
        fi
        echo "$path"
    elif [[ "$clean_url" == *"gitlab.com"* ]]; then
        # Extract username/repo from a GitLab URL
        local path=${clean_url#*gitlab.com/}
        path=${path%.git}
        # Ensure we have a valid path
        if [[ "$path" != *"/"* ]] || [[ "$path" == "/" ]]; then
            # If there's no slash, the URL is probably malformed; fall back to the repo name
            path="unknown-user/$(get_repo_name "$url")"
        else
            # Sanitize the path to prevent directory traversal
            path=$(printf '%s\n' "$path" | sed 's/\.\.//g; s/\/\///g')
        fi
        echo "$path"
    else
        # For other URLs, try to extract a user/repo pattern
        local path=${clean_url#*/} # Remove the host part
        if [[ "$path" == *"/"* ]]; then
            path=${path%.git}
            # Sanitize the path to prevent directory traversal
            path=$(printf '%s\n' "$path" | sed 's/\.\.//g; s/\/\///g')
        else
            # If there is no slash, use a generic format
            local repo=$(get_repo_name "$url")
            path="unknown-user/$repo"
        fi
        echo "$path"
    fi
}
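# Illustrative examples of the two extraction helpers above (hypothetical URLs):
#   get_repo_name     "https://github.com/example/my-app.git"  ->  my-app
#   get_username_repo "https://github.com/example/my-app.git"  ->  example/my-app
#   get_username_repo "https://example.org/forge/tool.git"     ->  forge/tool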
# Function to run the packaging workflow for a single repository
run_packaging_script() {
    local url=$1
    local repo_name=$(get_repo_name "$url")
    local username_repo=$(get_username_repo "$url")
    local workspace_dir="$WORKSPACES_DIR/$repo_name"
    local artifact_dir="$ARTIFACTS_DIR/$repo_name"
    local app_log_file="$artifact_dir/${repo_name}-package.log"

    echo "$(date): Starting packaging for $repo_name ($url)" >> "$WORKSPACES_DIR/packaging.log"

    # Update status to IN PROGRESS
    update_status "$repo_name" "🔄 IN PROGRESS" "Packaging started"

    # Initialize workspace
    mkdir -p "$workspace_dir" "$artifact_dir"

    # Create application-specific log file
    touch "$app_log_file"

    # Clone repository
    if [ ! -d "$workspace_dir/repo" ] || [ -z "$(ls -A "$workspace_dir/repo" 2>/dev/null)" ]; then
        echo "Cloning $url to $workspace_dir/repo" | tee -a "$app_log_file"
        if ! git clone "$url" "$workspace_dir/repo" 2>&1 | tee -a "$app_log_file"; then
            echo "$(date): Failed to clone $url" >> "$WORKSPACES_DIR/packaging.log"
            update_status "$repo_name" "🛑 FAILED" "Failed to clone repository"
            return 1
        fi
    else
        # Update repository
        echo "Updating $url in $workspace_dir/repo" | tee -a "$app_log_file"
        if ! (
            cd "$workspace_dir/repo" &&
            git remote -v &&
            git fetch origin &&
            # Sanitize the HEAD branch name to prevent command injection
            HEAD_BRANCH=$(git remote show origin 2>/dev/null | sed -n '/HEAD branch/s/.*: //p' | tr -cd '[:alnum:]/_-') &&
            if [ -n "$HEAD_BRANCH" ]; then
                git reset --hard "origin/$HEAD_BRANCH" 2>/dev/null ||
                    git reset --hard origin/main 2>/dev/null ||
                    git reset --hard origin/master 2>/dev/null ||
                    git pull origin "$HEAD_BRANCH" 2>/dev/null ||
                    git pull origin main 2>/dev/null ||
                    git pull origin master 2>/dev/null
            else
                # If we couldn't determine the HEAD branch, try common defaults
                git reset --hard origin/main 2>/dev/null ||
                    git reset --hard origin/master 2>/dev/null ||
                    git pull origin main 2>/dev/null ||
                    git pull origin master 2>/dev/null
            fi
        ) 2>&1 | tee -a "$app_log_file"; then
            echo "$(date): Failed to update $url" >> "$WORKSPACES_DIR/packaging.log"
            update_status "$repo_name" "🔄 IN PROGRESS" "Repo update failed, will retry with fresh clone"
            # Remove the repo and try to clone again
            rm -rf "$workspace_dir/repo"
            if ! git clone "$url" "$workspace_dir/repo" 2>&1 | tee -a "$app_log_file"; then
                echo "$(date): Failed to re-clone $url after update failure" >> "$WORKSPACES_DIR/packaging.log"
                update_status "$repo_name" "🛑 FAILED" "Failed to update or re-clone repository"
                return 1
            fi
        fi
    fi

    # Attempt packaging with retries
    local attempt=1
    local success=0
    while [ $attempt -le $MAX_RETRIES ] && [ $success -eq 0 ]; do
        echo "$(date): Attempt $attempt/$MAX_RETRIES for $repo_name" >> "$WORKSPACES_DIR/packaging.log"

        # Capture the output and error of the packaging function
        if package_application "$repo_name" "$username_repo" "$workspace_dir" "$artifact_dir" "$url" 2>"$workspace_dir/error.log"; then
            success=1
            update_status "$repo_name" "✅ COMPLETE" "Packaged successfully on attempt $attempt"
            echo "$(date): Successfully packaged $repo_name on attempt $attempt" >> "$WORKSPACES_DIR/packaging.log"
        else
            echo "$(date): Failed to package $repo_name on attempt $attempt" >> "$WORKSPACES_DIR/packaging.log"
            cat "$workspace_dir/error.log" >> "$WORKSPACES_DIR/packaging.log"
            ((attempt++))
        fi
    done

    if [ $success -eq 0 ]; then
        # Mark as failed and create a human help request with detailed information
        local error_details=""
        if [ -f "$workspace_dir/error.log" ]; then
            error_details=$(head -20 "$workspace_dir/error.log" 2>/dev/null | sed 's/"/\\"/g; s/[\t$`]/ /g; s/secret[^[:space:]]*/[REDACTED]/gi; s/token[^[:space:]]*/[REDACTED]/gi; s/key[^[:space:]]*/[REDACTED]/gi' | tr '\n' ' ')
        fi
        update_status "$repo_name" "🛑 FAILED" "Failed after $MAX_RETRIES attempts. Error: $error_details"

        # Create a detailed human help file with proper sanitization
        {
            echo "Application: $repo_name"
            echo "URL: $url"
            echo "Issue: Failed to package after $MAX_RETRIES attempts"
            echo "Date: $(date)"
            echo "Error Details:"
            if [ -f "$workspace_dir/error.log" ]; then
                # Sanitize the error log to remove potentially sensitive information
                sed 's/secret[^[:space:]]*/[REDACTED]/gi; s/token[^[:space:]]*/[REDACTED]/gi; s/key[^[:space:]]*/[REDACTED]/gi; s/[A-Za-z0-9]\{20,\}/[REDACTED]/g' "$workspace_dir/error.log" 2>/dev/null
            else
                echo "No error log file found"
            fi
        } > "$HUMAN_HELP_DIR/STATUS-HumanHelp-$repo_name"

        echo "$(date): Marked $repo_name for human help after $MAX_RETRIES failed attempts" >> "$WORKSPACES_DIR/packaging.log"
    else
        # On success, clean up the error log if it exists
        if [ -f "$workspace_dir/error.log" ]; then
            rm -f "$workspace_dir/error.log"
        fi
    fi
}

# Function to package a specific application
package_application() {
    local repo_name=$1
    local username_repo=$2
    local workspace_dir=$3
    local artifact_dir=$4
    local url=${5:-"https://github.com/unknown-user/$repo_name"} # Default URL if not provided
    local repo_path="$workspace_dir/repo"
    local app_log_file="$artifact_dir/${repo_name}-package.log"

    # Use the function library to detect and package the application.
    # Output goes to both the caller's log and the application-specific log.
    detect_and_package "$repo_name" "$repo_path" "$artifact_dir" "$url" 2>&1 | tee -a "$app_log_file"
}

# Function to create a Dockerfile based on the application type
create_dockerfile() {
    local repo_name=$1
    local repo_path=$2

    # Detect the application type and create an appropriate Dockerfile.
    # This is a simplified approach - a real implementation would be much more involved.
    if [ -f "$repo_path/package.json" ]; then
        # Node.js application
        cat > "$repo_path/Dockerfile" << EOF
FROM node:18-alpine
WORKDIR /app
COPY package*.json ./
RUN npm install
COPY . .
EXPOSE 3000
CMD ["npm", "start"]
EOF
    elif [ -f "$repo_path/requirements.txt" ]; then
        # Python application
        cat > "$repo_path/Dockerfile" << EOF
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 8000
CMD ["python", "app.py"]
EOF
    elif [ -f "$repo_path/composer.json" ]; then
        # PHP application
        cat > "$repo_path/Dockerfile" << EOF
FROM php:8.1-apache
RUN docker-php-ext-install mysqli && docker-php-ext-enable mysqli
COPY . /var/www/html/
EXPOSE 80
CMD ["apache2-foreground"]
EOF
    elif [ -f "$repo_path/Gemfile" ]; then
        # Ruby application
        cat > "$repo_path/Dockerfile" << EOF
FROM ruby:3.0
WORKDIR /app
COPY Gemfile Gemfile.lock ./
RUN bundle install
COPY . .
EXPOSE 3000
CMD ["ruby", "app.rb"]
EOF
    else
        # Default to a basic placeholder server
        cat > "$repo_path/Dockerfile" << EOF
FROM alpine:latest
WORKDIR /app
COPY . .
RUN apk add --no-cache bash
EXPOSE 8080
CMD ["sh", "-c", "while true; do sleep 30; done"]
EOF
    fi
}
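# The Git URL list consumed by load_git_urls below is a plain text file with one
# repository URL per line; blank lines and lines starting with '#' are skipped, and
# only http(s) URLs are accepted. A minimal illustrative example (URLs are placeholders):
#
#   # tier 1 applications
#   https://github.com/example/app-one
#   https://gitlab.com/example/app-two.git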
# Function to load URLs from the Git URL list file
load_git_urls() {
    local git_list_file=${1:-"$GIT_URL_LIST"}
    local urls=()

    if [[ ! -f "$git_list_file" ]]; then
        log_message "ERROR" "Git URL list file does not exist: $git_list_file"
        return 1
    fi

    while IFS= read -r line; do
        # Skip empty lines and comments
        if [[ -n "$line" && ! "$line" =~ ^[[:space:]]*# ]]; then
            # Validate that the line looks like a URL
            if [[ "$line" =~ ^https?:// ]]; then
                urls+=("$line")
            else
                log_message "WARN" "Invalid URL format skipped: $line"
            fi
        fi
    done < "$git_list_file"

    # Print the urls array to stdout so the caller can capture it
    if [ ${#urls[@]} -gt 0 ]; then
        printf '%s\n' "${urls[@]}"
    fi
}

# Main function to process all applications
main() {
    # Create the main log file for the overall process
    local main_log_file="$WORKSPACES_DIR/main-process.log"
    touch "$main_log_file"

    log_message "INFO" "Starting Cloudron packaging process" | tee -a "$main_log_file"

    # Validate that required files exist
    if [[ ! -f "$SCRIPT_DIR/package-functions.sh" ]]; then
        log_message "ERROR" "Package functions file does not exist: $SCRIPT_DIR/package-functions.sh" | tee -a "$main_log_file"
        exit 1
    fi

    # Load URLs from the Git URL list file
    local url_list
    mapfile -t url_list < <(load_git_urls)
    local total=${#url_list[@]}

    log_message "INFO" "Found $total URLs to process" | tee -a "$main_log_file"

    # Process applications in batches of 3 for parallel execution
    local i=0
    local batch_count=0

    # Heartbeat file to show the process is alive
    local heartbeat_file="$WORKSPACES_DIR/process-heartbeat-$(date +%s).tmp"
    touch "$heartbeat_file"

    while [ $i -lt $total ]; do
        # Process up to 3 applications in parallel
        local end=$((i + 3))
        [ $end -gt $total ] && end=$total

        log_message "INFO" "Starting batch with applications $(printf '%s; ' "${url_list[@]:i:3}")"

        for ((j = i; j < end; j++)); do
            log_message "INFO" "Starting packaging for ${url_list[$j]}"
            run_packaging_script "${url_list[$j]}" &
        done

        # Wait for all background processes to complete
        wait

        # Update heartbeat to show the process is active
        touch "$heartbeat_file"

        # Perform an audit after each batch
        perform_audit

        # Perform resource cleanup every 10 batches to prevent resource exhaustion during long runs
        batch_count=$((batch_count + 1))
        if [ $((batch_count % 10)) -eq 0 ]; then
            log_message "INFO" "Performing periodic resource cleanup after batch $batch_count"
            cleanup_docker_resources
            cleanup_file_resources
        fi

        # Check for critical errors that might require stopping
        local failed_count_current=$(grep -o "🛑 FAILED" "$STATUS_FILE" | wc -l)
        local total_failed_since_start=$((failed_count_current))
        # Optional: stop if too many failures occur in a row.
        # This is commented out but can be enabled if needed:
        # if [ $total_failed_since_start -gt 50 ]; then
        #     log_message "ERROR" "Too many failures (${total_failed_since_start}), stopping process"
        #     break
        # fi

        # Advance to the next batch
        i=$end

        # Update the progress summary in STATUS.md
        local completed=$(grep -o "✅ COMPLETE" "$STATUS_FILE" | wc -l)
        local failed=$(grep -o "🛑 FAILED" "$STATUS_FILE" | wc -l)
        local in_progress=$(grep -o "🔄 IN PROGRESS" "$STATUS_FILE" | wc -l)
        local pending=$((total - completed - failed - in_progress))
        # Ensure pending never goes negative due to counting issues
        [ $pending -lt 0 ] && pending=0

        # Rewrite the summary section in STATUS.md (truncate at the heading, then append)
        sed -i '/## Progress Summary/Q' "$STATUS_FILE"
        cat >> "$STATUS_FILE" << EOF
## Progress Summary
- Total Applications: $total
- Completed: $completed ($(awk "BEGIN {printf \"%.0f\", $completed * 100 / $total}")%)
- In Progress: $in_progress ($(awk "BEGIN {printf \"%.0f\", $in_progress * 100 / $total}")%)
- Failed: $failed ($(awk "BEGIN {printf \"%.0f\", $failed * 100 / $total}")%)
- Pending: $pending ($(awk "BEGIN {printf \"%.0f\", $pending * 100 / $total}")%)

## Human Help Required
$(ls -1 "$HUMAN_HELP_DIR" 2>/dev/null || echo "None at the moment.")

## Last Updated
$(date)
EOF
    done
    # Final cleanup
    rm -f "$heartbeat_file" 2>/dev/null || true

    # Final audit
    perform_audit

    log_message "INFO" "Completed Cloudron packaging process"
}

# Run the main function if the script is executed directly
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi
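# When this file is sourced rather than executed, main() is not run and the helper
# functions can be called directly. A hypothetical session (path and URL are
# illustrative only):
#   source ./master-control-script.sh
#   add_git_url "https://github.com/example/new-app"
#   perform_audit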