#!/bin/bash
#
# Build the ETL Docker image from the current directory and run
# src/main.py inside a container against a boundary file.
#
# Usage: see usage() below. The boundary file is mounted read-only under
# /app/data/input, the output directory under /app/data/output and
# "$DATA_DIR/temp" under /app/data/temp.

set -euo pipefail

IMAGE_NAME="gis-etl-inbox"
TAG="latest"
BOUNDARY_FILE=""
OUTPUT_DIR="$(pwd)/data/output"
DATA_DIR="$(pwd)/data"

#######################################
# Print the help text and exit.
# Arguments: $1 - exit status (default: 1). With status 0 the text goes
#                 to stdout, otherwise to stderr.
#######################################
usage() {
  local status="${1:-1}"
  local fd=2
  if [[ "$status" -eq 0 ]]; then
    fd=1
  fi
  {
    echo "Usage: $0 -b BOUNDARY_FILE [-o OUTPUT_DIR] [-d DATA_DIR] [-t TAG]"
    echo " -b BOUNDARY_FILE Path to boundary file (KMZ/GeoJSON)"
    echo " -o OUTPUT_DIR Host output directory (default: ./data/output)"
    echo " -d DATA_DIR Host data directory (default: ./data)"
    echo " -t TAG Docker image tag (default: latest)"
  } >&"$fd"
  exit "$status"
}

#######################################
# Print the absolute path of an existing directory.
# Arguments: $1 - directory path (absolute or relative)
# Outputs:   absolute path on stdout
# Returns:   non-zero if the directory cannot be entered
#######################################
abs_dir() {
  (cd -- "$1" && pwd)
}

# Leading ':' selects getopts silent mode so that OPTARG carries the
# offending option letter in both the ':' and '?' arms.
while getopts ":b:o:d:t:h" opt; do
  case "$opt" in
    b) BOUNDARY_FILE="$OPTARG" ;;
    o) OUTPUT_DIR="$OPTARG" ;;
    d) DATA_DIR="$OPTARG" ;;
    t) TAG="$OPTARG" ;;
    h) usage 0 ;;
    :)
      echo "Option -$OPTARG requires an argument" >&2
      usage
      ;;
    \?)
      echo "Invalid option -$OPTARG" >&2
      usage
      ;;
  esac
done

if [[ -z "$BOUNDARY_FILE" ]]; then
  echo "Error: Boundary file is required" >&2
  usage
fi

if [[ ! -f "$BOUNDARY_FILE" ]]; then
  echo "Error: Boundary file '$BOUNDARY_FILE' not found" >&2
  exit 1
fi

# The temp directory is bind-mounted below, so create it on the host
# up front (this also creates DATA_DIR itself).
TEMP_DIR="$DATA_DIR/temp"
mkdir -p -- "$OUTPUT_DIR" "$TEMP_DIR"

# docker -v only bind-mounts host paths given as absolute paths; a bare
# relative value is taken as a volume name. Resolve everything that is
# mounted to an absolute path first.
BOUNDARY_FILENAME=$(basename -- "$BOUNDARY_FILE")
BOUNDARY_DIR=$(abs_dir "$(dirname -- "$BOUNDARY_FILE")")
BOUNDARY_FILE="$BOUNDARY_DIR/$BOUNDARY_FILENAME"
OUTPUT_DIR=$(abs_dir "$OUTPUT_DIR")
TEMP_DIR=$(abs_dir "$TEMP_DIR")

echo "Building Docker image..."
docker build -t "${IMAGE_NAME}:${TAG}" .

echo "Running ETL in Docker container..."
docker run --rm \
  -v "$BOUNDARY_FILE:/app/data/input/$BOUNDARY_FILENAME:ro" \
  -v "$OUTPUT_DIR:/app/data/output" \
  -v "$TEMP_DIR:/app/data/temp" \
  "${IMAGE_NAME}:${TAG}" \
  python src/main.py \
  --boundary-file "/app/data/input/$BOUNDARY_FILENAME" \
  --output-dir "/app/data/output" \
  --temp-dir "/app/data/temp"

echo "Docker ETL process completed successfully"