Add complete GIS ETL project skeleton for SRTM data processing

- Python modules for geo boundary processing and SRTM downloads
- Docker containerization with GDAL support
- Development environment with devcontainer
- Orchestration scripts for local and containerized execution
- Support for KMZ/GeoJSON boundary files and NASA SRTM data

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-07-21 12:24:35 -05:00
parent 57fa60b78d
commit 2192c2ae9c
10 changed files with 419 additions and 1 deletions

60
scripts/docker_run.sh Executable file
View File

@@ -0,0 +1,60 @@
#!/bin/bash
# Build the project's Docker image and run the ETL inside a container.
set -o errexit

# Image coordinates shared by the `docker build` and `docker run` steps.
IMAGE_NAME='gis-etl-inbox'
TAG='latest'

# Required input; stays empty until -b supplies a path.
BOUNDARY_FILE=

# Host-side directories, rooted at the directory the script is invoked from.
DATA_DIR="$(pwd)/data"
OUTPUT_DIR="${DATA_DIR}/output"
# Print the command synopsis and one line per option to stdout, then
# terminate the script with exit status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 -b BOUNDARY_FILE [-o OUTPUT_DIR] [-d DATA_DIR] [-t TAG]" \
    " -b BOUNDARY_FILE Path to boundary file (KMZ/GeoJSON)" \
    " -o OUTPUT_DIR Host output directory (default: ./data/output)" \
    " -d DATA_DIR Host data directory (default: ./data)" \
    " -t TAG Docker image tag (default: latest)"
  exit 1
}
# Parse command-line flags into the configuration variables.
#
# The leading ':' in the optstring puts getopts into silent mode. In that mode
# OPTARG holds the offending option letter both for an unknown option ('?')
# and for an option that is missing its argument (':'). Without it getopts
# unsets OPTARG in those cases, so the diagnostics below would print a bare
# "-" after getopts' own message.
while getopts ":b:o:d:t:h" opt; do
  case "$opt" in
    b) BOUNDARY_FILE="$OPTARG" ;;
    o) OUTPUT_DIR="$OPTARG" ;;
    d) DATA_DIR="$OPTARG" ;;
    t) TAG="$OPTARG" ;;
    h) usage ;;
    :) echo "Option -$OPTARG requires an argument" >&2; usage ;;
    \?) echo "Invalid option -$OPTARG" >&2; usage ;;
  esac
done
# --- Input validation -------------------------------------------------------
# A boundary file is mandatory; diagnostics go to stderr so they are not mixed
# into regular output.
if [ -z "$BOUNDARY_FILE" ]; then
  echo "Error: Boundary file is required" >&2
  usage
fi
if [ ! -f "$BOUNDARY_FILE" ]; then
  echo "Error: Boundary file '$BOUNDARY_FILE' not found" >&2
  exit 1
fi

# --- Host directories -------------------------------------------------------
# Create every directory that is bind-mounted below, including the temp
# directory. A bind-mount source that does not exist is created by the Docker
# daemon, which typically leaves it owned by root.
mkdir -p "$OUTPUT_DIR" "$DATA_DIR/temp"

# `docker run -v` only bind-mounts host paths it recognises as paths; a
# relative value such as "data/area.kmz" is interpreted as a named volume (or
# rejected). Resolve everything to absolute paths before mounting.
BOUNDARY_FILENAME=$(basename -- "$BOUNDARY_FILE")
BOUNDARY_FILE="$(CDPATH= cd -- "$(dirname -- "$BOUNDARY_FILE")" && pwd)/$BOUNDARY_FILENAME"
OUTPUT_DIR="$(CDPATH= cd -- "$OUTPUT_DIR" && pwd)"
DATA_DIR="$(CDPATH= cd -- "$DATA_DIR" && pwd)"

# --- Build and run ----------------------------------------------------------
# The build context is the current directory.
echo "Building Docker image..."
docker build -t "${IMAGE_NAME}:${TAG}" .

echo "Running ETL in Docker container..."
# The boundary file is mounted read-only; output and temp directories are
# mounted read-write at the paths handed to src/main.py.
docker run --rm \
  -v "$BOUNDARY_FILE:/app/data/input/$BOUNDARY_FILENAME:ro" \
  -v "$OUTPUT_DIR:/app/data/output" \
  -v "$DATA_DIR/temp:/app/data/temp" \
  "${IMAGE_NAME}:${TAG}" \
  python src/main.py \
  --boundary-file "/app/data/input/$BOUNDARY_FILENAME" \
  --output-dir "/app/data/output" \
  --temp-dir "/app/data/temp"

# Only reached when every step above succeeded (set -e is in effect).
echo "Docker ETL process completed successfully"

58
scripts/run_etl.sh Executable file
View File

@@ -0,0 +1,58 @@
#!/bin/bash
# Run the SRTM ETL directly on the host with python3.
set -o errexit

# Defaults; each one can be overridden by a command-line flag.
VERBOSE=false
BOUNDARY_FILE=
TEMP_DIR='./data/temp'
OUTPUT_DIR='./data/output'
# Write the command synopsis and option summary to stdout, then end the
# script with exit status 1.
usage() {
  cat <<EOF
Usage: $0 -b BOUNDARY_FILE [-o OUTPUT_DIR] [-t TEMP_DIR] [-v]
 -b BOUNDARY_FILE Path to boundary file (KMZ/GeoJSON)
 -o OUTPUT_DIR Output directory (default: ./data/output)
 -t TEMP_DIR Temporary directory (default: ./data/temp)
 -v Verbose output
EOF
  exit 1
}
# Parse command-line flags into the configuration variables.
#
# The leading ':' in the optstring puts getopts into silent mode. In that mode
# OPTARG holds the offending option letter both for an unknown option ('?')
# and for an option that is missing its argument (':'). Without it getopts
# unsets OPTARG in those cases, so the diagnostics below would print a bare
# "-" after getopts' own message.
while getopts ":b:o:t:vh" opt; do
  case "$opt" in
    b) BOUNDARY_FILE="$OPTARG" ;;
    o) OUTPUT_DIR="$OPTARG" ;;
    t) TEMP_DIR="$OPTARG" ;;
    v) VERBOSE=true ;;
    h) usage ;;
    :) echo "Option -$OPTARG requires an argument" >&2; usage ;;
    \?) echo "Invalid option -$OPTARG" >&2; usage ;;
  esac
done
# --- Input validation -------------------------------------------------------
# A boundary file is mandatory; diagnostics go to stderr so they are not mixed
# into regular output.
if [ -z "$BOUNDARY_FILE" ]; then
  echo "Error: Boundary file is required" >&2
  usage
fi
if [ ! -f "$BOUNDARY_FILE" ]; then
  echo "Error: Boundary file '$BOUNDARY_FILE' not found" >&2
  exit 1
fi

# Make sure the working directories exist before handing them to the ETL,
# matching what the Docker wrapper script does for its mounted directories.
mkdir -p "$OUTPUT_DIR" "$TEMP_DIR"

echo "Starting SRTM ETL process..."
echo "Boundary file: $BOUNDARY_FILE"
echo "Output directory: $OUTPUT_DIR"
echo "Temp directory: $TEMP_DIR"

# Map -v onto a log level and export it. A plain shell variable is invisible
# to the python3 child process, which would make -v a no-op.
# NOTE(review): presumably src/main.py reads LOG_LEVEL from the environment;
# confirm, or pass it via whatever flag main.py actually accepts.
if [ "$VERBOSE" = true ]; then
  LOG_LEVEL="DEBUG"
else
  LOG_LEVEL="INFO"
fi
export LOG_LEVEL

# Append ./src to PYTHONPATH. The ${VAR:+...} form adds the ':' separator only
# when PYTHONPATH already has content; an unconditional ':' would leave a
# leading empty entry, which Python resolves to the current directory.
PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}$(pwd)/src"
export PYTHONPATH

python3 src/main.py \
  --boundary-file "$BOUNDARY_FILE" \
  --output-dir "$OUTPUT_DIR" \
  --temp-dir "$TEMP_DIR"

# Only reached when python3 exited successfully (set -e is in effect).
echo "ETL process completed successfully"