#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# qa-api-e2e-run.sh   (DEV Ground Rule §Precondition.2 — API coverage 100%)
#
# Stack-agnostic: makes sure a server is live (starting one itself from
# START_CMD when nothing is listening), runs whatever E2E command is configured
# in .cqa-runner.conf, then (if the suite recorded one) reports API coverage
# against the OpenAPI spec from API_COVERAGE_FILE.
#
# Prerequisites:
#   - START_CMD set in .cqa-runner.conf (so this script can launch the server),
#     or a server already running at DEFAULT_BASE_URL.
#   - External dependencies (DB, third-party APIs) are up with seed data.
#
# Usage:
#   ./qa-api-e2e-run.sh             # auto-start the server (if needed) + run E2E
#   ./qa-api-e2e-run.sh --no-start  # never start a server; require one already up
#   ./qa-api-e2e-run.sh --keep-server # leave an auto-started server running
#   ./qa-api-e2e-run.sh --skip-llm  # set E2E_SKIP_LLM=1 (suite decides what to skip)
#   ./qa-api-e2e-run.sh --base URL  # override the base URL
#   ./qa-api-e2e-run.sh --strict    # exit ≠ 0 if any coverage axis < 100%
#   ./qa-api-e2e-run.sh -- ...       # everything after `--` is appended to E2E_TEST_CMD
#
# Config (.cqa-runner.conf, overridable via env):
#   E2E_TEST_CMD        command to run the E2E suite (BASE_URL exported as E2E_BASE_URL)
#   START_CMD           command that starts the server (run in the background)
#   DEFAULT_BASE_URL    base URL for the health check (default http://localhost:8000)
#   HEALTH_PATH         health endpoint (default /health)
#   HEALTH_TIMEOUT      per-attempt curl timeout in seconds (default 10)
#   START_WAIT          max seconds to wait for an auto-started server (default 60)
#   API_COVERAGE_FILE   JSON report the suite records (optional)
# ─────────────────────────────────────────────────────────────────────────────
# Strict mode: abort on unhandled errors, unset variables and failed pipelines.
set -o errexit -o nounset -o pipefail

# ANSI escape sequences used by the `echo -e` status messages below.
GREEN='\033[0;32m'
RED='\033[0;31m'
YEL='\033[0;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

# Work from the directory that holds this script, so relative paths (such as
# the default OUT_DIR) resolve against it regardless of the caller's cwd.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$ROOT_DIR"

# Runner configuration: CQA_RUNNER_CONF overrides the default file that sits
# next to this script. The file is mandatory and is sourced as shell code.
CONF="${CQA_RUNNER_CONF:-$ROOT_DIR/.cqa-runner.conf}"
if [[ ! -f "$CONF" ]]; then
  echo -e "${RED}❌ config not found: $CONF${NC}"
  exit 1
fi
# shellcheck disable=SC1090
. "$CONF"

# Fill in defaults for anything neither the environment nor the config set.
# Order matters: SERVER_LOG's default is derived from OUT_DIR.
: "${OUT_DIR:=qa-coverage}"
: "${DEFAULT_BASE_URL:=http://localhost:8000}"
: "${HEALTH_PATH:=/health}"
: "${HEALTH_TIMEOUT:=10}"
: "${START_WAIT:=60}"
: "${START_CMD:=}"
: "${SERVER_LOG:=$OUT_DIR/server.log}"

# Command-line state; parse_args fills these in from the script's arguments.
SKIP_LLM=0
STRICT=0
NO_START=0
KEEP_SERVER=0
BASE_URL_OVERRIDE=""
PASSTHRU=()

# Print the script's leading comment block (every `#` line after the shebang,
# up to the first non-comment line) as the --help text. Scanning for the end
# of the block keeps code lines out of the help output without relying on
# hard-coded line numbers.
print_usage() {
  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$0"
}

# Parse command-line flags into the globals above.
# Arguments: the script's own "$@"
# Outputs:   usage text on stdout for -h/--help; diagnostics on stderr
# Exits:     0 after --help; 2 on an unknown option or when --base has no value
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --skip-llm)     SKIP_LLM=1; shift ;;
      --strict)       STRICT=1; shift ;;
      --no-start)     NO_START=1; shift ;;
      --keep-server)  KEEP_SERVER=1; shift ;;
      --base)
        # A bare `--base` would otherwise hit `set -u` on $2 and die with an
        # "unbound variable" error instead of a usable message.
        if [[ $# -lt 2 ]]; then
          echo "Option --base requires a URL argument" >&2
          exit 2
        fi
        BASE_URL_OVERRIDE="$2"
        shift 2
        ;;
      --)
        # Everything after `--` is kept verbatim for the E2E command.
        shift
        PASSTHRU+=("$@")
        break
        ;;
      -h|--help) print_usage; exit 0 ;;
      *) echo "Unknown option: $1" >&2; exit 2 ;;
    esac
  done
}
parse_args "$@"

# Base URL precedence: --base flag, then a non-empty E2E_BASE_URL already set
# (environment or config), then DEFAULT_BASE_URL. The result is exported as
# E2E_BASE_URL so the E2E command can read it.
if [[ -n "$BASE_URL_OVERRIDE" ]]; then
  BASE_URL="$BASE_URL_OVERRIDE"
elif [[ -n "${E2E_BASE_URL:-}" ]]; then
  BASE_URL="$E2E_BASE_URL"
else
  BASE_URL="$DEFAULT_BASE_URL"
fi
E2E_BASE_URL="$BASE_URL"
export E2E_BASE_URL

# --skip-llm is only forwarded as a flag; this script skips nothing itself.
if [[ "$SKIP_LLM" -eq 1 ]]; then
  export E2E_SKIP_LLM=1
fi

# Replace every literal ${OUT_DIR} placeholder in $1 with the value of OUT_DIR.
# Plain text substitution only — never eval (commands may contain && / ; / pipes).
# The text is spliced by hand instead of with ${var//pat/repl}: since bash 5.2
# (patsub_replacement) an `&` in the replacement is expanded to the matched
# text, which would corrupt an OUT_DIR that contains `&`.
# Arguments: $1 - text that may contain ${OUT_DIR} (optional, defaults to empty)
# Outputs:   the substituted text on stdout, without a trailing newline
expand_out() {
  local rest="${1:-}" out="" head
  local token='${OUT_DIR}'
  while [[ "$rest" == *"$token"* ]]; do
    head=${rest%%"$token"*}   # text before the first placeholder
    out+=$head$OUT_DIR
    rest=${rest#*"$token"}    # text after that placeholder
  done
  printf '%s' "$out$rest"
}
# Resolve ${OUT_DIR} placeholders in the configured command and paths.
# E2E_TEST_CMD is mandatory; the coverage file is optional and may stay empty.
E2E_CMD="$(expand_out "${E2E_TEST_CMD:-}")"
[[ -n "$E2E_CMD" ]] || {
  echo -e "${RED}❌ E2E_TEST_CMD is not set in $CONF${NC}"
  exit 1
}
API_COVERAGE_FILE="$(expand_out "${API_COVERAGE_FILE:-}")"
SERVER_LOG="$(expand_out "$SERVER_LOG")"

# ── Server lifecycle ─────────────────────────────────────────────────────────
# PID of the server launched by start_server (empty until one is launched).
SERVER_PID=""
# Set to 1 by start_server; stop_server and wait_for_health act on it.
STARTED_SERVER=0

# Health probe: succeeds when curl can fetch BASE_URL + HEALTH_PATH without an
# HTTP error. Each attempt may take up to HEALTH_TIMEOUT seconds, so a slow
# first response (e.g. hostname/metadata lookups) is not mistaken for a dead
# server.
probe() {
  curl --silent --fail --output /dev/null \
    --max-time "$HEALTH_TIMEOUT" \
    "${BASE_URL}${HEALTH_PATH}"
}

# Send one signal to a process and to all of its descendants, deepest first,
# so that children the server spawned are signalled too instead of being left
# behind. Escalating from TERM to KILL is the caller's job.
# Arguments: $1 - root PID; $2 - signal name (default TERM)
# Returns:   0 always; processes that are already gone are ignored.
kill_tree() {
  local root="$1"
  local signal="${2:-TERM}"
  local kid
  while read -r kid; do
    [[ -n "$kid" ]] || continue
    kill_tree "$kid" "$signal"
  done < <(pgrep -P "$root" 2>/dev/null)
  kill "-$signal" "$root" 2>/dev/null || true
}

# Stop the server this script started (no-op when it started none).
# With --keep-server the process is left running and only a notice is printed.
# Otherwise the process tree gets TERM, up to ~2.5 s of grace (polled every
# 0.1 s), then KILL if the root process is still alive, and is finally reaped.
# Globals: STARTED_SERVER (read, reset to 0 once the server is stopped),
#          SERVER_PID, KEEP_SERVER, SERVER_LOG (read)
# Returns: 0 always.
stop_server() {
  [[ "$STARTED_SERVER" -eq 1 && -n "$SERVER_PID" ]] || return 0
  if [[ "$KEEP_SERVER" -eq 1 ]]; then
    echo -e "${YEL}↪ leaving auto-started server running (pid $SERVER_PID, log $SERVER_LOG)${NC}"
    return 0
  fi
  # Mark as handled first so a second invocation does nothing.
  STARTED_SERVER=0
  echo -e "${CYAN}stopping auto-started server (pid $SERVER_PID)${NC}"
  kill_tree "$SERVER_PID" TERM
  local i
  for (( i = 0; i < 25; i++ )); do
    kill -0 "$SERVER_PID" 2>/dev/null || break
    sleep 0.1
  done
  if kill -0 "$SERVER_PID" 2>/dev/null; then
    kill_tree "$SERVER_PID" KILL
  fi
  wait "$SERVER_PID" 2>/dev/null || true
}

# Cleanup runs exactly once, from the EXIT trap. The signal traps only turn
# the signal into an exit (128 + signal number): a handler that merely ran
# stop_server would return, and bash would then carry on with the rest of the
# script as if no signal had arrived.
trap stop_server EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Launch START_CMD in the background through `bash -c`, sending its stdout and
# stderr to SERVER_LOG (the log's directory is created if needed).
# Globals: START_CMD, SERVER_LOG (read); SERVER_PID, STARTED_SERVER (written)
# Outputs: the command being run and the log location, on stdout
start_server() {
  mkdir -p "$(dirname "$SERVER_LOG")"
  printf '  $ %s\n' "$START_CMD"
  printf '  (logging to %s)\n' "$SERVER_LOG"
  bash -c "$START_CMD" >"$SERVER_LOG" 2>&1 &
  SERVER_PID=$!
  STARTED_SERVER=1
}

# Poll the health endpoint until it answers or START_WAIT seconds have passed,
# pausing 2 s between attempts.
# If this script started the server and that process has already exited, give
# up immediately and show the last 20 lines of its log (when the log exists).
# Globals: START_WAIT, STARTED_SERVER, SERVER_PID, SERVER_LOG (read)
# Returns: 0 once the probe succeeds; 1 on timeout or when the server died.
wait_for_health() {
  local started_at give_up_at
  started_at=$(date +%s)
  give_up_at=$(( started_at + START_WAIT ))
  until (( $(date +%s) >= give_up_at )); do
    probe && return 0
    if [[ "$STARTED_SERVER" -eq 1 ]]; then
      if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        echo -e "${RED}❌ server process exited before becoming healthy.${NC}"
        if [[ -f "$SERVER_LOG" ]]; then
          echo "--- server log (tail) ---"
          tail -20 "$SERVER_LOG"
        fi
        return 1
      fi
    fi
    sleep 2
  done
  return 1
}

# Step 1/2: reuse a server that already answers the health probe; otherwise
# start one from START_CMD (unless --no-start forbids it or START_CMD is
# empty) and wait until it reports healthy.
echo -e "${CYAN}${BOLD}[1/4] server${NC} → ${BASE_URL}${HEALTH_PATH}"
if probe; then
  echo -e "${GREEN}✓ server already up — using it${NC}"
elif [[ "$NO_START" -eq 1 ]]; then
  echo -e "${RED}❌ no server response and --no-start given — start the server first.${NC}"
  exit 1
elif [[ -z "$START_CMD" ]]; then
  echo -e "${RED}❌ no server response and START_CMD is not set in $CONF — cannot auto-start.${NC}"
  exit 1
else
  echo -e "${YEL}no server listening — starting one${NC}"
  start_server
  echo -e "${CYAN}${BOLD}[2/4] waiting for health${NC} (up to ${START_WAIT}s)"
  wait_for_health || {
    echo -e "${RED}❌ server did not become healthy within ${START_WAIT}s.${NC}"
    exit 1
  }
  echo -e "${GREEN}✓ server up (pid $SERVER_PID)${NC}"
fi

# Build the command line for the E2E run: E2E_CMD (kept verbatim, it is shell
# code from the config) followed by each pass-through argument shell-quoted
# with printf %q. Quoting keeps an argument containing spaces or shell
# metacharacters as one literal word when `bash -c` re-parses the line.
# Globals: E2E_CMD, PASSTHRU (read)
# Outputs: the command line on stdout, without a trailing newline
e2e_command_line() {
  local line="$E2E_CMD" arg quoted
  # Check the length first: expanding an empty array can trip `set -u` on
  # bash releases older than 4.4.
  if (( ${#PASSTHRU[@]} > 0 )); then
    for arg in "${PASSTHRU[@]}"; do
      printf -v quoted '%q' "$arg"
      line+=" $quoted"
    done
  fi
  printf '%s' "$line"
}

echo -e "${CYAN}${BOLD}[3/4] E2E suite${NC} (skip_llm=${SKIP_LLM})"
E2E_FULL_CMD="$(e2e_command_line)"
echo "  \$ $E2E_FULL_CMD"
# Record the suite's status instead of letting `set -e` abort here: the
# script goes on after a failing run and exits with this status later.
TEST_EXIT=0
bash -c "$E2E_FULL_CMD" || TEST_EXIT=$?

echo -e "${CYAN}${BOLD}[4/4] API coverage${NC}"
# No report path configured, or the suite did not write the file: there is
# nothing to summarise, so finish with the suite's own exit status.
if ! [[ -n "$API_COVERAGE_FILE" && -f "$API_COVERAGE_FILE" ]]; then
  echo -e "${YEL}⚠ ${API_COVERAGE_FILE:-API_COVERAGE_FILE} not generated — suite recorded no API coverage.${NC}"
  exit "$TEST_EXIT"
fi

# Print one "name hit total pct" line per entry of the report's top-level
# "summary" object, using the first JSON reader found: jq, python3, then node.
# Missing or null hit/total/pct fields are printed as 0 by every reader, so
# the output does not depend on which tool happens to be installed.
# Arguments: $1 - path to the JSON coverage report
# Outputs:   axis lines on stdout; a diagnostic on stderr if no reader exists
# Returns:   the reader's exit status, or 1 when none of the tools is installed
read_axes() {
  local f="$1"
  if command -v jq >/dev/null 2>&1; then
    jq -r '(.summary // {}) | to_entries[]
      | "\(.key) \(.value.hit // 0) \(.value.total // 0) \(.value.pct // 0)"' "$f"
  elif command -v python3 >/dev/null 2>&1; then
    python3 -c 'import json,sys
s=json.load(open(sys.argv[1])).get("summary") or {}
for k,v in s.items():
    v=v or {}
    print(k, v.get("hit") or 0, v.get("total") or 0, v.get("pct") or 0)' "$f"
  elif command -v node >/dev/null 2>&1; then
    # Read and parse the file explicitly: require() would resolve a relative
    # path without a leading "./" as a module name, and only parses files
    # named *.json as JSON.
    node -e 'const s=JSON.parse(require("fs").readFileSync(process.argv[1],"utf8")).summary||{};for(const k in s){const v=s[k]||{};console.log(k,v.hit||0,v.total||0,v.pct||0)}' "$f"
  else
    echo "read_axes: need jq, python3 or node to read $f" >&2
    return 1
  fi
}

# Read the axis summary up front so that a failing reader is noticed; feeding
# the loop from a process substitution would discard its exit status.
FAIL_AXES=""
AXES_SEEN=0
AXES_READ_OK=1
AXES_RAW="$(read_axes "$API_COVERAGE_FILE")" || AXES_READ_OK=0

# Print one row per axis and collect the axes that are below 100%.
NUM_RE='^[0-9]+([.][0-9]+)?$'
while read -r name hit total pct; do
  [[ -z "$name" ]] && continue
  AXES_SEEN=$(( AXES_SEEN + 1 ))
  # A non-numeric pct (e.g. "null") would make printf fail and abort the
  # script under `set -e`; treat it as 0 so the axis is reported as uncovered.
  [[ "$pct" =~ $NUM_RE ]] || pct=0
  printf "  %-10s %3s/%-3s  %5.1f%%\n" "$name" "$hit" "$total" "$pct"
  below=$(awk -v p="$pct" 'BEGIN{print (p+0<100)?1:0}')
  if [[ "$below" -eq 1 ]]; then
    FAIL_AXES="$FAIL_AXES $name=${pct}%"
  fi
done <<< "$AXES_RAW"

echo
echo -e "${BOLD}report:${NC} $API_COVERAGE_FILE"

# A report that exists but yields no axes (reader failed, no JSON tool, empty
# summary) proves nothing about coverage: warn, and fail under --strict
# instead of passing silently.
if [[ "$AXES_READ_OK" -eq 0 || "$AXES_SEEN" -eq 0 ]]; then
  echo -e "${YEL}⚠ no coverage axes could be read from $API_COVERAGE_FILE${NC}"
  if [[ "$STRICT" -eq 1 ]]; then
    echo -e "${RED}❌ STRICT: coverage could not be verified${NC}"
    exit 2
  fi
fi

if [[ "$STRICT" -eq 1 && -n "$FAIL_AXES" ]]; then
  echo -e "${RED}❌ STRICT: axes below 100% —$FAIL_AXES${NC}"
  exit 2
fi

# Otherwise the script's status is the E2E suite's own exit status.
exit "$TEST_EXIT"
