Expands the Containers feature with two new ways to see and manage Docker containers without exposing the Docker Engine TCP socket, plus the docs and roadmap entries that frame them. Docker over SSH (management): - Runs the `docker` CLI on a remote SSH host instead of talking to the Engine TCP API, reusing the existing SSH transport (jump-host chaining, host-key verification, key/password auth) via connectTarget + execCommand. No dockerd socket has to be exposed — the mesh + SSH auth are the gate. - backend/src/ssh/docker.ts: list/logs/start/stop/restart/pause/unpause/remove and an interactive `docker exec` shell builder. Container refs are validated against a strict allowlist and single-quoted to prevent command injection; action verbs are whitelisted. - backend/src/routes/dockerSsh.ts: REST routes mirroring the TCP Docker API shape (mutating actions gated by adminOnly) + a /api/docker-ssh/exec WebSocket modeled on the terminal PTY plumbing. - Note: the SSH path uses the ssh2 key/password auth; it does not implement the OpenSSH-certificate (OPKSSH) fallback that the terminal route has. Docker push-agent monitoring (self-hosted, read-only): - A small bash agent (agent/archnest-docker-agent.sh) runs on each Docker VM, collects a rich snapshot (docker ps + inspect + a stats snapshot), masks secret-looking env values locally, and POSTs it to ArchNest. VMs need outbound-only mesh access — no exposed port, no SSH for monitoring. - backend/src/routes/agents.ts: token-gated ingest (POST /api/agents/docker/report, ARCHNEST_AGENT_TOKEN, constant-time compare; 503 when unset, so it is disabled by default) plus user-auth read endpoints (hosts list with staleness flag, per-host containers, single-container detail). New docker_agent_reports table (latest report per host). - Ingest stores data only; it never executes anything from the agent. Containers page: - Host selector now spans Docker API, SSH, and Agent sources. 
- Intra-page tabs: a Containers list plus dynamic, closeable per-container detail tabs opened by clicking a container name. Agent detail shows overview/state/stats/ports/networks/mounts/env(masked)/labels; docker/ssh degrade gracefully. Agent rows are read-only; docker/ssh keep management. Docs/roadmap: - docs/docker-agent-monitoring.md (design doc, written before implementation). - ROADMAP.md: LXC management (paid), Docker monitoring agent tiering (push self-hosted now / pull-agent paid), terminal grid tiering. Deferred (documented, not built here): the mesh-prerequisite setup gate, the paid pull-agent (Option 2), per-host tokens, time-series metrics. Requires ARCHNEST_AGENT_TOKEN in the backend env to enable agent ingest. Verified: backend `tsc --noEmit` and frontend `tsc -b && vite build` both pass; agent jq filters, byte conversion, and `bash -n` checked locally. Co-authored-by: Samuel James <ssamjame@amazon.com> Co-authored-by: Kiro <noreply@kiro.dev>
175 lines
7 KiB
Bash
175 lines
7 KiB
Bash
#!/usr/bin/env bash
|
|
#
|
|
# ArchNest Docker monitoring agent (self-hosted, push model).
|
|
#
|
|
# Collects a rich snapshot of this host's Docker containers (docker ps +
|
|
# docker inspect + a docker stats snapshot) and POSTs it to ArchNest. ArchNest
|
|
# stores the latest report per host and shows it read-only on the Containers
|
|
# page. This is MONITORING ONLY — it never receives or runs commands.
|
|
#
|
|
# Requirements: bash, docker, curl, jq.
|
|
#
|
|
# Configuration (env vars; may live in /etc/archnest/agent.env):
|
|
# ARCHNEST_URL Base URL of the ArchNest backend, reachable over your
|
|
# mesh / private network, e.g. http://100.64.0.5:4000
|
|
# ARCHNEST_AGENT_TOKEN Shared token; must match the backend's ARCHNEST_AGENT_TOKEN.
|
|
# ARCHNEST_HOST_ID Stable id for this host, e.g. "proxmox-vm-1"
|
|
# (allowed: letters, digits, . _ - ; max 128 chars).
|
|
# ARCHNEST_HOSTNAME Optional display hostname (defaults to `hostname`).
|
|
#
|
|
# Exit codes: 0 ok, 1 misconfig/missing deps, 2 report POST failed.
|
|
|
|
set -euo pipefail

AGENT_VERSION="1"

#######################################
# Source a config file without clobbering settings the caller already
# provided (non-empty) through the environment.
#
# A plain `. file` containing `VAR=value` lines would overwrite values that
# were already set, so the pre-set ARCHNEST_* values are remembered before
# sourcing and written back afterwards.
# Globals:   ARCHNEST_URL, ARCHNEST_AGENT_TOKEN, ARCHNEST_HOST_ID,
#            ARCHNEST_HOSTNAME (read; written by the sourced file)
# Arguments: $1 - path of the config file to source
# Returns:   status of the sourced file (aborts under `set -e` on failure)
#######################################
load_agent_env() {
  local _file=$1 _name _saved
  local -a _preset=()

  for _name in ARCHNEST_URL ARCHNEST_AGENT_TOKEN ARCHNEST_HOST_ID ARCHNEST_HOSTNAME; do
    # `:+` => only values that are set AND non-empty count as "provided".
    if [ -n "${!_name:+set}" ]; then
      _preset+=("$_name")
      printf -v "_archnest_saved_${_name}" '%s' "${!_name}"
    fi
  done

  # shellcheck disable=SC1090
  . "$_file"

  # The `${arr[@]+...}` form keeps `set -u` happy for an empty array on
  # bash versions older than 4.4.
  for _name in "${_preset[@]+"${_preset[@]}"}"; do
    _saved="_archnest_saved_${_name}"
    printf -v "$_name" '%s' "${!_saved}"
    unset "$_saved"
  done
}

# Load config file if present (does not override already-set, non-empty env).
if [ -f /etc/archnest/agent.env ]; then
  load_agent_env /etc/archnest/agent.env
fi
|
|
|
|
# Print a diagnostic, prefixed with the agent name, on stderr.
# Arguments: message words (joined with single spaces).
err() {
  printf '%s\n' "archnest-docker-agent: $*" >&2
}
|
|
|
|
# --- Dependency + config checks -------------------------------------------
# Every external tool the agent shells out to must be on PATH; stop at the
# first one that is not.
for bin in docker curl jq; do
  command -v "$bin" >/dev/null 2>&1 && continue
  err "missing required dependency: $bin"
  exit 1
done
|
|
|
# Required settings: `${VAR:?msg}` aborts the script with the message when the
# variable is unset or empty.
: "${ARCHNEST_URL:?ARCHNEST_URL is required}"
: "${ARCHNEST_AGENT_TOKEN:?ARCHNEST_AGENT_TOKEN is required}"
: "${ARCHNEST_HOST_ID:?ARCHNEST_HOST_ID is required}"

# Display hostname: explicit override wins, otherwise ask `hostname`.
HOSTNAME_VALUE="${ARCHNEST_HOSTNAME:-$(hostname)}"

# Validate the host id against the WHOLE string. A line-oriented matcher such
# as grep would accept a multi-line value as soon as any single line matched;
# `[[ =~ ]]` anchors ^ and $ to the start and end of the entire value.
# The pattern lives in a variable so it is treated as a regex, not a literal.
host_id_re='^[A-Za-z0-9][A-Za-z0-9._-]{0,127}$'
if [[ ! "$ARCHNEST_HOST_ID" =~ $host_id_re ]]; then
  err "ARCHNEST_HOST_ID '$ARCHNEST_HOST_ID' is invalid (allowed: A-Z a-z 0-9 . _ - , max 128)"
  exit 1
fi

# Strip one trailing slash from the base URL so the path joins cleanly.
REPORT_URL="${ARCHNEST_URL%/}/api/agents/docker/report"
|
|
|
|
# --- Collect container ids -------------------------------------------------
# Capture the listing first so a failing `docker ps` is detected. Feeding it
# through process substitution would hide its exit status and let the run
# continue with an empty id list.
if ! ids_output="$(docker ps --all --no-trunc --format '{{.ID}}')"; then
  err "docker ps failed; cannot enumerate containers (is the Docker daemon reachable?)"
  exit 1
fi

# One full-length container id per array element; empty array when the host
# has no containers.
IDS=()
if [ -n "$ids_output" ]; then
  mapfile -t IDS <<<"$ids_output"
fi
|
|
|
|
# --- Stats snapshot (one shot) keyed by container id -----------------------
# `docker stats` is invoked with --no-trunc, so ids are expected in full
# length; the lookup later in this script also tries the 12-character short
# id as a fallback.
# Builds a jq object: { "<id>": {cpuPercent, memUsage, memLimit, ...} }.
#
# Under `set -e` + `pipefail` a failing `docker stats` aborts the script.
# That behaviour is kept, but it is now reported instead of being silent
# (stderr of `docker stats` itself is discarded).
if ! STATS_JSON="$(
  docker stats --no-stream --no-trunc \
    --format '{{.ID}}|{{.CPUPerc}}|{{.MemUsage}}|{{.NetIO}}|{{.BlockIO}}' 2>/dev/null \
    | jq -R -s '
      def bytes:
        # Converts "12.3MiB" / "1.2GB" etc to a number of bytes. The numeric
        # part may carry an exponent ("1e+03kB"), which Docker can print for
        # values that round up to 1000 of a unit.
        capture("(?<n>[0-9.]+(?:[eE][+-]?[0-9]+)?)\\s*(?<u>[A-Za-z]*)") as $m
        | ($m.n | tonumber) as $n
        | ($m.u | ascii_downcase) as $u
        | $n * (
            if $u|startswith("ki") then 1024
            elif $u|startswith("mi") then 1048576
            elif $u|startswith("gi") then 1073741824
            elif $u|startswith("ti") then 1099511627776
            elif $u|startswith("kb") or $u=="k" then 1000
            elif $u|startswith("mb") or $u=="m" then 1000000
            elif $u|startswith("gb") or $u=="g" then 1000000000
            elif $u|startswith("tb") or $u=="t" then 1000000000000
            elif $u|startswith("b") or $u=="" then 1
            else 1 end
          ) | floor;
      # One input line per container: id|cpu|mem used / limit|net rx / tx|blk r / w
      split("\n") | map(select(length > 0)) | map(split("|")) | map({
        key: .[0],
        value: {
          cpuPercent: (.[1] | gsub("%";"") | tonumber? // 0),
          memUsage: (.[2] | split("/")[0] | gsub(" ";"") | (try bytes catch 0)),
          memLimit: (.[2] | split("/")[1] | gsub(" ";"") | (try bytes catch 0)),
          netRxBytes: (.[3] | split("/")[0] | gsub(" ";"") | (try bytes catch 0)),
          netTxBytes: (.[3] | split("/")[1] | gsub(" ";"") | (try bytes catch 0)),
          blockReadBytes: (.[4] | split("/")[0] | gsub(" ";"") | (try bytes catch 0)),
          blockWriteBytes: (.[4] | split("/")[1] | gsub(" ";"") | (try bytes catch 0))
        }
      }) | from_entries
    '
)"; then
  err "docker stats snapshot failed (is the Docker daemon reachable?)"
  exit 1
fi

# Defensive default so later --argjson consumers always get valid JSON.
if [ -z "$STATS_JSON" ]; then
  STATS_JSON='{}'
fi
|
|
|
|
# --- Per-container detail from docker inspect ------------------------------
# jq transform turning one inspect result (an array whose first element is the
# container object) into our report schema, masking secret-looking env values.
#
# Note on `rw`: the jq alternative operator `//` treats `false` like a missing
# value, so `(.RW // true)` would report every read-only mount as writable.
# `(.RW != false)` keeps "missing => true" while preserving an explicit false.
INSPECT_FILTER='
  # True when an env var NAME looks like it holds a secret (case-insensitive).
  def mask($k): ($k | ascii_upcase) as $u
    | ($u | test("PASS|SECRET|TOKEN|KEY|PRIVATE|CREDENTIAL"));
  .[0] as $c
  | {
      id: $c.Id,
      name: ($c.Name // "" | ltrimstr("/")),
      image: ($c.Config.Image // ""),
      imageId: ($c.Image // ""),
      state: ($c.State.Status // "unknown"),
      status: ($c.State.Status // ""),
      createdAt: ($c.Created // null),
      startedAt: ($c.State.StartedAt // null),
      restartCount: ($c.RestartCount // 0),
      restartPolicy: ($c.HostConfig.RestartPolicy.Name // ""),
      health: ($c.State.Health.Status // "none"),
      # One entry per host binding; ports with no binding (null) emit nothing.
      ports: (
        ($c.NetworkSettings.Ports // {}) | to_entries | map(
          (.key | split("/")) as $p
          | (.value // [])[]? as $b
          | { hostIp: ($b.HostIp // ""), hostPort: ($b.HostPort | tonumber? // null),
              containerPort: ($p[0] | tonumber? // 0), proto: ($p[1] // "tcp") }
        )
      ),
      networks: (
        ($c.NetworkSettings.Networks // {}) | to_entries
        | map({ name: .key, ip: (.value.IPAddress // "") })
      ),
      mounts: (
        ($c.Mounts // []) | map({
          type: (.Type // ""), source: (.Source // .Name // ""),
          destination: (.Destination // ""), rw: (.RW != false)
        })
      ),
      # Split on the FIRST "=" only: the value may itself contain "=".
      env: (
        ($c.Config.Env // []) | map(
          (. | split("=")) as $kv
          | { key: $kv[0], value: (if mask($kv[0]) then "********" else ($kv[1:] | join("=")) end) }
        )
      ),
      command: (($c.Config.Entrypoint // []) + ($c.Config.Cmd // []) | join(" ")),
      labels: ($c.Config.Labels // {})
    }
'
|
|
|
|
#######################################
# Build the JSON array of per-container reports.
#
# Each container is inspected individually so that one that disappears
# between listing and inspection is simply skipped. The per-container JSON
# lines are accumulated and turned into an array with ONE final jq call,
# instead of re-parsing the whole growing array on every iteration.
# Globals:   IDS (read), INSPECT_FILTER (read), STATS_JSON (read),
#            CONTAINERS (written; compact JSON array, "[]" when empty)
# Arguments: none
#######################################
collect_containers() {
  local id short detail details=""

  CONTAINERS='[]'
  # The `${arr[@]+...}` form keeps `set -u` happy for an empty array on
  # bash versions older than 4.4.
  for id in "${IDS[@]+"${IDS[@]}"}"; do
    [ -n "$id" ] || continue
    # Best effort: a container that cannot be inspected is left out.
    detail="$(docker inspect "$id" 2>/dev/null | jq -c "$INSPECT_FILTER" 2>/dev/null || true)"
    [ -n "$detail" ] || continue
    short="${id:0:12}"
    # Attach the matching stats snapshot (match by full or short id).
    detail="$(jq -c --argjson stats "$STATS_JSON" --arg id "$id" --arg short "$short" \
      '. + { stats: ($stats[$id] // $stats[$short] // null) }' <<<"$detail")"
    details+="${detail}"$'\n'
  done

  if [ -n "$details" ]; then
    CONTAINERS="$(jq -c -s '.' <<<"$details")"
  fi
}

collect_containers
|
|
|
|
# --- Assemble + POST -------------------------------------------------------

#######################################
# Print the report document for this run.
#
# The container array is fed to jq on stdin rather than as a command-line
# argument: a single argv string is size-limited by the kernel, so a host
# with many containers would otherwise fail with "Argument list too long".
# Globals:   ARCHNEST_HOST_ID, HOSTNAME_VALUE, AGENT_VERSION, CONTAINERS (read)
# Outputs:   the report JSON on stdout
# Returns:   exit status of jq
#######################################
build_payload() {
  jq \
    --arg hostId "$ARCHNEST_HOST_ID" \
    --arg hostname "$HOSTNAME_VALUE" \
    --arg agentVersion "$AGENT_VERSION" \
    --arg reportedAt "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    '{ hostId: $hostId, hostname: $hostname, agentVersion: $agentVersion, reportedAt: $reportedAt, containers: . }' \
    <<<"$CONTAINERS"
}

PAYLOAD="$(build_payload)"
|
|
|
# Send the report and capture only the HTTP status code.
#  - The body is streamed on stdin (`--data-binary @-`) instead of being put
#    on the command line, where a single argument is size-limited and a large
#    report would fail with "Argument list too long".
#  - `-w '%{http_code}'` already prints 000 when no response was received, so
#    a curl failure must not append a second "000"; the default below only
#    covers the case where curl produced no output at all.
#  - Timeouts keep a stalled backend from hanging the agent indefinitely.
# NOTE(review): the bearer token is still passed via argv and is therefore
# visible in the local process list while curl runs.
HTTP_CODE="$(printf '%s' "$PAYLOAD" | curl -s -o /dev/null -w '%{http_code}' \
  --connect-timeout 10 --max-time 60 \
  -X POST "$REPORT_URL" \
  -H 'Content-Type: application/json' \
  -H "Authorization: Bearer ${ARCHNEST_AGENT_TOKEN}" \
  --data-binary @-)" || true
HTTP_CODE="${HTTP_CODE:-000}"

# Anything other than 200 counts as a failed report (exit code 2).
if [ "$HTTP_CODE" != "200" ]; then
  err "report POST to $REPORT_URL failed (HTTP $HTTP_CODE)"
  exit 2
fi

printf '%s\n' "archnest-docker-agent: reported ${#IDS[@]} container(s) as '$ARCHNEST_HOST_ID' (HTTP $HTTP_CODE)"
|