accept tailscale ssh banner in haproxy health checks
This commit is contained in:
parent
e8ee085649
commit
f0f6020d6a
3 changed files with 153 additions and 11 deletions
|
|
@ -94,13 +94,131 @@ def _print_reservations(data: list[dict[str, Any]]) -> None:
|
|||
_print_table(["reservation_id", "phase", "system", "slot", "instance_id"], rows)
|
||||
|
||||
|
||||
def _parse_args() -> argparse.Namespace:
|
||||
def _print_status_summary(data: dict[str, Any]) -> None:
    """Render the daemon status payload as a two-column metric/value table.

    Each row is named ``<section>.<field>``. A missing section is treated
    as an empty mapping and a missing field falls back to the section's
    default (``0`` for counters, ``False`` for health flags). All values
    are stringified before being handed to ``_print_table``.

    Args:
        data: Status mapping; the sections read are ``slots``,
            ``reservations``, ``ec2`` and ``haproxy``.
    """
    # (section name, fallback for absent fields, fields in display order)
    layout = (
        (
            "slots",
            0,
            (
                "total",
                "ready",
                "launching",
                "booting",
                "binding",
                "terminating",
                "empty",
                "error",
            ),
        ),
        ("reservations", 0, ("pending", "ready", "failed")),
        ("ec2", False, ("api_ok",)),
        ("haproxy", False, ("socket_ok",)),
    )

    table_rows = []
    for section_name, fallback, metric_keys in layout:
        section = data.get(section_name, {})
        for metric_key in metric_keys:
            value = section.get(metric_key, fallback)
            table_rows.append([f"{section_name}.{metric_key}", str(value)])

    _print_table(["metric", "value"], table_rows)
|
||||
|
||||
|
||||
def _bulk_slot_action(socket_path: str, action: str) -> dict[str, Any]:
    """Apply a drain or unquarantine request to every eligible slot.

    The slot list is fetched with ``GET /v1/slots``. For each slot whose
    state is eligible (``ready`` for ``drain``, ``error`` for
    ``unquarantine``) a ``POST`` carrying ``{"slot_id": ...}`` is sent to
    the matching admin endpoint. A failure on one slot is recorded and
    does not stop the remaining slots from being processed.

    Args:
        socket_path: Socket path handed through to ``_uds_request``.
        action: Either ``"drain"`` or ``"unquarantine"``.

    Returns:
        A summary dict with the keys ``action``, ``matched``,
        ``attempted``, ``succeeded``, ``failed``, ``skipped`` and
        ``results`` (one record per slot that was skipped or attempted).

    Raises:
        ValueError: If ``action`` is not a known bulk action.
        RuntimeError: If the slot listing returns a non-2xx status or a
            payload that is not a list.

    Any exception raised by the initial listing request propagates
    unchanged; only ``OSError`` from the per-slot requests is converted
    into a ``failed`` record.
    """
    # action -> (slot states the action applies to, admin endpoint)
    plans = {
        "drain": ({"ready"}, "/v1/admin/drain"),
        "unquarantine": ({"error"}, "/v1/admin/unquarantine"),
    }
    if action not in plans:
        msg = f"unknown bulk action: {action}"
        raise ValueError(msg)
    eligible_states, action_path = plans[action]

    status, data = _uds_request(socket_path, "GET", "/v1/slots")
    if not 200 <= status < 300 or not isinstance(data, list):
        msg = "failed to list slots for bulk action"
        raise RuntimeError(msg)

    results: list[dict[str, Any]] = []
    summary: dict[str, Any] = {
        "action": action,
        "matched": 0,
        "attempted": 0,
        "succeeded": 0,
        "failed": 0,
        "skipped": 0,
        "results": results,
    }

    for slot in data:
        # A malformed (non-mapping) element must not abort the whole bulk
        # run with an AttributeError; ignore it exactly like an entry that
        # carries no slot_id.
        if not isinstance(slot, dict):
            continue

        slot_id = str(slot.get("slot_id", ""))
        state = str(slot.get("state", ""))
        if not slot_id:
            continue

        # Fields shared by every record; kept first so the emitted key
        # order stays slot_id, state, result, ...
        base = {"slot_id": slot_id, "state": state}

        if state not in eligible_states:
            summary["skipped"] += 1
            results.append(
                {**base, "result": "skipped", "reason": "ineligible_state"}
            )
            continue

        summary["matched"] += 1
        summary["attempted"] += 1
        try:
            action_status, action_data = _uds_request(
                socket_path,
                "POST",
                action_path,
                body={"slot_id": slot_id},
            )
        except OSError as err:
            summary["failed"] += 1
            results.append({**base, "result": "failed", "error": str(err)})
            continue

        if 200 <= action_status < 300:
            summary["succeeded"] += 1
            results.append({**base, "result": "ok"})
        else:
            summary["failed"] += 1
            results.append(
                {
                    **base,
                    "result": "failed",
                    "status": action_status,
                    "response": action_data,
                }
            )

    return summary
|
||||
|
||||
|
||||
def _parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(prog="autoscalerctl", description="Autoscaler CLI")
|
||||
parser.add_argument(
|
||||
"--socket",
|
||||
default="/run/nix-builder-autoscaler/daemon.sock",
|
||||
help="Daemon Unix socket path",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--json",
|
||||
action="store_true",
|
||||
help="Output JSON for status command.",
|
||||
)
|
||||
subparsers = parser.add_subparsers(dest="command")
|
||||
subparsers.add_parser("status", help="Show state summary")
|
||||
subparsers.add_parser("slots", help="List slots")
|
||||
|
|
@ -110,8 +228,14 @@ def _parse_args() -> argparse.Namespace:
|
|||
parser_drain.add_argument("slot_id")
|
||||
parser_unq = subparsers.add_parser("unquarantine", help="Unquarantine a slot")
|
||||
parser_unq.add_argument("slot_id")
|
||||
subparsers.add_parser("drain-all", help="Drain all eligible slots (state=ready)")
|
||||
subparsers.add_parser("unquarantine-all", help="Unquarantine all error slots")
|
||||
subparsers.add_parser("reconcile-now", help="Trigger immediate reconcile tick")
|
||||
return parser.parse_args()
|
||||
args = parser.parse_args(argv)
|
||||
if not args.command:
|
||||
parser.print_help()
|
||||
raise SystemExit(0)
|
||||
return args
|
||||
|
||||
|
||||
def _print_error(data: object) -> None:
|
||||
|
|
@ -124,8 +248,19 @@ def _print_error(data: object) -> None:
|
|||
def main() -> None:
|
||||
"""Entry point for the autoscalerctl CLI."""
|
||||
args = _parse_args()
|
||||
if not args.command:
|
||||
raise SystemExit(1)
|
||||
|
||||
if args.command in {"drain-all", "unquarantine-all"}:
|
||||
action = "drain" if args.command == "drain-all" else "unquarantine"
|
||||
try:
|
||||
summary = _bulk_slot_action(args.socket, action)
|
||||
except OSError as err:
|
||||
print(f"Error: cannot connect to daemon at {args.socket}")
|
||||
raise SystemExit(1) from err
|
||||
except RuntimeError as err:
|
||||
print(str(err))
|
||||
raise SystemExit(1) from err
|
||||
print(json.dumps(summary, indent=2))
|
||||
raise SystemExit(0 if summary["failed"] == 0 else 1)
|
||||
|
||||
method = "GET"
|
||||
path = ""
|
||||
|
|
@ -160,7 +295,15 @@ def main() -> None:
|
|||
_print_error(data)
|
||||
raise SystemExit(1)
|
||||
|
||||
if args.command in {"status", "drain", "unquarantine", "reconcile-now"}:
|
||||
if args.command == "status":
|
||||
if not isinstance(data, dict):
|
||||
_print_error(data)
|
||||
raise SystemExit(1)
|
||||
if args.json:
|
||||
print(json.dumps(data, indent=2))
|
||||
else:
|
||||
_print_status_summary(data)
|
||||
elif args.command in {"drain", "unquarantine", "reconcile-now"}:
|
||||
print(json.dumps(data, indent=2))
|
||||
elif args.command == "slots":
|
||||
if isinstance(data, list):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue