agent: complete plan05 closeout
This commit is contained in:
parent
33ba248c49
commit
2f0fffa905
12 changed files with 1347 additions and 313 deletions
|
|
@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, NoReturn
|
|||
|
||||
from fastapi import FastAPI, HTTPException, Request, Response
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from .models import (
|
||||
CapacityHint,
|
||||
|
|
@ -35,6 +36,12 @@ if TYPE_CHECKING:
|
|||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SlotAdminRequest(BaseModel):
    """Request body for admin endpoints that operate on a single slot."""

    # Identifier of the slot the admin action applies to.
    slot_id: str
|
||||
|
||||
|
||||
def _parse_required_dt(value: str) -> datetime:
|
||||
return datetime.fromisoformat(value)
|
||||
|
||||
|
|
@ -95,6 +102,8 @@ def create_app(
|
|||
haproxy: HAProxyRuntime | None = None,
|
||||
scheduler_running: Callable[[], bool] | None = None,
|
||||
reconciler_running: Callable[[], bool] | None = None,
|
||||
ready_check: Callable[[], bool] | None = None,
|
||||
reconcile_now: Callable[[], dict[str, object] | None] | None = None,
|
||||
) -> FastAPI:
|
||||
"""Create the FastAPI application."""
|
||||
app = FastAPI(title="nix-builder-autoscaler", version="0.1.0")
|
||||
|
|
@ -191,6 +200,11 @@ def create_app(
|
|||
|
||||
@app.get("/health/ready", response_model=HealthResponse)
|
||||
def health_ready() -> HealthResponse:
|
||||
if ready_check is not None and not ready_check():
|
||||
return JSONResponse( # type: ignore[return-value]
|
||||
status_code=503,
|
||||
content=HealthResponse(status="degraded").model_dump(mode="json"),
|
||||
)
|
||||
if scheduler_running is not None and not scheduler_running():
|
||||
return JSONResponse( # type: ignore[return-value]
|
||||
status_code=503,
|
||||
|
|
@ -207,4 +221,83 @@ def create_app(
|
|||
def metrics_endpoint() -> Response:
|
||||
return Response(content=metrics.render(), media_type="text/plain")
|
||||
|
||||
@app.post("/v1/admin/drain")
def admin_drain(body: SlotAdminRequest, request: Request) -> dict[str, str]:
    """Put the requested slot into the DRAINING state.

    A slot that is already DRAINING or TERMINATING is acknowledged as-is
    without touching the database. Otherwise the slot must currently be
    READY, BINDING, BOOTING or LAUNCHING; any other state is reported as a
    409 ``invalid_state`` error. An unknown slot id is reported as a 404.
    """
    slot_id = body.slot_id
    record = db.get_slot(slot_id)
    if record is None:
        # NOTE(review): _error_response is presumably NoReturn (it raises) — confirm.
        _error_response(request, 404, "not_found", "Slot not found")

    state = str(record["state"])

    # Already on its way out: report the current state and change nothing.
    if state in (SlotState.DRAINING.value, SlotState.TERMINATING.value):
        return {"status": "accepted", "slot_id": slot_id, "state": state}

    # States from which a drain may be started.
    drainable = (
        SlotState.READY.value,
        SlotState.BINDING.value,
        SlotState.BOOTING.value,
        SlotState.LAUNCHING.value,
    )
    if state not in drainable:
        _error_response(
            request,
            409,
            "invalid_state",
            f"Cannot drain slot from state {state}",
        )

    # The transition also resets the slot's interruption_pending flag to 0.
    db.update_slot_state(slot_id, SlotState.DRAINING, interruption_pending=0)
    return {"status": "accepted", "slot_id": slot_id, "state": SlotState.DRAINING.value}
|
||||
|
||||
@app.post("/v1/admin/unquarantine")
def admin_unquarantine(body: SlotAdminRequest, request: Request) -> dict[str, str]:
    """Return a slot in the ERROR state to EMPTY, clearing its instance data.

    Only a slot whose current state is ERROR can be unquarantined; any other
    state is reported as a 409 ``invalid_state`` error, and an unknown slot
    id as a 404. On success the instance id, IP and launch time, the
    cooldown deadline, the lease count and the interruption flag are reset.
    """
    slot_id = body.slot_id
    record = db.get_slot(slot_id)
    if record is None:
        # NOTE(review): _error_response is presumably NoReturn (it raises) — confirm.
        _error_response(request, 404, "not_found", "Slot not found")

    state = str(record["state"])
    if state != SlotState.ERROR.value:
        _error_response(
            request,
            409,
            "invalid_state",
            f"Cannot unquarantine slot from state {state}",
        )

    # Wipe everything tied to the previous instance along with the state change.
    db.update_slot_state(
        slot_id,
        SlotState.EMPTY,
        instance_id=None,
        instance_ip=None,
        instance_launch_time=None,
        lease_count=0,
        cooldown_until=None,
        interruption_pending=0,
    )

    response = {"status": "accepted", "slot_id": slot_id, "state": SlotState.EMPTY.value}
    return response
|
||||
|
||||
@app.post("/v1/admin/reconcile-now")
def admin_reconcile_now(request: Request) -> dict[str, object]:
    """Invoke the injected ``reconcile_now`` callback and report its outcome.

    Responds with a retryable 503 ``not_configured`` error when no callback
    was supplied, and with a retryable 500 ``reconcile_failed`` error (after
    logging the traceback) when the callback raises. On success the reply is
    ``{"status": "accepted"}`` merged with the callback's result if that
    result is a dict.
    """
    if reconcile_now is None:
        # NOTE(review): _error_response is presumably NoReturn (it raises) — confirm.
        _error_response(
            request,
            503,
            "not_configured",
            "Reconcile trigger not configured",
            retryable=True,
        )

    try:
        result = reconcile_now()
    except Exception:
        # Boundary handler: log the failure with its traceback, then surface a 500.
        log.exception("admin_reconcile_now_failed")
        _error_response(
            request,
            500,
            "reconcile_failed",
            "Reconcile tick failed",
            retryable=True,
        )

    if not isinstance(result, dict):
        return {"status": "accepted"}

    # Keys from the callback's result win on collision, including "status".
    return {"status": "accepted", **result}
|
||||
|
||||
return app
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue