# nix-builder-autoscaler/agent/nix_builder_autoscaler/models.py
#
# 182 lines
# 3.9 KiB
# Python

"""Data models for the autoscaler daemon."""
from __future__ import annotations
from datetime import datetime
from enum import StrEnum
from typing import Any
from pydantic import BaseModel, Field
class SlotState(StrEnum):
"""Exhaustive slot states."""
EMPTY = "empty"
LAUNCHING = "launching"
BOOTING = "booting"
BINDING = "binding"
READY = "ready"
DRAINING = "draining"
TERMINATING = "terminating"
ERROR = "error"
class ReservationPhase(StrEnum):
"""Exhaustive reservation phases."""
PENDING = "pending"
READY = "ready"
FAILED = "failed"
RELEASED = "released"
EXPIRED = "expired"
# ---------------------------------------------------------------------------
# API request models
# ---------------------------------------------------------------------------
class ReservationRequest(BaseModel):
    """POST /v1/reservations request body."""

    # Required: validation fails when either is missing.
    system: str
    reason: str

    # Optional: resolves to None when the client leaves it out.
    build_id: int | None = Field(default=None)
class CapacityHint(BaseModel):
    """POST /v1/hints/capacity request body."""

    # All five fields are required; none declares a default.
    builder: str

    # NOTE(review): plain ints with no lower bound, so negative counts pass
    # validation -- confirm that is acceptable to consumers.
    queued: int
    running: int

    system: str

    # NOTE(review): a bare `datetime` accepts naive and timezone-aware
    # values alike -- confirm what senders actually provide.
    timestamp: datetime
# ---------------------------------------------------------------------------
# API response models
# ---------------------------------------------------------------------------
class ReservationResponse(BaseModel):
    """Reservation representation returned by the API."""

    # Field order is kept as declared; it determines serialized key order.
    reservation_id: str
    phase: ReservationPhase

    # Nullable; both default to None when not supplied.
    slot: str | None = Field(default=None)
    instance_id: str | None = Field(default=None)

    system: str

    # Required timestamps.
    created_at: datetime
    updated_at: datetime
    expires_at: datetime

    # Nullable timestamp; None unless a value is supplied.
    released_at: datetime | None = Field(default=None)
class SlotInfo(BaseModel):
    """Slot representation returned by the API."""

    # Field order is kept as declared; it determines serialized key order.
    slot_id: str
    system: str
    state: SlotState

    # Nullable; both default to None when not supplied.
    instance_id: str | None = Field(default=None)
    instance_ip: str | None = Field(default=None)

    # Required: there is no default, so callers must always pass these.
    lease_count: int
    last_state_change: datetime
class SlotsSummary(BaseModel):
    """Aggregate slot counts by state."""

    # Every counter starts at zero, so SlotsSummary() needs no arguments.
    # NOTE(review): nothing here enforces that `total` equals the sum of
    # the per-state counters -- presumably the producer keeps them in sync.
    total: int = Field(default=0)

    # One counter per SlotState value. The order differs from the enum's
    # declaration order and is preserved because it fixes serialized key
    # order.
    ready: int = Field(default=0)
    launching: int = Field(default=0)
    booting: int = Field(default=0)
    binding: int = Field(default=0)
    draining: int = Field(default=0)
    terminating: int = Field(default=0)
    empty: int = Field(default=0)
    error: int = Field(default=0)
class ReservationsSummary(BaseModel):
    """Aggregate reservation counts by phase."""

    # Counters start at zero, so ReservationsSummary() needs no arguments.
    # NOTE(review): only three ReservationPhase values have a counter;
    # `released` and `expired` are not represented -- confirm the omission
    # is intentional.
    pending: int = Field(default=0)
    ready: int = Field(default=0)
    failed: int = Field(default=0)
class Ec2Summary(BaseModel):
    """EC2 subsystem health."""

    # Defaults to True: a freshly built summary reports the API as OK until
    # a caller sets it otherwise.
    api_ok: bool = Field(default=True)

    # None until a timestamp is supplied.
    last_reconcile_at: datetime | None = Field(default=None)
class HaproxySummary(BaseModel):
    """HAProxy subsystem health."""

    # Defaults to True: a freshly built summary reports the socket as OK
    # until a caller sets it otherwise.
    socket_ok: bool = Field(default=True)

    # None until a timestamp is supplied.
    last_stat_poll_at: datetime | None = Field(default=None)
class StateSummary(BaseModel):
    """GET /v1/state/summary response."""

    # Each section is produced by a default_factory, so every StateSummary
    # gets its own sub-model instances. As each sub-model is itself fully
    # defaulted, StateSummary() can be built with no arguments.
    slots: SlotsSummary = Field(default_factory=SlotsSummary)
    reservations: ReservationsSummary = Field(
        default_factory=ReservationsSummary,
    )
    ec2: Ec2Summary = Field(default_factory=Ec2Summary)
    haproxy: HaproxySummary = Field(default_factory=HaproxySummary)
class CapacityPolicy(BaseModel):
    """Effective capacity timeout and sizing policy."""

    # All ten fields are required ints.
    # NOTE(review): no cross-field validation is declared here (for
    # example min_slots <= max_slots) -- presumably enforced where the
    # configuration is loaded; verify.

    # Sizing.
    min_slots: int
    max_slots: int
    target_warm_slots: int
    max_leases_per_slot: int

    # Durations; the `_seconds` suffix names the unit. These are whole
    # numbers (int).
    idle_scale_down_seconds: int
    drain_timeout_seconds: int
    launch_timeout_seconds: int
    boot_timeout_seconds: int
    binding_timeout_seconds: int
    terminating_timeout_seconds: int
class SchedulerPolicy(BaseModel):
    """Effective scheduler timing policy."""

    # Both intervals are required floats, so fractional seconds are
    # representable.
    tick_seconds: float
    reconcile_seconds: float
class EffectiveConfigResponse(BaseModel):
    """GET /v1/config/effective response."""

    # Both sections are required; neither has a default.
    capacity: CapacityPolicy
    scheduler: SchedulerPolicy
class ErrorDetail(BaseModel):
    """Structured error detail."""

    # Required.
    code: str
    message: str

    # Errors are non-retryable unless explicitly flagged.
    retryable: bool = Field(default=False)

    # Optional free-form mapping with string keys; None when absent.
    details: dict[str, Any] | None = Field(default=None)
class ErrorResponse(BaseModel):
    """Standard error response envelope."""

    # Both fields are required: the structured detail and a request id
    # string.
    error: ErrorDetail
    request_id: str
class HealthResponse(BaseModel):
    """Health check response."""

    # Required free-form string; no set of allowed values is declared here.
    status: str