WIP autoscaler agent
This commit is contained in:
parent
c610a3e284
commit
28059dcedf
34 changed files with 2409 additions and 35 deletions
155
agent/nix_builder_autoscaler/config.py
Normal file
155
agent/nix_builder_autoscaler/config.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
"""Configuration loading from TOML with environment variable overrides."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tomllib
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass
class ServerConfig:
    """Settings held in the ``[server]`` table of the TOML config.

    Every field carries a default, so an instance can be built with no
    arguments.
    """

    # Filesystem path of the daemon socket (presumably a Unix domain socket
    # — confirm against the server code).
    socket_path: str = "/run/nix-builder-autoscaler/daemon.sock"

    # Log level name, stored as given; it is not normalised or checked here.
    log_level: str = "info"

    # Filesystem path of the state database file.
    db_path: str = "/var/lib/nix-builder-autoscaler/state.db"
|
||||
|
||||
|
||||
@dataclass
class AwsConfig:
    """Settings held in the ``[aws]`` table of the TOML config.

    The list-valued fields use ``default_factory`` so each instance gets its
    own fresh list rather than sharing one mutable default.
    """

    # AWS region name.
    region: str = "us-east-1"

    # Launch template identifier; empty string when not configured.
    launch_template_id: str = ""

    # Subnet identifiers; empty by default.
    subnet_ids: list[str] = field(default_factory=list)

    # Security group identifiers; empty by default.
    security_group_ids: list[str] = field(default_factory=list)

    # Instance profile ARN; empty string when not configured.
    instance_profile_arn: str = ""
|
||||
|
||||
|
||||
@dataclass
class HaproxyConfig:
    """Settings held in the ``[haproxy]`` table of the TOML config."""

    # Filesystem path of the HAProxy runtime (admin) socket.
    runtime_socket: str = "/run/haproxy/admin.sock"

    # HAProxy backend name.
    backend: str = "all"

    # Prefix for slot names.
    # NOTE(review): presumably combined with an index to form HAProxy server
    # names — confirm against the HAProxy client code.
    slot_prefix: str = "slot"

    # Number of slots.
    slot_count: int = 8

    # NOTE(review): presumably the number of consecutive passing health
    # checks required before a slot counts as ready — confirm with the
    # consumer of this value.
    check_ready_up_count: int = 2
|
||||
|
||||
|
||||
@dataclass
class SystemConfig:
    """One ``[[systems]]`` entry: capacity policy for a single architecture."""

    # System (architecture) identifier this entry applies to.
    name: str = "x86_64-linux"

    # Lower and upper bounds on slot count for this system.
    min_slots: int = 0
    max_slots: int = 8

    # Number of slots to keep warm; 0 by default.
    target_warm_slots: int = 0

    # Maximum leases assigned to one slot.
    max_leases_per_slot: int = 1

    # NOTE(review): presumably how many instances are launched per scale-up
    # step — confirm against the scheduler.
    launch_batch_size: int = 1

    # Idle time, in seconds, before scale-down.
    # NOTE(review): CapacityConfig spells the analogous setting
    # ``idle_scale_down_seconds``; the key names differ between the two tables.
    scale_down_idle_seconds: int = 900
|
||||
|
||||
|
||||
@dataclass
class CapacityConfig:
    """Settings held in the ``[capacity]`` table: global capacity defaults."""

    # System identifier used as the default.
    default_system: str = "x86_64-linux"

    # Lower and upper bounds on slot count.
    min_slots: int = 0
    max_slots: int = 8

    # Number of slots to keep warm; 0 by default.
    target_warm_slots: int = 0

    # Maximum leases assigned to one slot.
    max_leases_per_slot: int = 1

    # Durations below are expressed in seconds, per their field names.
    reservation_ttl_seconds: int = 1200
    idle_scale_down_seconds: int = 900
    drain_timeout_seconds: int = 120
|
||||
|
||||
|
||||
@dataclass
class SecurityConfig:
    """Settings held in the ``[security]`` table of the TOML config."""

    # Socket permission bits, kept as a string.
    # NOTE(review): presumably parsed as octal by whoever applies it — confirm.
    socket_mode: str = "0660"

    # Owner and group names for the socket.
    socket_owner: str = "buildbot"
    socket_group: str = "buildbot"
|
||||
|
||||
|
||||
@dataclass
class SchedulerConfig:
    """Settings held in the ``[scheduler]`` table of the TOML config."""

    # Interval, in seconds, between scheduler ticks.
    tick_seconds: float = 3.0

    # Interval, in seconds, between reconcile passes.
    reconcile_seconds: float = 15.0
|
||||
|
||||
|
||||
@dataclass
class AppConfig:
    """Root configuration object aggregating every config section.

    Each section attribute defaults to a freshly constructed instance of its
    section dataclass, so ``AppConfig()`` yields a complete all-defaults
    configuration. ``systems`` defaults to an empty list.
    """

    # One attribute per TOML table; the attribute name matches the table name.
    server: ServerConfig = field(default_factory=ServerConfig)
    aws: AwsConfig = field(default_factory=AwsConfig)
    haproxy: HaproxyConfig = field(default_factory=HaproxyConfig)
    capacity: CapacityConfig = field(default_factory=CapacityConfig)
    security: SecurityConfig = field(default_factory=SecurityConfig)
    scheduler: SchedulerConfig = field(default_factory=SchedulerConfig)

    # Entries of the ``[[systems]]`` array of tables.
    systems: list[SystemConfig] = field(default_factory=list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Environment variable overrides
# ---------------------------------------------------------------------------
# AUTOSCALER_TAILSCALE_API_TOKEN — Tailscale API token for IP discovery
# AWS_REGION                     — override aws.region
# AWS_ACCESS_KEY_ID              — explicit AWS credential
# AWS_SECRET_ACCESS_KEY          — explicit AWS credential
#
# NOTE: of the variables above, only AWS_REGION is applied to the config by
# _apply_env_overrides(); the token and credential variables are not read in
# this part of the module (presumably they are consumed elsewhere — confirm).
|
||||
|
||||
|
||||
def _apply_env_overrides(cfg: AppConfig) -> None:
    """Overlay environment-provided settings onto *cfg* in place.

    Only ``AWS_REGION`` is consulted: when it is set to a non-empty value it
    replaces ``cfg.aws.region``. An unset or empty variable leaves the
    configured region untouched.
    """
    if override := os.environ.get("AWS_REGION"):
        cfg.aws.region = override
|
||||
|
||||
|
||||
def _build_dataclass(cls: type, data: dict) -> object:  # noqa: ANN001
    """Construct a dataclass from a dict, ignoring unknown keys.

    Args:
        cls: Dataclass type to instantiate.
        data: Mapping of candidate field values, e.g. one parsed TOML table.

    Returns:
        A new ``cls`` instance. Keys in ``data`` that the constructor does
        not accept are dropped silently; fields absent from ``data`` keep
        their declared defaults.

    Raises:
        TypeError: If a constructor parameter without a default is missing
            from ``data``.
    """
    # Function-scope import: the module header does not import ``inspect``.
    import inspect

    # Filter on the constructor's real parameters rather than on
    # ``__dataclass_fields__``: that mapping also lists ClassVar pseudo-fields
    # and ``init=False`` fields, and forwarding either to ``cls(...)`` raises
    # TypeError instead of being ignored like any other unknown key.
    accepted = inspect.signature(cls).parameters
    return cls(**{key: value for key, value in data.items() if key in accepted})
|
||||
|
||||
|
||||
def load_config(path: Path) -> AppConfig:
    """Load configuration from a TOML file.

    Tables absent from the file keep their dataclass defaults, and keys that
    a section does not declare are dropped by ``_build_dataclass``. Values
    are not range- or consistency-checked here. Environment overrides are
    applied last, so they take precedence over the file.

    Args:
        path: Path to the TOML config file.

    Returns:
        The populated AppConfig instance.

    Raises:
        OSError: If the file cannot be opened.
        tomllib.TOMLDecodeError: If the file is not valid TOML.
    """
    with open(path, "rb") as handle:
        document = tomllib.load(handle)

    config = AppConfig()

    # TOML table name (identical to the AppConfig attribute name) -> the
    # dataclass that models that table.
    section_types = {
        "server": ServerConfig,
        "aws": AwsConfig,
        "haproxy": HaproxyConfig,
        "capacity": CapacityConfig,
        "security": SecurityConfig,
        "scheduler": SchedulerConfig,
    }
    for section, section_cls in section_types.items():
        if section in document:
            setattr(config, section, _build_dataclass(section_cls, document[section]))

    # ``[[systems]]`` is an array of tables: build one SystemConfig per entry.
    if "systems" in document:
        config.systems = [  # type: ignore[assignment]
            _build_dataclass(SystemConfig, entry) for entry in document["systems"]
        ]

    _apply_env_overrides(config)
    return config
|
||||
Loading…
Add table
Add a link
Reference in a new issue