nix-builder-autoscaler/agent/nix_builder_autoscaler/__main__.py
2026-02-27 12:46:32 +01:00

163 lines
4.5 KiB
Python

"""Daemon entry point: python -m nix_builder_autoscaler."""
from __future__ import annotations
import argparse
import logging
import signal
import threading
from pathlib import Path
from types import FrameType
import uvicorn
from .api import create_app
from .config import AppConfig, load_config
from .logging import setup_logging
from .metrics import MetricsRegistry
from .providers.clock import SystemClock
from .providers.haproxy import HAProxyRuntime
from .reconciler import Reconciler
from .runtime.ec2 import EC2Runtime
from .scheduler import scheduling_tick
from .state_db import StateDB
# Module-level logger used by the worker loops and the shutdown signal handler.
log = logging.getLogger(__name__)
def _scheduler_loop(
    db: StateDB,
    runtime: EC2Runtime,
    config: AppConfig,
    clock: SystemClock,
    metrics: MetricsRegistry,
    stop_event: threading.Event,
) -> None:
    """Run ``scheduling_tick`` repeatedly until ``stop_event`` is set.

    A failing tick is logged with its traceback and does not end the loop.
    Between ticks the loop waits on ``stop_event`` for
    ``config.scheduler.tick_seconds``, so a shutdown request interrupts the
    pause instead of waiting out the full interval.
    """
    tick_args = (db, runtime, config, clock, metrics)
    while True:
        if stop_event.is_set():
            return
        try:
            scheduling_tick(*tick_args)
        except Exception:
            # Thread boundary: record the failure and try again next round.
            log.exception("scheduler_tick_failed")
        # The interval is re-read from config on every iteration.
        stop_event.wait(config.scheduler.tick_seconds)
def _reconciler_loop(
    reconciler: Reconciler,
    config: AppConfig,
    stop_event: threading.Event,
) -> None:
    """Call ``reconciler.tick()`` repeatedly until ``stop_event`` is set.

    A failing tick is logged with its traceback and does not end the loop.
    Between ticks the loop waits on ``stop_event`` for
    ``config.scheduler.reconcile_seconds``, so a shutdown request interrupts
    the pause instead of waiting out the full interval.
    """
    while True:
        if stop_event.is_set():
            return
        try:
            reconciler.tick()
        except Exception:
            # Thread boundary: record the failure and try again next round.
            log.exception("reconciler_tick_failed")
        # The interval is re-read from config on every iteration.
        stop_event.wait(config.scheduler.reconcile_seconds)
def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the daemon's command-line arguments.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse falls back to ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        Namespace whose ``config`` attribute holds the value of the
        required ``--config`` option.

    Raises:
        SystemExit: Raised by argparse when ``--config`` is missing or an
            unknown argument is supplied.
    """
    parser = argparse.ArgumentParser(
        prog="nix-builder-autoscaler",
        description="Nix builder autoscaler daemon",
    )
    parser.add_argument("--config", required=True, help="Path to TOML config file")
    return parser.parse_args(argv)
def _prepare_socket_path(socket_path: Path) -> None:
    """Create the socket's parent directory and remove any leftover socket file."""
    socket_path.parent.mkdir(parents=True, exist_ok=True)
    # A single unlink avoids the exists()/unlink() race and also clears a
    # dangling symlink, which exists() would report as absent.
    socket_path.unlink(missing_ok=True)


def _join_worker(worker: threading.Thread, timeout: float = 10) -> None:
    """Wait up to ``timeout`` seconds for ``worker``; log if it is still running."""
    worker.join(timeout=timeout)
    if worker.is_alive():
        log.warning("worker_join_timeout", extra={"worker": worker.name})


def main() -> None:
    """Parse config, initialize components, and run the daemon.

    Steps, in order:

    1. Load the TOML config named by ``--config`` and set up logging.
    2. Open the state DB, initialise its schema and slots.
    3. Build the EC2 runtime, HAProxy runtime, metrics registry and
       reconciler, then run one reconcile tick synchronously. An exception
       from that tick aborts startup.
    4. Build the API app, start the scheduler and reconciler worker threads.
    5. Serve the API on the configured Unix domain socket until uvicorn
       returns.

    Whatever happens after the DB has been opened, the cleanup path sets the
    stop event, joins every worker that was started (10 s each) and closes
    the DB.
    """
    args = _parse_args()
    config = load_config(Path(args.config))
    setup_logging(config.server.log_level)

    clock = SystemClock()
    db = StateDB(config.server.db_path, clock=clock)

    stop_event = threading.Event()
    # Only threads that were actually started are recorded here; joining an
    # unstarted thread raises RuntimeError.
    started_workers: list[threading.Thread] = []
    server: uvicorn.Server | None = None

    def _handle_signal(signum: int, _: FrameType | None) -> None:
        """Request shutdown of the worker loops and the HTTP server."""
        log.info("shutdown_signal", extra={"signal": signum})
        stop_event.set()
        if server is not None:
            server.should_exit = True

    try:
        db.init_schema()
        db.init_slots(
            config.haproxy.slot_prefix,
            config.haproxy.slot_count,
            config.capacity.default_system,
            config.haproxy.backend,
        )

        runtime = EC2Runtime(config.aws)
        haproxy = HAProxyRuntime(
            config.haproxy.runtime_socket,
            config.haproxy.backend,
            config.haproxy.slot_prefix,
        )
        metrics = MetricsRegistry()
        reconciler = Reconciler(db, runtime, haproxy, config, clock, metrics)
        # One synchronous reconcile before any worker or the API starts.
        reconciler.tick()

        scheduler_thread = threading.Thread(
            target=_scheduler_loop,
            name="autoscaler-scheduler",
            args=(db, runtime, config, clock, metrics, stop_event),
            daemon=True,
        )
        reconciler_thread = threading.Thread(
            target=_reconciler_loop,
            name="autoscaler-reconciler",
            args=(reconciler, config, stop_event),
            daemon=True,
        )

        # Liveness callbacks handed to the API; an unstarted thread reports
        # is_alive() == False.
        def scheduler_running() -> bool:
            return scheduler_thread.is_alive()

        def reconciler_running() -> bool:
            return reconciler_thread.is_alive()

        app = create_app(
            db,
            config,
            clock,
            metrics,
            runtime=runtime,
            haproxy=haproxy,
            scheduler_running=scheduler_running,
            reconciler_running=reconciler_running,
        )

        for worker in (scheduler_thread, reconciler_thread):
            worker.start()
            started_workers.append(worker)

        _prepare_socket_path(Path(config.server.socket_path))

        server = uvicorn.Server(
            uvicorn.Config(
                app=app,
                uds=config.server.socket_path,
                log_level=config.server.log_level.lower(),
            )
        )

        # NOTE(review): uvicorn may install its own SIGINT/SIGTERM handlers
        # inside run(); the finally block below sets stop_event regardless of
        # which handler ends up firing.
        signal.signal(signal.SIGTERM, _handle_signal)
        signal.signal(signal.SIGINT, _handle_signal)

        server.run()
    finally:
        stop_event.set()
        for worker in started_workers:
            _join_worker(worker)
        db.close()
# Script entry point: run the daemon when this module is executed directly
# (e.g. via ``python -m nix_builder_autoscaler``).
if __name__ == "__main__":
    main()