nix-builder-autoscaler/agent/nix_builder_autoscaler/tests/test_scheduler.py

194 lines
6.3 KiB
Python

"""Scheduler unit tests — Plan 03."""
from nix_builder_autoscaler.config import AppConfig, AwsConfig, CapacityConfig
from nix_builder_autoscaler.metrics import MetricsRegistry
from nix_builder_autoscaler.models import ReservationPhase, SlotState
from nix_builder_autoscaler.providers.clock import FakeClock
from nix_builder_autoscaler.runtime.fake import FakeRuntime
from nix_builder_autoscaler.scheduler import scheduling_tick
from nix_builder_autoscaler.state_db import StateDB
def _make_env(
    slot_count=3,
    max_slots=3,
    max_leases=1,
    idle_scale_down_seconds=900,
    target_warm=0,
    min_slots=0,
):
    """Build the collaborators that ``scheduling_tick`` is called with.

    A ``StateDB`` is opened on ``":memory:"`` with a ``FakeClock``, its schema
    is initialised, and ``slot_count`` slots are registered with the prefix
    ``"slot"`` for ``x86_64-linux``.

    Args:
        slot_count: Number of slots passed to ``StateDB.init_slots``.
        max_slots: Forwarded as ``CapacityConfig.max_slots``.
        max_leases: Forwarded as ``CapacityConfig.max_leases_per_slot``.
        idle_scale_down_seconds: Forwarded under the same name.
        target_warm: Forwarded as ``CapacityConfig.target_warm_slots``.
        min_slots: Forwarded under the same name.

    Returns:
        ``(db, runtime, config, clock, metrics)`` — the same order in which
        the tests hand them to ``scheduling_tick``.
    """
    fake_clock = FakeClock()

    state_db = StateDB(":memory:", clock=fake_clock)
    state_db.init_schema()
    state_db.init_slots("slot", slot_count, "x86_64-linux", "all")

    fake_runtime = FakeRuntime(launch_latency_ticks=2, ip_delay_ticks=1)

    capacity_cfg = CapacityConfig(
        max_slots=max_slots,
        max_leases_per_slot=max_leases,
        idle_scale_down_seconds=idle_scale_down_seconds,
        target_warm_slots=target_warm,
        min_slots=min_slots,
        reservation_ttl_seconds=1200,
    )
    aws_cfg = AwsConfig(region="us-east-1")
    app_config = AppConfig(capacity=capacity_cfg, aws=aws_cfg)

    return state_db, fake_runtime, app_config, fake_clock, MetricsRegistry()
def _make_slot_ready(db, slot_id, instance_id="i-test1", ip="100.64.0.1"):
    """Drive ``slot_id`` through LAUNCHING, BOOTING, BINDING and READY.

    The instance id is attached on the LAUNCHING step and the IP address on
    the BINDING step; the other two steps carry no extra fields.
    """
    # Each entry is (target state, extra keyword fields for that step);
    # the order is the order in which the updates are issued.
    transitions = (
        (SlotState.LAUNCHING, {"instance_id": instance_id}),
        (SlotState.BOOTING, {}),
        (SlotState.BINDING, {"instance_ip": ip}),
        (SlotState.READY, {}),
    )
    for target_state, extra_fields in transitions:
        db.update_slot_state(slot_id, target_state, **extra_fields)
# --- Test cases ---
def test_pending_reservation_assigned_to_ready_slot():
    """One tick binds a new reservation to the slot that is already READY."""
    state_db, fake_runtime, app_config, fake_clock, registry = _make_env()
    _make_slot_ready(state_db, "slot001")
    created = state_db.create_reservation("x86_64-linux", "test", None, 1200)
    reservation_id = created["reservation_id"]

    scheduling_tick(state_db, fake_runtime, app_config, fake_clock, registry)

    # The reservation must now point at the ready slot and its instance.
    reservation = state_db.get_reservation(reservation_id)
    assert reservation["phase"] == ReservationPhase.READY.value
    assert reservation["slot_id"] == "slot001"
    assert reservation["instance_id"] == "i-test1"

    # ...and the slot must record exactly one lease.
    leased_slot = state_db.get_slot("slot001")
    assert leased_slot["lease_count"] == 1
def test_two_pending_one_slot_only_one_assigned_per_tick():
    """With one ready slot and ``max_leases=1``, only one of two reservations is assigned."""
    state_db, fake_runtime, app_config, fake_clock, registry = _make_env(max_leases=1)
    _make_slot_ready(state_db, "slot001")
    first = state_db.create_reservation("x86_64-linux", "test1", None, 1200)
    second = state_db.create_reservation("x86_64-linux", "test2", None, 1200)

    scheduling_tick(state_db, fake_runtime, app_config, fake_clock, registry)

    # Re-read both reservations (in creation order) and tally their phases;
    # the test does not care which of the two won the slot.
    phases = []
    for created in (first, second):
        refreshed = state_db.get_reservation(created["reservation_id"])
        phases.append(refreshed["phase"])
    ready_total = phases.count(ReservationPhase.READY.value)
    pending_total = phases.count(ReservationPhase.PENDING.value)
    assert ready_total == 1
    assert pending_total == 1

    only_slot = state_db.get_slot("slot001")
    assert only_slot["lease_count"] == 1
def test_reservation_expires_when_ttl_passes():
    """A reservation is listed as EXPIRED once the clock moves past its TTL."""
    state_db, fake_runtime, app_config, fake_clock, registry = _make_env()
    ttl = 60
    app_config.capacity.reservation_ttl_seconds = ttl
    state_db.create_reservation("x86_64-linux", "test", None, ttl)

    # Step one second beyond the TTL before ticking.
    fake_clock.advance(ttl + 1)
    scheduling_tick(state_db, fake_runtime, app_config, fake_clock, registry)

    expired = state_db.list_reservations(ReservationPhase.EXPIRED)
    assert len(expired) == 1
def test_scale_down_starts_when_idle_exceeds_threshold():
    """A ready slot with no leases is DRAINING after the idle threshold is exceeded."""
    idle_threshold = 900
    state_db, fake_runtime, app_config, fake_clock, registry = _make_env(
        idle_scale_down_seconds=idle_threshold,
    )
    _make_slot_ready(state_db, "slot001")

    # One second past the configured idle window.
    fake_clock.advance(idle_threshold + 1)
    scheduling_tick(state_db, fake_runtime, app_config, fake_clock, registry)

    idle_slot = state_db.get_slot("slot001")
    assert idle_slot["state"] == SlotState.DRAINING.value
def test_slot_does_not_drain_while_lease_count_positive():
    """A slot holding an assigned reservation stays READY past the idle threshold."""
    idle_threshold = 900
    state_db, fake_runtime, app_config, fake_clock, registry = _make_env(
        idle_scale_down_seconds=idle_threshold,
    )
    tick_args = (state_db, fake_runtime, app_config, fake_clock, registry)
    _make_slot_ready(state_db, "slot001")
    created = state_db.create_reservation("x86_64-linux", "test", None, 1200)

    # First tick: the reservation must be assigned before time moves on.
    scheduling_tick(*tick_args)
    assigned = state_db.get_reservation(created["reservation_id"])
    assert assigned["phase"] == ReservationPhase.READY.value

    # Second tick, one second past the idle window: the slot must not drain.
    fake_clock.advance(idle_threshold + 1)
    scheduling_tick(*tick_args)
    busy_slot = state_db.get_slot("slot001")
    assert busy_slot["state"] == SlotState.READY.value
def test_interruption_pending_slot_moves_to_draining():
    """A ready slot flagged ``interruption_pending`` is DRAINING after one tick, flag cleared."""
    state_db, fake_runtime, app_config, fake_clock, registry = _make_env()
    _make_slot_ready(state_db, "slot001")
    state_db.update_slot_fields("slot001", interruption_pending=1)

    scheduling_tick(state_db, fake_runtime, app_config, fake_clock, registry)

    interrupted = state_db.get_slot("slot001")
    assert interrupted["state"] == SlotState.DRAINING.value
    # The flag is expected to be reset once the slot has been moved.
    assert interrupted["interruption_pending"] == 0
def test_launch_triggered_for_unmet_demand():
    """A reservation with no ready slot causes exactly one slot to start launching."""
    state_db, fake_runtime, app_config, fake_clock, registry = _make_env()
    state_db.create_reservation("x86_64-linux", "test", None, 1200)

    scheduling_tick(state_db, fake_runtime, app_config, fake_clock, registry)

    launching_slots = state_db.list_slots(SlotState.LAUNCHING)
    assert len(launching_slots) == 1
    launched = launching_slots[0]
    assert launched["instance_id"] is not None

    # The fake runtime should be tracking one instance for that launch.
    tracked_instances = fake_runtime.list_managed_instances()
    assert len(tracked_instances) == 1
def test_launch_respects_max_slots():
    """No slot enters LAUNCHING when the single allowed slot is already active."""
    state_db, fake_runtime, app_config, fake_clock, registry = _make_env(max_slots=1)
    _make_slot_ready(state_db, "slot001")

    # Two reservations compete for slot001; per the original author's note
    # the slot is at capacity once one of them is assigned to it.
    for label in ("test1", "test2"):
        state_db.create_reservation("x86_64-linux", label, None, 1200)

    scheduling_tick(state_db, fake_runtime, app_config, fake_clock, registry)

    # Demand is still unmet, but active slots (1) already equal max_slots (1),
    # so nothing new may be launched.
    launching_slots = state_db.list_slots(SlotState.LAUNCHING)
    assert len(launching_slots) == 0
def test_min_slots_maintained():
    """With ``min_slots=1`` and no reservations, one tick starts one launch."""
    state_db, fake_runtime, app_config, fake_clock, registry = _make_env(min_slots=1)

    # No reservations are created and no slot has been made ready.
    scheduling_tick(state_db, fake_runtime, app_config, fake_clock, registry)

    launching_slots = state_db.list_slots(SlotState.LAUNCHING)
    assert len(launching_slots) == 1
def test_scale_down_respects_min_slots():
    """With ``min_slots=1``, the only ready slot stays READY past the idle threshold."""
    idle_threshold = 900
    state_db, fake_runtime, app_config, fake_clock, registry = _make_env(
        min_slots=1,
        idle_scale_down_seconds=idle_threshold,
    )
    _make_slot_ready(state_db, "slot001")

    # One second past the idle window; the slot would otherwise be eligible
    # for scale-down.
    fake_clock.advance(idle_threshold + 1)
    scheduling_tick(state_db, fake_runtime, app_config, fake_clock, registry)

    kept_slot = state_db.get_slot("slot001")
    assert kept_slot["state"] == SlotState.READY.value