account for in-flight capacity in launch scheduling

This commit is contained in:
Abel Luck 2026-02-27 16:32:16 +01:00
parent 57b4df2a17
commit d8afde8b18
2 changed files with 64 additions and 3 deletions

View file

@ -9,6 +9,7 @@ from __future__ import annotations
import logging import logging
import time import time
from collections import Counter
from datetime import datetime from datetime import datetime
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
@ -146,6 +147,25 @@ def _launch_for_unmet_demand(
if not pending: if not pending:
return return
demand_by_system = Counter(str(resv["system"]) for resv in pending)
in_flight_slots = (
db.list_slots(SlotState.LAUNCHING)
+ db.list_slots(SlotState.BOOTING)
+ db.list_slots(SlotState.BINDING)
)
in_flight_by_system = Counter(str(slot["system"]) for slot in in_flight_slots)
leases_per_slot = max(1, config.capacity.max_leases_per_slot)
for system, in_flight_count in in_flight_by_system.items():
in_flight_capacity = in_flight_count * leases_per_slot
if in_flight_capacity <= 0:
continue
current_demand = demand_by_system.get(system, 0)
demand_by_system[system] = max(0, current_demand - in_flight_capacity)
if sum(demand_by_system.values()) <= 0:
return
active = _count_active_slots(db) active = _count_active_slots(db)
if active >= config.capacity.max_slots: if active >= config.capacity.max_slots:
return return
@ -154,12 +174,20 @@ def _launch_for_unmet_demand(
if not empty_slots: if not empty_slots:
return return
for launched, slot in enumerate(empty_slots): launched = 0
if launched >= len(pending): for slot in empty_slots:
break
if active + launched >= config.capacity.max_slots: if active + launched >= config.capacity.max_slots:
break break
system = str(slot["system"])
if demand_by_system.get(system, 0) <= 0:
continue
_launch_slot(db, runtime, config, metrics, slot) _launch_slot(db, runtime, config, metrics, slot)
launched += 1
demand_by_system[system] = max(0, demand_by_system[system] - leases_per_slot)
if sum(demand_by_system.values()) <= 0:
break
def _ensure_min_and_warm( def _ensure_min_and_warm(

View file

@ -173,6 +173,39 @@ def test_launch_respects_max_slots():
assert len(launching) == 0 assert len(launching) == 0
def test_launch_does_not_overprovision_when_in_flight_capacity_exists():
    """A second scheduling tick must not launch extra slots for demand already covered.

    Two reservations are created for the same system. The first tick is
    expected to put two slots into LAUNCHING; the second tick should treat
    those slots as in-flight capacity and leave both the LAUNCHING count
    and the number of managed runtime instances at two.
    """
    db, runtime, config, clock, metrics = _make_env(slot_count=4, max_slots=4)
    for reservation_name in ("test1", "test2"):
        db.create_reservation("x86_64-linux", reservation_name, None, 1200)

    # First tick: one launch per pending reservation.
    scheduling_tick(db, runtime, config, clock, metrics)
    assert len(db.list_slots(SlotState.LAUNCHING)) == 2

    # Second tick: the two launching slots already cover the demand,
    # so nothing new should be started.
    scheduling_tick(db, runtime, config, clock, metrics)
    assert len(db.list_slots(SlotState.LAUNCHING)) == 2
    assert len(runtime.list_managed_instances()) == 2
def test_in_flight_capacity_uses_max_leases_per_slot():
    """In-flight capacity is counted in leases, not slots.

    With ``max_leases=2``, a single slot already in LAUNCHING is expected to
    cover both pending reservations, so the tick should not launch anything:
    the LAUNCHING count stays at one and the runtime reports no managed
    instances.
    """
    db, runtime, config, clock, metrics = _make_env(
        slot_count=3,
        max_slots=3,
        max_leases=2,
    )
    for reservation_name in ("test1", "test2"):
        db.create_reservation("x86_64-linux", reservation_name, None, 1200)

    # Put one slot in flight directly through the DB; with two leases per
    # slot it should account for both reservations.
    db.update_slot_state("slot001", SlotState.LAUNCHING, instance_id="i-launching")

    scheduling_tick(db, runtime, config, clock, metrics)

    launching_slots = db.list_slots(SlotState.LAUNCHING)
    assert len(launching_slots) == 1
    assert len(runtime.list_managed_instances()) == 0
def test_min_slots_maintained(): def test_min_slots_maintained():
db, runtime, config, clock, metrics = _make_env(min_slots=1) db, runtime, config, clock, metrics = _make_env(min_slots=1)