support dual launch templates: spot for normal builds, on-demand for nested virtualization
Some checks failed
buildbot/nix-eval Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.package-nix-builder-autoscaler Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.package-default Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.app-autoscalerctl Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.app-default Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.app-nix-builder-autoscaler Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.nix-builder-autoscaler-pyright Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.nix-builder-autoscaler-integration-tests Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.nix-builder-autoscaler-ruff Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.nix-builder-autoscaler-unit-tests Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.package-buildbot-autoscale-ext Build done.
buildbot/nix-build Build done.
Some checks failed
buildbot/nix-eval Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.package-nix-builder-autoscaler Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.package-default Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.app-autoscalerctl Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.app-default Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.app-nix-builder-autoscaler Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.nix-builder-autoscaler-pyright Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.nix-builder-autoscaler-integration-tests Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.nix-builder-autoscaler-ruff Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.nix-builder-autoscaler-unit-tests Build done.
buildbot/nix-build gitea:ops/nix-builder-autoscaler#checks.x86_64-linux.package-buildbot-autoscale-ext Build done.
buildbot/nix-build Build done.
AWS does not allow cpu_options.nested_virtualization with spot instances. Add a second launch template (on-demand, cpu_options enabled) alongside the existing spot template. The autoscaler selects the template per-system based on nested_virtualization config.

- RuntimeAdapter.launch_spot -> launch_instance(nested_virtualization=False)
- EC2Runtime: selects spot or on-demand LT; raises misconfiguration error if on_demand_launch_template_id is empty when nested_virtualization=True
- AwsConfig: add on_demand_launch_template_id field
- SystemConfig: add nested_virtualization field
- Scheduler: looks up system config to pass nested_virtualization flag
- NixOS module: new aws.onDemandLaunchTemplateIdFile + capacity.nestedVirtualization options; assertion prevents enabling nestedVirtualization without the LT ID file
This commit is contained in:
parent
3f70094c0a
commit
02b1a063ab
9 changed files with 101 additions and 35 deletions
|
|
@ -73,7 +73,7 @@ class TestLaunchSpot:
|
|||
stubber.add_response("run_instances", response, expected_params)
|
||||
runtime = _make_runtime(stubber, ec2_client, config=config)
|
||||
|
||||
iid = runtime.launch_spot("slot001", "#!/bin/bash\necho hello")
|
||||
iid = runtime.launch_instance("slot001", "#!/bin/bash\necho hello")
|
||||
assert iid == "i-12345678"
|
||||
stubber.assert_no_pending_responses()
|
||||
|
||||
|
|
@ -90,8 +90,8 @@ class TestLaunchSpot:
|
|||
)
|
||||
|
||||
runtime = _make_runtime(stubber, ec2_client, config=config)
|
||||
runtime.launch_spot("slot001", "")
|
||||
runtime.launch_spot("slot002", "")
|
||||
runtime.launch_instance("slot001", "")
|
||||
runtime.launch_instance("slot002", "")
|
||||
stubber.assert_no_pending_responses()
|
||||
|
||||
|
||||
|
|
@ -418,7 +418,7 @@ class TestErrorClassification:
|
|||
runtime = _make_runtime(stubber, ec2_client)
|
||||
|
||||
with pytest.raises(RuntimeAdapterError) as exc_info:
|
||||
runtime.launch_spot("slot001", "#!/bin/bash")
|
||||
runtime.launch_instance("slot001", "#!/bin/bash")
|
||||
assert exc_info.value.category == "capacity_unavailable"
|
||||
|
||||
@patch("nix_builder_autoscaler.runtime.ec2.time.sleep")
|
||||
|
|
@ -439,7 +439,7 @@ class TestErrorClassification:
|
|||
)
|
||||
runtime = _make_runtime(stubber, ec2_client)
|
||||
|
||||
iid = runtime.launch_spot("slot001", "#!/bin/bash")
|
||||
iid = runtime.launch_instance("slot001", "#!/bin/bash")
|
||||
assert iid == "i-retry123"
|
||||
assert mock_sleep.called
|
||||
stubber.assert_no_pending_responses()
|
||||
|
|
@ -460,5 +460,5 @@ class TestErrorClassification:
|
|||
runtime = _make_runtime(stubber, ec2_client)
|
||||
|
||||
with pytest.raises(RuntimeAdapterError) as exc_info:
|
||||
runtime.launch_spot("slot001", "#!/bin/bash")
|
||||
runtime.launch_instance("slot001", "#!/bin/bash")
|
||||
assert exc_info.value.category == "throttled"
|
||||
|
|
|
|||
|
|
@ -9,13 +9,13 @@ from nix_builder_autoscaler.runtime.fake import FakeRuntime
|
|||
class TestLaunchSpot:
|
||||
def test_returns_synthetic_instance_id(self):
|
||||
rt = FakeRuntime()
|
||||
iid = rt.launch_spot("slot001", "#!/bin/bash\necho hello")
|
||||
iid = rt.launch_instance("slot001", "#!/bin/bash\necho hello")
|
||||
assert iid.startswith("i-fake-")
|
||||
assert len(iid) > 10
|
||||
|
||||
def test_instance_starts_pending(self):
|
||||
rt = FakeRuntime()
|
||||
iid = rt.launch_spot("slot001", "")
|
||||
iid = rt.launch_instance("slot001", "")
|
||||
info = rt.describe_instance(iid)
|
||||
assert info["state"] == "pending"
|
||||
assert info["tailscale_ip"] is None
|
||||
|
|
@ -24,7 +24,7 @@ class TestLaunchSpot:
|
|||
class TestTickProgression:
|
||||
def test_transitions_to_running_after_configured_ticks(self):
|
||||
rt = FakeRuntime(launch_latency_ticks=3, ip_delay_ticks=1)
|
||||
iid = rt.launch_spot("slot001", "")
|
||||
iid = rt.launch_instance("slot001", "")
|
||||
|
||||
for _ in range(2):
|
||||
rt.tick()
|
||||
|
|
@ -35,7 +35,7 @@ class TestTickProgression:
|
|||
|
||||
def test_tailscale_ip_appears_after_configured_delay(self):
|
||||
rt = FakeRuntime(launch_latency_ticks=2, ip_delay_ticks=2)
|
||||
iid = rt.launch_spot("slot001", "")
|
||||
iid = rt.launch_instance("slot001", "")
|
||||
|
||||
for _ in range(2):
|
||||
rt.tick()
|
||||
|
|
@ -56,7 +56,7 @@ class TestInjectedFailure:
|
|||
rt = FakeRuntime()
|
||||
rt.inject_launch_failure("slot001")
|
||||
try:
|
||||
rt.launch_spot("slot001", "")
|
||||
rt.launch_instance("slot001", "")
|
||||
raise AssertionError("Should have raised")
|
||||
except RuntimeAdapterError as e:
|
||||
assert e.category == "capacity_unavailable"
|
||||
|
|
@ -65,16 +65,16 @@ class TestInjectedFailure:
|
|||
rt = FakeRuntime()
|
||||
rt.inject_launch_failure("slot001")
|
||||
with contextlib.suppress(RuntimeAdapterError):
|
||||
rt.launch_spot("slot001", "")
|
||||
rt.launch_instance("slot001", "")
|
||||
# Second call should succeed
|
||||
iid = rt.launch_spot("slot001", "")
|
||||
iid = rt.launch_instance("slot001", "")
|
||||
assert iid.startswith("i-fake-")
|
||||
|
||||
|
||||
class TestInjectedInterruption:
|
||||
def test_interruption_returns_terminated(self):
|
||||
rt = FakeRuntime(launch_latency_ticks=1)
|
||||
iid = rt.launch_spot("slot001", "")
|
||||
iid = rt.launch_instance("slot001", "")
|
||||
rt.tick()
|
||||
assert rt.describe_instance(iid)["state"] == "running"
|
||||
|
||||
|
|
@ -85,7 +85,7 @@ class TestInjectedInterruption:
|
|||
def test_interruption_is_one_shot(self):
|
||||
"""After the interruption fires, subsequent describes stay terminated."""
|
||||
rt = FakeRuntime(launch_latency_ticks=1)
|
||||
iid = rt.launch_spot("slot001", "")
|
||||
iid = rt.launch_instance("slot001", "")
|
||||
rt.tick()
|
||||
rt.inject_interruption(iid)
|
||||
rt.describe_instance(iid) # consumes the injection
|
||||
|
|
@ -96,7 +96,7 @@ class TestInjectedInterruption:
|
|||
class TestTerminate:
|
||||
def test_terminate_marks_instance(self):
|
||||
rt = FakeRuntime(launch_latency_ticks=1)
|
||||
iid = rt.launch_spot("slot001", "")
|
||||
iid = rt.launch_instance("slot001", "")
|
||||
rt.tick()
|
||||
rt.terminate_instance(iid)
|
||||
assert rt.describe_instance(iid)["state"] == "terminated"
|
||||
|
|
@ -105,8 +105,8 @@ class TestTerminate:
|
|||
class TestListManaged:
|
||||
def test_lists_non_terminated(self):
|
||||
rt = FakeRuntime(launch_latency_ticks=1)
|
||||
iid1 = rt.launch_spot("slot001", "")
|
||||
iid2 = rt.launch_spot("slot002", "")
|
||||
iid1 = rt.launch_instance("slot001", "")
|
||||
iid2 = rt.launch_instance("slot002", "")
|
||||
rt.tick()
|
||||
rt.terminate_instance(iid1)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue