add buildbot extension for autoscaling nix builders

This commit is contained in:
Abel Luck 2026-02-27 12:17:36 +01:00
parent ea12318b88
commit d1976a5fd8
13 changed files with 2300 additions and 8 deletions

View file

@ -0,0 +1,12 @@
"""Buildbot autoscale extension package."""
from .configurator import AutoscaleConfigurator
from .settings import AutoscaleSettings
from .steps import CapacityGateStep, CapacityReleaseStep
__all__ = [
"AutoscaleConfigurator",
"AutoscaleSettings",
"CapacityGateStep",
"CapacityReleaseStep",
]

View file

@ -0,0 +1,169 @@
from __future__ import annotations
import http.client
import json
import random
import socket
import time
from dataclasses import dataclass
from typing import Any
@dataclass(frozen=True)
class RetryPolicy:
max_attempts: int
base_seconds: float
max_seconds: float
class DaemonError(RuntimeError):
def __init__(
self,
message: str,
*,
path: str,
status: int | None = None,
response: dict[str, Any] | None = None,
cause: Exception | None = None,
) -> None:
super().__init__(message)
self.path = path
self.status = status
self.response = response
self.cause = cause
class UnixSocketHTTPConnection(http.client.HTTPConnection):
def __init__(self, socket_path: str, timeout: float) -> None:
super().__init__(host="localhost", port=0, timeout=timeout)
self._socket_path = socket_path
def connect(self) -> None:
self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
self.sock.settimeout(self.timeout)
self.sock.connect(self._socket_path)
class DaemonClient:
def __init__(self, socket_path: str, retry_policy: RetryPolicy) -> None:
self._socket_path = socket_path
self._retry = retry_policy
def post_json(
self,
path: str,
body: dict[str, Any],
timeout_seconds: float,
retryable_statuses: set[int],
) -> dict[str, Any]:
return self._request_json(
method="POST",
path=path,
timeout_seconds=timeout_seconds,
retryable_statuses=retryable_statuses,
body=body,
)
def get_json(
self,
path: str,
timeout_seconds: float,
retryable_statuses: set[int],
) -> dict[str, Any]:
return self._request_json(
method="GET",
path=path,
timeout_seconds=timeout_seconds,
retryable_statuses=retryable_statuses,
body=None,
)
def _request_json(
self,
*,
method: str,
path: str,
timeout_seconds: float,
retryable_statuses: set[int],
body: dict[str, Any] | None,
) -> dict[str, Any]:
last_error: DaemonError | None = None
for attempt in range(1, self._retry.max_attempts + 1):
try:
payload = json.dumps(body).encode("utf-8") if body is not None else None
response_body, status = self._raw_request(
method=method,
path=path,
timeout_seconds=timeout_seconds,
payload=payload,
)
parsed = self._parse_json(response_body, path)
if 200 <= status < 300:
return parsed
err = DaemonError(
f"daemon returned HTTP {status} for {method} {path}",
path=path,
status=status,
response=parsed,
)
retryable = status in retryable_statuses
if not retryable:
raise err
last_error = err
except (ConnectionRefusedError, FileNotFoundError, TimeoutError, OSError) as exc:
last_error = DaemonError(
f"daemon transport error during {method} {path}: {exc}",
path=path,
cause=exc,
)
except DaemonError:
raise
if attempt < self._retry.max_attempts:
self._sleep_backoff(attempt)
assert last_error is not None
raise last_error
def _raw_request(
self,
*,
method: str,
path: str,
timeout_seconds: float,
payload: bytes | None,
) -> tuple[bytes, int]:
conn = UnixSocketHTTPConnection(self._socket_path, timeout=timeout_seconds)
headers = {"Accept": "application/json"}
if payload is not None:
headers["Content-Type"] = "application/json"
try:
conn.request(method=method, url=path, body=payload, headers=headers)
response = conn.getresponse()
data = response.read()
return data, response.status
finally:
conn.close()
@staticmethod
def _parse_json(raw: bytes, path: str) -> dict[str, Any]:
if not raw:
return {}
try:
data = json.loads(raw.decode("utf-8"))
except json.JSONDecodeError as exc:
raise DaemonError(
f"daemon returned invalid JSON for {path}",
path=path,
cause=exc,
) from exc
if not isinstance(data, dict):
raise DaemonError(f"daemon returned non-object JSON for {path}", path=path)
return data
def _sleep_backoff(self, attempt: int) -> None:
ceiling = min(self._retry.max_seconds, self._retry.base_seconds * (2 ** (attempt - 1)))
time.sleep(random.uniform(0.0, ceiling))

View file

@ -0,0 +1,66 @@
from __future__ import annotations
import logging
from typing import Any
from buildbot.configurators import ConfiguratorBase
from .settings import AutoscaleSettings
from .steps import CapacityGateStep, CapacityReleaseStep
log = logging.getLogger(__name__)
class AutoscaleConfigurator(ConfiguratorBase):
def __init__(self, settings: AutoscaleSettings) -> None:
super().__init__()
self.settings = settings
def configure(self, config_dict: dict[str, Any]) -> None:
builders = config_dict.get("builders", [])
patched: list[str] = []
for builder in builders:
name = getattr(builder, "name", "")
if not isinstance(name, str) or not name.endswith("/nix-build"):
continue
factory = getattr(builder, "factory", None)
steps = getattr(factory, "steps", None)
if factory is None or not isinstance(steps, list):
log.warning("Skipping builder with unrecognized factory shape: %s", name)
continue
gate = CapacityGateStep(
name="Ensure remote builder capacity",
daemon_socket=self.settings.daemon_socket,
system_property=self.settings.system_property,
default_system=self.settings.default_system,
reserve_timeout_seconds=self.settings.reserve_timeout_seconds,
poll_interval_seconds=self.settings.poll_interval_seconds,
retry_max_attempts=self.settings.retry_max_attempts,
retry_base_seconds=self.settings.retry_base_seconds,
retry_max_seconds=self.settings.retry_max_seconds,
haltOnFailure=True,
flunkOnFailure=True,
warnOnFailure=False,
)
steps.insert(0, gate)
if self.settings.release_on_finish:
steps.append(
CapacityReleaseStep(
name="Release autoscaler reservation",
daemon_socket=self.settings.daemon_socket,
retry_max_attempts=self.settings.retry_max_attempts,
retry_base_seconds=self.settings.retry_base_seconds,
retry_max_seconds=self.settings.retry_max_seconds,
alwaysRun=True,
flunkOnFailure=False,
warnOnFailure=True,
)
)
patched.append(name)
log.info("AutoscaleConfigurator patched builders: %s", patched)

View file

@ -0,0 +1,14 @@
from dataclasses import dataclass
@dataclass(frozen=True)
class AutoscaleSettings:
daemon_socket: str
system_property: str = "system"
default_system: str = "x86_64-linux"
reserve_timeout_seconds: int = 900
poll_interval_seconds: float = 5.0
retry_max_attempts: int = 5
retry_base_seconds: float = 0.5
retry_max_seconds: float = 5.0
release_on_finish: bool = True

View file

@ -0,0 +1,199 @@
from __future__ import annotations
import time
from typing import Any, cast
from buildbot.plugins import util
from buildbot.process import buildstep
from twisted.internet import defer, reactor
from twisted.internet.interfaces import IReactorTime
from twisted.internet.task import deferLater
from twisted.internet.threads import deferToThread
from .client import DaemonClient, DaemonError, RetryPolicy
from .telemetry import phase_message
class CapacityGateStep(buildstep.BuildStep):
renderables = ()
def __init__(
self,
*,
daemon_socket: str,
system_property: str,
default_system: str,
reserve_timeout_seconds: int,
poll_interval_seconds: float,
retry_max_attempts: int,
retry_base_seconds: float,
retry_max_seconds: float = 5.0,
**kwargs: object,
) -> None:
super().__init__(**kwargs)
self._system_property = system_property
self._default_system = default_system
self._reserve_timeout_seconds = reserve_timeout_seconds
self._poll_interval_seconds = poll_interval_seconds
self._client = DaemonClient(
socket_path=daemon_socket,
retry_policy=RetryPolicy(
max_attempts=retry_max_attempts,
base_seconds=retry_base_seconds,
max_seconds=retry_max_seconds,
),
)
def _determine_system(self) -> str:
if self.build is None:
return self._default_system
props = self.build.getProperties()
value = props.getProperty(self._system_property)
if value:
return str(value)
return self._default_system
def run(self) -> defer.Deferred[int]:
return defer.ensureDeferred(self._run())
async def _run(self) -> int:
system = self._determine_system()
start = time.monotonic()
try:
reserve = await deferToThread(
self._client.post_json,
"/v1/reservations",
{
"system": system,
"reason": "buildbot-nix-build",
"build_id": getattr(self.build, "buildid", None),
},
10.0,
{429, 500, 502, 503, 504},
)
except Exception as exc: # noqa: BLE001
await self._add_log("autoscale_gate_reserve_error", f"reserve failed: {exc}")
self.descriptionDone = ["capacity reservation failed after retries"]
return util.FAILURE
reservation_id = str(reserve["reservation_id"])
self._set_property("autoscale_reservation_id", reservation_id)
last_phase: str | None = "pending"
last_reason: str | None = None
while True:
elapsed = time.monotonic() - start
self.descriptionSuffix = [f"phase={last_phase} elapsed={int(elapsed)}s"]
if elapsed > self._reserve_timeout_seconds:
await self._add_log(
"autoscale_gate_timeout",
f"capacity wait timeout {phase_message(last_phase, last_reason)}",
)
self.descriptionDone = ["capacity wait timeout"]
return util.FAILURE
try:
status = await deferToThread(
self._client.get_json,
f"/v1/reservations/{reservation_id}",
10.0,
{429, 500, 502, 503, 504},
)
except DaemonError as exc:
last_reason = str(exc)
await deferLater(
cast(IReactorTime, reactor),
self._poll_interval_seconds,
lambda: None,
)
continue
phase = str(status.get("phase", "pending"))
reason = status.get("reason")
last_phase = phase
last_reason = str(reason) if reason is not None else None
if phase == "ready":
slot = str(status["slot"])
instance_id = str(status["instance_id"])
waited = int(time.monotonic() - start)
self._set_property("autoscale_slot", slot)
self._set_property("autoscale_instance_id", instance_id)
self._set_property("autoscale_wait_seconds", waited)
self.descriptionDone = [f"capacity ready in {waited}s"]
self.descriptionSuffix = [f"phase={phase}"]
return util.SUCCESS
if phase in {"failed", "expired", "released"}:
await self._add_log(
"autoscale_gate_failure",
f"capacity gate terminal {phase_message(last_phase, last_reason)}",
)
self.descriptionDone = [f"autoscaler reservation {phase}"]
self.descriptionSuffix = [f"phase={phase}"]
return util.FAILURE
await deferLater(
cast(IReactorTime, reactor),
self._poll_interval_seconds,
lambda: None,
)
def _set_property(self, name: str, value: object) -> None:
if self.build is None:
return
self.build.setProperty(name, value, "autoscale")
async def _add_log(self, name: str, message: str) -> None:
log = cast(Any, await self.addLog(name))
log.addStderr(f"{message}\n")
class CapacityReleaseStep(buildstep.BuildStep):
def __init__(
self,
*,
daemon_socket: str,
retry_max_attempts: int,
retry_base_seconds: float,
retry_max_seconds: float = 5.0,
**kwargs: object,
) -> None:
super().__init__(**kwargs)
self._client = DaemonClient(
socket_path=daemon_socket,
retry_policy=RetryPolicy(
max_attempts=retry_max_attempts,
base_seconds=retry_base_seconds,
max_seconds=retry_max_seconds,
),
)
def run(self) -> defer.Deferred[int]:
return defer.ensureDeferred(self._run())
async def _run(self) -> int:
if self.build is None:
return util.SKIPPED
reservation_id = self.build.getProperty("autoscale_reservation_id")
if not reservation_id:
return util.SKIPPED
try:
await deferToThread(
self._client.post_json,
f"/v1/reservations/{reservation_id}/release",
{},
10.0,
{429, 500, 502, 503, 504},
)
except Exception as exc: # noqa: BLE001
log = cast(Any, await self.addLog("autoscale_release_error"))
log.addStderr(f"release failed: {exc}\n")
return util.WARNINGS
self.descriptionDone = ["autoscaler reservation released"]
return util.SUCCESS

View file

@ -0,0 +1,20 @@
from __future__ import annotations
import logging
def get_logger(name: str) -> logging.Logger:
return logging.getLogger(name)
def event(logger: logging.Logger, level: int, name: str, **fields: object) -> None:
logger.log(level, "%s %s", name, " ".join(f"{key}={value!r}" for key, value in fields.items()))
def phase_message(phase: str | None, reason: str | None) -> str:
details: list[str] = []
if phase:
details.append(f"phase={phase}")
if reason:
details.append(f"reason={reason}")
return " ".join(details) if details else "no daemon details"

View file

@ -0,0 +1,11 @@
"""Tests for buildbot_autoscale_ext."""
import asyncio
from contextlib import suppress
from twisted.internet import asyncioreactor
TEST_LOOP = asyncio.new_event_loop()
with suppress(Exception):
asyncioreactor.install(TEST_LOOP)

View file

@ -0,0 +1,212 @@
from __future__ import annotations
import json
import os
import socketserver
import tempfile
import threading
from collections.abc import Callable
from contextlib import suppress
from dataclasses import dataclass
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler
from pathlib import Path
from typing import Any
import pytest
from buildbot_autoscale_ext.client import (
DaemonClient,
DaemonError,
RetryPolicy,
UnixSocketHTTPConnection,
)
@dataclass
class ServerState:
post_count: int = 0
get_count: int = 0
class _Handler(BaseHTTPRequestHandler):
server: _UnixHTTPServer
def do_GET(self) -> None: # noqa: N802
self.server.state.get_count += 1
status, body = self.server.on_get(self.path, self.server.state.get_count)
self._send(status, body)
def do_POST(self) -> None: # noqa: N802
self.server.state.post_count += 1
size = int(self.headers.get("Content-Length", "0"))
raw = self.rfile.read(size) if size else b"{}"
payload = json.loads(raw.decode("utf-8"))
status, body = self.server.on_post(self.path, payload, self.server.state.post_count)
self._send(status, body)
def log_message(self, format: str, *args: object) -> None:
del format, args
def _send(self, status: int, body: dict[str, Any]) -> None:
encoded = json.dumps(body).encode("utf-8")
self.send_response(status)
self.send_header("Content-Type", "application/json")
self.send_header("Content-Length", str(len(encoded)))
self.end_headers()
self.wfile.write(encoded)
class _UnixHTTPServer(socketserver.UnixStreamServer):
def __init__(
self,
socket_path: str,
*,
on_get: Callable[[str, int], tuple[int, dict[str, Any]]],
on_post: Callable[[str, dict[str, Any], int], tuple[int, dict[str, Any]]],
) -> None:
self.on_get = on_get
self.on_post = on_post
self.state = ServerState()
super().__init__(socket_path, _Handler)
class FakeDaemon:
def __init__(
self,
socket_path: str,
*,
on_get: Callable[[str, int], tuple[int, dict[str, Any]]],
on_post: Callable[[str, dict[str, Any], int], tuple[int, dict[str, Any]]],
) -> None:
self._socket_path = socket_path
self._server = _UnixHTTPServer(socket_path, on_get=on_get, on_post=on_post)
self._thread = threading.Thread(target=self._server.serve_forever, daemon=True)
def __enter__(self) -> FakeDaemon:
self._thread.start()
return self
def __exit__(self, exc_type: object, exc: object, tb: object) -> None:
del exc_type, exc, tb
self._server.shutdown()
self._server.server_close()
with suppress(FileNotFoundError):
os.unlink(self._socket_path)
@pytest.fixture
def socket_path() -> str:
with tempfile.TemporaryDirectory() as tmp:
yield str(Path(tmp) / "daemon.sock")
def _client(socket_path: str, attempts: int = 3) -> DaemonClient:
return DaemonClient(
socket_path=socket_path,
retry_policy=RetryPolicy(max_attempts=attempts, base_seconds=0.001, max_seconds=0.01),
)
def test_post_json_success(socket_path: str) -> None:
with FakeDaemon(
socket_path,
on_get=lambda _p, _a: (HTTPStatus.OK, {}),
on_post=lambda _p, payload, _a: (HTTPStatus.OK, {"echo": payload["system"]}),
):
response = _client(socket_path).post_json(
"/v1/reservations",
{"system": "x86_64-linux"},
timeout_seconds=1.0,
retryable_statuses={429, 500, 503},
)
assert response == {"echo": "x86_64-linux"}
def test_get_json_success(socket_path: str) -> None:
with FakeDaemon(
socket_path,
on_get=lambda _p, _a: (HTTPStatus.OK, {"phase": "ready"}),
on_post=lambda _p, _payload, _a: (HTTPStatus.OK, {}),
):
response = _client(socket_path).get_json(
"/v1/reservations/r1",
timeout_seconds=1.0,
retryable_statuses={429, 500, 503},
)
assert response == {"phase": "ready"}
def test_transient_503_retries_then_raises(socket_path: str) -> None:
with (
FakeDaemon(
socket_path,
on_get=lambda _p, _a: (HTTPStatus.SERVICE_UNAVAILABLE, {"error": "busy"}),
on_post=lambda _p, _payload, _a: (HTTPStatus.OK, {}),
) as daemon,
pytest.raises(DaemonError) as exc,
):
_client(socket_path, attempts=3).get_json(
"/v1/reservations/r1",
timeout_seconds=1.0,
retryable_statuses={429, 500, 502, 503, 504},
)
assert exc.value.status == HTTPStatus.SERVICE_UNAVAILABLE
assert daemon._server.state.get_count == 3
def test_400_not_retried(socket_path: str) -> None:
with (
FakeDaemon(
socket_path,
on_get=lambda _p, _a: (HTTPStatus.BAD_REQUEST, {"error": "bad"}),
on_post=lambda _p, _payload, _a: (HTTPStatus.OK, {}),
) as daemon,
pytest.raises(DaemonError) as exc,
):
_client(socket_path, attempts=5).get_json(
"/v1/reservations/r1",
timeout_seconds=1.0,
retryable_statuses={429, 500, 502, 503, 504},
)
assert exc.value.status == HTTPStatus.BAD_REQUEST
assert daemon._server.state.get_count == 1
def test_connection_refused_retries_then_raises(
socket_path: str,
monkeypatch: pytest.MonkeyPatch,
) -> None:
def _boom(self: UnixSocketHTTPConnection) -> None:
raise ConnectionRefusedError("refused")
monkeypatch.setattr(UnixSocketHTTPConnection, "connect", _boom)
with pytest.raises(DaemonError):
_client(socket_path, attempts=3).get_json(
"/v1/reservations/r1",
timeout_seconds=1.0,
retryable_statuses={429, 500, 502, 503, 504},
)
def test_backoff_attempts_at_least_two(socket_path: str) -> None:
with (
FakeDaemon(
socket_path,
on_get=lambda _p, _a: (HTTPStatus.SERVICE_UNAVAILABLE, {"error": "busy"}),
on_post=lambda _p, _payload, _a: (HTTPStatus.OK, {}),
) as daemon,
pytest.raises(DaemonError),
):
_client(socket_path, attempts=2).get_json(
"/v1/reservations/r1",
timeout_seconds=1.0,
retryable_statuses={429, 500, 502, 503, 504},
)
assert daemon._server.state.get_count >= 2

View file

@ -0,0 +1,59 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, field
import pytest
from buildbot_autoscale_ext.configurator import AutoscaleConfigurator
from buildbot_autoscale_ext.settings import AutoscaleSettings
from buildbot_autoscale_ext.steps import CapacityGateStep, CapacityReleaseStep
@dataclass
class FakeFactory:
steps: list[object] = field(default_factory=list)
@dataclass
class FakeBuilder:
name: str
factory: FakeFactory
def test_patches_nix_builders() -> None:
cfg = {
"builders": [
FakeBuilder("proj/nix-build", FakeFactory(["original"])),
FakeBuilder("proj/eval", FakeFactory(["eval"])),
]
}
AutoscaleConfigurator(AutoscaleSettings(daemon_socket="/tmp/daemon.sock")).configure(cfg)
patched_steps = cfg["builders"][0].factory.steps
assert isinstance(patched_steps[0], CapacityGateStep)
assert patched_steps[1] == "original"
assert isinstance(patched_steps[-1], CapacityReleaseStep)
untouched_steps = cfg["builders"][1].factory.steps
assert untouched_steps == ["eval"]
def test_empty_builders_no_error() -> None:
cfg = {"builders": []}
AutoscaleConfigurator(AutoscaleSettings(daemon_socket="/tmp/daemon.sock")).configure(cfg)
def test_startup_log_contains_patched_names(caplog: pytest.LogCaptureFixture) -> None:
caplog.set_level(logging.INFO)
cfg = {
"builders": [
FakeBuilder("one/nix-build", FakeFactory()),
FakeBuilder("two/eval", FakeFactory()),
]
}
AutoscaleConfigurator(AutoscaleSettings(daemon_socket="/tmp/daemon.sock")).configure(cfg)
assert "one/nix-build" in caplog.text

View file

@ -0,0 +1,337 @@
from __future__ import annotations
import json
import os
import socketserver
import tempfile
import threading
from collections.abc import Callable
from contextlib import suppress
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler
from pathlib import Path
from typing import Any
import pytest
from buildbot.plugins import util
from twisted.internet import defer
from buildbot_autoscale_ext.steps import CapacityGateStep, CapacityReleaseStep
class FakeProperties:
def __init__(self, data: dict[str, Any]) -> None:
self._data = data
def getProperty(self, name: str) -> Any:
return self._data.get(name)
class FakeBuild:
def __init__(self) -> None:
self.buildid = 42
self._props: dict[str, Any] = {}
def getProperties(self) -> FakeProperties:
return FakeProperties(self._props)
def setProperty(self, key: str, value: Any, _source: str) -> None:
self._props[key] = value
def getProperty(self, key: str) -> Any:
return self._props.get(key)
class FakeLog:
def __init__(self) -> None:
self.stderr: list[str] = []
def addStderr(self, text: str) -> None:
self.stderr.append(text)
class _Handler(BaseHTTPRequestHandler):
server: _UnixHTTPServer
def do_GET(self) -> None: # noqa: N802
self.server.get_count += 1
status, body = self.server.on_get(self.path, self.server.get_count)
self._send(status, body)
def do_POST(self) -> None: # noqa: N802
self.server.post_count += 1
size = int(self.headers.get("Content-Length", "0"))
raw = self.rfile.read(size) if size else b"{}"
payload = json.loads(raw.decode("utf-8"))
status, body = self.server.on_post(self.path, payload, self.server.post_count)
self._send(status, body)
def _send(self, status: int, body: dict[str, Any]) -> None:
data = json.dumps(body).encode("utf-8")
self.send_response(status)
self.send_header("Content-Type", "application/json")
self.send_header("Content-Length", str(len(data)))
self.end_headers()
self.wfile.write(data)
def log_message(self, format: str, *args: object) -> None:
del format, args
class _UnixHTTPServer(socketserver.UnixStreamServer):
def __init__(
self,
socket_path: str,
*,
on_get: Callable[[str, int], tuple[int, dict[str, Any]]],
on_post: Callable[[str, dict[str, Any], int], tuple[int, dict[str, Any]]],
) -> None:
self.on_get = on_get
self.on_post = on_post
self.get_count = 0
self.post_count = 0
super().__init__(socket_path, _Handler)
class FakeDaemon:
def __init__(
self,
socket_path: str,
*,
on_get: Callable[[str, int], tuple[int, dict[str, Any]]],
on_post: Callable[[str, dict[str, Any], int], tuple[int, dict[str, Any]]],
) -> None:
self._socket_path = socket_path
self._server = _UnixHTTPServer(socket_path, on_get=on_get, on_post=on_post)
self._thread = threading.Thread(target=self._server.serve_forever, daemon=True)
def __enter__(self) -> FakeDaemon:
self._thread.start()
return self
def __exit__(self, exc_type: object, exc: object, tb: object) -> None:
del exc_type, exc, tb
self._server.shutdown()
self._server.server_close()
with suppress(FileNotFoundError):
os.unlink(self._socket_path)
@pytest.fixture
def socket_path() -> str:
with tempfile.TemporaryDirectory() as tmp:
yield str(Path(tmp) / "daemon.sock")
def _attach_build(step: Any, build: FakeBuild) -> None:
object.__setattr__(build, "master", None)
object.__setattr__(step, "build", build)
object.__setattr__(step, "master", None)
async def _add_log(_name: str) -> FakeLog:
return FakeLog()
object.__setattr__(step, "addLog", _add_log)
@pytest.fixture(autouse=True)
def _patch_twisted_waits(monkeypatch: pytest.MonkeyPatch) -> None:
from buildbot.config import errors as config_errors
from buildbot.process import buildstep as buildstep_module
import buildbot_autoscale_ext.steps as steps_mod
def _defer_to_thread(func: Any, *args: object, **kwargs: object) -> defer.Deferred[object]:
try:
return defer.succeed(func(*args, **kwargs))
except Exception as exc: # noqa: BLE001
return defer.fail(exc)
def _defer_later(
_clock: object,
_seconds: float,
callback: Any,
*args: object,
**kwargs: object,
) -> defer.Deferred[object]:
return defer.succeed(callback(*args, **kwargs))
monkeypatch.setattr(steps_mod, "deferToThread", _defer_to_thread)
monkeypatch.setattr(steps_mod, "deferLater", _defer_later)
monkeypatch.setattr(config_errors, "_errors", None, raising=False)
monkeypatch.setattr(buildstep_module.config, "error", lambda *_a, **_k: None)
def _run_step(step: Any) -> int:
deferred = step.run()
out: list[int] = []
failures: list[defer.Failure] = []
deferred.addCallbacks(lambda value: out.append(value), lambda err: failures.append(err))
if failures:
failures[0].raiseException()
return out[0]
def test_gate_success_pending_then_ready(socket_path: str) -> None:
with FakeDaemon(
socket_path,
on_post=lambda _p, _payload, _n: (
HTTPStatus.OK,
{
"reservation_id": "r1",
"phase": "pending",
"created_at": "now",
"expires_at": "soon",
},
),
on_get=lambda _p, n: (
HTTPStatus.OK,
{"reservation_id": "r1", "phase": "pending"}
if n == 1
else {
"reservation_id": "r1",
"phase": "ready",
"slot": "slot-1",
"instance_id": "i-123",
"system": "x86_64-linux",
"updated_at": "now",
},
),
):
step = CapacityGateStep(
name="gate",
daemon_socket=socket_path,
system_property="system",
default_system="x86_64-linux",
reserve_timeout_seconds=2,
poll_interval_seconds=0.01,
retry_max_attempts=2,
retry_base_seconds=0.001,
)
build = FakeBuild()
_attach_build(step, build)
result = _run_step(step)
assert result == util.SUCCESS
assert build.getProperty("autoscale_reservation_id") == "r1"
assert build.getProperty("autoscale_slot") == "slot-1"
assert build.getProperty("autoscale_instance_id") == "i-123"
assert isinstance(build.getProperty("autoscale_wait_seconds"), int)
def test_gate_failure_on_failed_phase(socket_path: str) -> None:
with FakeDaemon(
socket_path,
on_post=lambda _p, _payload, _n: (
HTTPStatus.OK,
{"reservation_id": "r1", "phase": "pending"},
),
on_get=lambda _p, _n: (
HTTPStatus.OK,
{"reservation_id": "r1", "phase": "failed", "reason": "no"},
),
):
step = CapacityGateStep(
name="gate",
daemon_socket=socket_path,
system_property="system",
default_system="x86_64-linux",
reserve_timeout_seconds=2,
poll_interval_seconds=0.01,
retry_max_attempts=2,
retry_base_seconds=0.001,
)
build = FakeBuild()
_attach_build(step, build)
result = _run_step(step)
assert result == util.FAILURE
def test_gate_failure_on_timeout(socket_path: str) -> None:
with FakeDaemon(
socket_path,
on_post=lambda _p, _payload, _n: (
HTTPStatus.OK,
{"reservation_id": "r1", "phase": "pending"},
),
on_get=lambda _p, _n: (HTTPStatus.OK, {"reservation_id": "r1", "phase": "pending"}),
):
step = CapacityGateStep(
name="gate",
daemon_socket=socket_path,
system_property="system",
default_system="x86_64-linux",
reserve_timeout_seconds=0,
poll_interval_seconds=0.01,
retry_max_attempts=2,
retry_base_seconds=0.001,
)
build = FakeBuild()
_attach_build(step, build)
result = _run_step(step)
assert result == util.FAILURE
def test_release_skipped_without_reservation(socket_path: str) -> None:
step = CapacityReleaseStep(
name="release",
daemon_socket=socket_path,
retry_max_attempts=2,
retry_base_seconds=0.001,
)
build = FakeBuild()
_attach_build(step, build)
result = _run_step(step)
assert result == util.SKIPPED
def test_release_warnings_on_retry_exhausted_500(socket_path: str) -> None:
with FakeDaemon(
socket_path,
on_post=lambda _p, _payload, _n: (HTTPStatus.INTERNAL_SERVER_ERROR, {"error": "boom"}),
on_get=lambda _p, _n: (HTTPStatus.OK, {}),
):
step = CapacityReleaseStep(
name="release",
daemon_socket=socket_path,
retry_max_attempts=2,
retry_base_seconds=0.001,
)
build = FakeBuild()
build.setProperty("autoscale_reservation_id", "r1", "test")
_attach_build(step, build)
result = _run_step(step)
assert result == util.WARNINGS
def test_release_success(socket_path: str) -> None:
with FakeDaemon(
socket_path,
on_post=lambda _p, _payload, _n: (
HTTPStatus.OK,
{"reservation_id": "r1", "phase": "released"},
),
on_get=lambda _p, _n: (HTTPStatus.OK, {}),
):
step = CapacityReleaseStep(
name="release",
daemon_socket=socket_path,
retry_max_attempts=2,
retry_base_seconds=0.001,
)
build = FakeBuild()
build.setProperty("autoscale_reservation_id", "r1", "test")
_attach_build(step, build)
result = _run_step(step)
assert result == util.SUCCESS

View file

@ -0,0 +1,43 @@
[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "buildbot-autoscale-ext"
version = "0.1.0"
description = "Buildbot extension for nix-builder-autoscaler capacity gating"
requires-python = ">=3.12"
dependencies = [
"buildbot",
"twisted",
]
[dependency-groups]
dev = [
"pytest",
"ruff",
"pyright",
]
[tool.uv.extra-build-dependencies]
py-ubjson = ["setuptools"]
[tool.ruff]
target-version = "py312"
line-length = 100
[tool.ruff.lint]
select = ["E", "F", "I", "UP", "B", "SIM", "ANN"]
ignore = []
[tool.ruff.lint.per-file-ignores]
"*/tests/*" = ["ANN"]
[tool.pyright]
pythonVersion = "3.12"
typeCheckingMode = "standard"
include = ["buildbot_autoscale_ext"]
exclude = ["**/tests"]
[tool.pytest.ini_options]
testpaths = ["buildbot_autoscale_ext/tests"]

1052
buildbot-ext/uv.lock generated Normal file

File diff suppressed because it is too large Load diff