autoscaler: preflight check instance type availability at startup
This commit is contained in:
parent
02b1a063ab
commit
1a355429cb
4 changed files with 83 additions and 0 deletions
|
|
@ -127,6 +127,7 @@ def main() -> None:
|
|||
)
|
||||
|
||||
runtime = EC2Runtime(config.aws)
|
||||
runtime.preflight_validate()
|
||||
haproxy = HAProxyRuntime(
|
||||
config.haproxy.runtime_socket,
|
||||
config.haproxy.backend,
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ class AwsConfig:
|
|||
"""[aws] section."""
|
||||
|
||||
region: str = "us-east-1"
|
||||
instance_type: str = ""
|
||||
launch_template_id: str = ""
|
||||
on_demand_launch_template_id: str = ""
|
||||
subnet_ids: list[str] = field(default_factory=list)
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ class EC2Runtime(RuntimeAdapter):
|
|||
_tailscale_socket_path: str = "/run/tailscale/tailscaled.sock",
|
||||
) -> None:
|
||||
self._client: Any = _client or boto3.client("ec2", region_name=config.region)
|
||||
self._instance_type = config.instance_type
|
||||
self._launch_template_id = config.launch_template_id
|
||||
self._on_demand_launch_template_id = config.on_demand_launch_template_id
|
||||
self._subnet_ids = list(config.subnet_ids)
|
||||
|
|
@ -68,6 +69,79 @@ class EC2Runtime(RuntimeAdapter):
|
|||
self._subnet_index = 0
|
||||
self._tailscale_socket_path = _tailscale_socket_path
|
||||
|
||||
def preflight_validate(self) -> None:
    """Check that the configured instance type is offered in the configured subnets' AZs.

    Resolves the availability zones of ``self._subnet_ids`` with
    ``describe_subnets`` and then asks ``describe_instance_type_offerings``
    for ``self._instance_type``, restricted to those zones when any were
    resolved. The outcome is reported through the module logger only:

    * ``preflight_misconfiguration`` (error) when no offering is returned;
    * ``preflight_misconfiguration`` (warning) when some target zones lack
      the instance type;
    * ``preflight_ok`` (info) otherwise;
    * ``preflight_validate_failed`` (warning) when any step raises.

    Does nothing when no instance type is configured. Exceptions raised by
    the checks are caught and logged so that a failing API call (for example
    missing permissions) does not prevent startup.
    """
    if not self._instance_type:
        return

    try:
        # Availability zones backing the configured subnets (empty when no
        # subnets are configured).
        target_azs: set[str] = set()
        if self._subnet_ids:
            subnets_resp = self._client.describe_subnets(SubnetIds=self._subnet_ids)
            target_azs = {
                subnet["AvailabilityZone"]
                for subnet in subnets_resp.get("Subnets", [])
                if subnet.get("AvailabilityZone")
            }

        filters: list[dict[str, Any]] = [
            {"Name": "instance-type", "Values": [self._instance_type]},
        ]
        if target_azs:
            # Sorted so the request is deterministic across runs.
            filters.append({"Name": "location", "Values": sorted(target_azs)})

        offerings_resp = self._client.describe_instance_type_offerings(
            LocationType="availability-zone",
            Filters=filters,
        )
        available_azs = {
            offering["Location"]
            for offering in offerings_resp.get("InstanceTypeOfferings", [])
        }

        if not available_azs:
            region = self._client.meta.region_name
            if target_azs:
                # The query was restricted to the subnet AZs, so an empty
                # result only proves the type is missing there; it may still
                # be offered in other AZs of the region.
                scope = f"any configured subnet AZ {sorted(target_azs)} of region {region!r}"
            else:
                scope = f"region {region!r}"
            log.error(
                "preflight_misconfiguration",
                extra={
                    "error": (
                        f"instance type {self._instance_type!r} is not available in"
                        f" {scope} - all launches will fail with Unsupported"
                    ),
                    "category": "misconfiguration",
                },
            )
            return

        missing_azs = target_azs - available_azs
        if missing_azs:
            log.warning(
                "preflight_misconfiguration",
                extra={
                    "error": (
                        f"instance type {self._instance_type!r} is not available in"
                        f" AZs {sorted(missing_azs)} - launches into those subnets will"
                        " fail with Unsupported"
                    ),
                    "category": "misconfiguration",
                },
            )
        else:
            log.info(
                "preflight_ok",
                extra={
                    "error": None,
                    "category": None,
                },
            )
    except Exception as exc:
        # Deliberately broad: this check is best-effort and must not abort startup.
        log.warning(
            "preflight_validate_failed",
            extra={"error": str(exc), "category": "unknown"},
        )
|
||||
|
||||
def launch_instance(
|
||||
self, slot_id: str, user_data: str, *, nested_virtualization: bool = False
|
||||
) -> str:
|
||||
|
|
|
|||
|
|
@ -63,6 +63,12 @@ in
|
|||
description = "AWS region for EC2 launches.";
|
||||
};
|
||||
|
||||
# EC2 instance type option (string). An empty value, which is the default,
# leaves the instance_type line out of the generated [aws] config section.
instanceType = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "";
|
||||
description = "EC2 instance type for nix builders. Used for preflight availability validation at startup.";
|
||||
};
|
||||
|
||||
launchTemplateIdFile = lib.mkOption {
|
||||
type = lib.types.nullOr lib.types.str;
|
||||
default = null;
|
||||
|
|
@ -329,6 +335,7 @@ in
|
|||
|
||||
[aws]
|
||||
region = "${cfg.aws.region}"
|
||||
${lib.optionalString (cfg.aws.instanceType != "") ''instance_type = "${cfg.aws.instanceType}"''}
|
||||
launch_template_id = "$launch_template_id"
|
||||
${lib.optionalString (
|
||||
cfg.aws.onDemandLaunchTemplateIdFile != null
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue