add timeout safeguards for all slot lifecycle stages

This commit is contained in:
Abel Luck 2026-02-27 15:40:39 +01:00
parent 48ff711f39
commit 3be933f16b
4 changed files with 356 additions and 23 deletions

View file

@@ -181,6 +181,30 @@ in
description = "Drain timeout before force termination.";
};
# Launch-stage safeguard: upper bound, in seconds, on how long a slot may stay
# in the "launching" state before it is forcibly terminated (300 by default).
launchTimeoutSeconds = lib.mkOption {
  description = "Max seconds a slot may remain launching before forced termination.";
  default = 300;
  type = lib.types.int;
};
# Boot-stage safeguard: upper bound, in seconds, on how long a slot may stay
# in the "booting" state before it is forcibly terminated (300 by default).
bootTimeoutSeconds = lib.mkOption {
  description = "Max seconds a slot may remain booting before forced termination.";
  default = 300;
  type = lib.types.int;
};
# Binding-stage safeguard: upper bound, in seconds, on how long a slot may stay
# in the "binding" state before it is forcibly terminated (180 by default).
bindingTimeoutSeconds = lib.mkOption {
  description = "Max seconds a slot may remain binding before forced termination.";
  default = 180;
  type = lib.types.int;
};
# Terminating-stage safeguard: per the option description, the maximum number
# of seconds between terminate retries while a slot is in the "terminating"
# state (300 by default).
terminatingTimeoutSeconds = lib.mkOption {
  description = "Max seconds between terminate retries while slot is terminating.";
  default = 300;
  type = lib.types.int;
};
launchBatchSize = lib.mkOption {
type = lib.types.int;
default = 1;
@@ -301,6 +325,10 @@ in
reservation_ttl_seconds = ${toString cfg.capacity.reservationTtlSeconds}
idle_scale_down_seconds = ${toString cfg.capacity.idleScaleDownSeconds}
drain_timeout_seconds = ${toString cfg.capacity.drainTimeoutSeconds}
launch_timeout_seconds = ${toString cfg.capacity.launchTimeoutSeconds}
boot_timeout_seconds = ${toString cfg.capacity.bootTimeoutSeconds}
binding_timeout_seconds = ${toString cfg.capacity.bindingTimeoutSeconds}
terminating_timeout_seconds = ${toString cfg.capacity.terminatingTimeoutSeconds}
[security]
socket_mode = "${cfg.security.socketMode}"