agent: complete plan05 closeout
This commit is contained in:
parent
33ba248c49
commit
2f0fffa905
12 changed files with 1347 additions and 313 deletions
|
|
@ -68,7 +68,7 @@ class Reconciler:
|
|||
|
||||
# 2. Query HAProxy
|
||||
try:
|
||||
haproxy_health = self._haproxy.read_slot_health()
|
||||
haproxy_health = self._haproxy_read_slot_health()
|
||||
except HAProxyError:
|
||||
log.warning("haproxy_stat_failed", exc_info=True)
|
||||
haproxy_health = {}
|
||||
|
|
@ -142,8 +142,8 @@ class Reconciler:
|
|||
if tailscale_ip is not None:
|
||||
self._db.update_slot_state(slot["slot_id"], SlotState.BINDING, instance_ip=tailscale_ip)
|
||||
try:
|
||||
self._haproxy.set_slot_addr(slot["slot_id"], tailscale_ip)
|
||||
self._haproxy.enable_slot(slot["slot_id"])
|
||||
self._haproxy_set_slot_addr(slot["slot_id"], tailscale_ip)
|
||||
self._haproxy_enable_slot(slot["slot_id"])
|
||||
except HAProxyError:
|
||||
log.warning(
|
||||
"haproxy_binding_setup_failed",
|
||||
|
|
@ -169,8 +169,8 @@ class Reconciler:
|
|||
ip = slot.get("instance_ip")
|
||||
if ip:
|
||||
try:
|
||||
self._haproxy.set_slot_addr(slot_id, ip)
|
||||
self._haproxy.enable_slot(slot_id)
|
||||
self._haproxy_set_slot_addr(slot_id, ip)
|
||||
self._haproxy_enable_slot(slot_id)
|
||||
except HAProxyError:
|
||||
pass
|
||||
|
||||
|
|
@ -204,7 +204,7 @@ class Reconciler:
|
|||
|
||||
# Disable HAProxy (idempotent)
|
||||
with contextlib.suppress(HAProxyError):
|
||||
self._haproxy.disable_slot(slot_id)
|
||||
self._haproxy_disable_slot(slot_id)
|
||||
|
||||
now = self._clock.now()
|
||||
last_change = datetime.fromisoformat(slot["last_state_change"])
|
||||
|
|
@ -216,8 +216,17 @@ class Reconciler:
|
|||
if instance_id:
|
||||
try:
|
||||
self._runtime.terminate_instance(instance_id)
|
||||
self._metrics.counter("autoscaler_ec2_terminate_total", {}, 1.0)
|
||||
self._metrics.counter(
|
||||
"autoscaler_ec2_terminate_total",
|
||||
{"result": "success"},
|
||||
1.0,
|
||||
)
|
||||
except Exception:
|
||||
self._metrics.counter(
|
||||
"autoscaler_ec2_terminate_total",
|
||||
{"result": "error"},
|
||||
1.0,
|
||||
)
|
||||
log.warning(
|
||||
"terminate_failed",
|
||||
extra={"slot_id": slot_id, "instance_id": instance_id},
|
||||
|
|
@ -252,7 +261,70 @@ class Reconciler:
|
|||
"""Emit reconciler metrics."""
|
||||
summary = self._db.get_state_summary()
|
||||
for state, count in summary["slots"].items():
|
||||
if state == "total":
|
||||
continue
|
||||
self._metrics.gauge("autoscaler_slots", {"state": state}, float(count))
|
||||
self._metrics.histogram_observe("autoscaler_reconciler_tick_seconds", {}, tick_duration)
|
||||
self._metrics.gauge("autoscaler_slots_total", {"state": state}, float(count))
|
||||
self._metrics.histogram_observe("autoscaler_reconcile_duration_seconds", {}, tick_duration)
|
||||
|
||||
def _run_haproxy_command(self, cmd, command, *args):
    """Invoke one HAProxy client call and count its outcome.

    Increments the ``autoscaler_haproxy_command_total`` counter by 1.0 with
    the labels ``{"cmd": cmd, "result": "success"}`` when ``command``
    returns, or ``{"cmd": cmd, "result": "error"}`` when it raises
    ``HAProxyError``.

    Args:
        cmd: Value used for the ``cmd`` metric label.
        command: Callable to invoke (a method of ``self._haproxy``).
        *args: Positional arguments forwarded to ``command``.

    Returns:
        Whatever ``command(*args)`` returns.

    Raises:
        HAProxyError: Re-raised unchanged after the error counter has been
            incremented, so callers keep their own handling.
    """
    try:
        result = command(*args)
    except HAProxyError:
        self._metrics.counter(
            "autoscaler_haproxy_command_total",
            {"cmd": cmd, "result": "error"},
            1.0,
        )
        raise
    # Counted outside the try block so that only the HAProxy call itself
    # can ever be classified as a command error.
    self._metrics.counter(
        "autoscaler_haproxy_command_total",
        {"cmd": cmd, "result": "success"},
        1.0,
    )
    return result


def _haproxy_set_slot_addr(self, slot_id: str, ip: str) -> None:
    """Call ``self._haproxy.set_slot_addr(slot_id, ip)`` and count the result.

    The outcome is recorded under the metric label ``cmd="set_slot_addr"``.

    Raises:
        HAProxyError: Propagated from the HAProxy client.
    """
    self._run_haproxy_command(
        "set_slot_addr", self._haproxy.set_slot_addr, slot_id, ip
    )


def _haproxy_enable_slot(self, slot_id: str) -> None:
    """Call ``self._haproxy.enable_slot(slot_id)`` and count the result.

    The outcome is recorded under the metric label ``cmd="enable_slot"``.

    Raises:
        HAProxyError: Propagated from the HAProxy client.
    """
    self._run_haproxy_command("enable_slot", self._haproxy.enable_slot, slot_id)


def _haproxy_disable_slot(self, slot_id: str) -> None:
    """Call ``self._haproxy.disable_slot(slot_id)`` and count the result.

    The outcome is recorded under the metric label ``cmd="disable_slot"``.

    Raises:
        HAProxyError: Propagated from the HAProxy client.
    """
    self._run_haproxy_command("disable_slot", self._haproxy.disable_slot, slot_id)


def _haproxy_read_slot_health(self) -> dict:
    """Return ``self._haproxy.read_slot_health()`` and count the result.

    The outcome is recorded under the metric label ``cmd="show_stat"``
    (the label intentionally differs from the client method name).

    Returns:
        The value returned by the HAProxy client's ``read_slot_health``.

    Raises:
        HAProxyError: Propagated from the HAProxy client.
    """
    return self._run_haproxy_command("show_stat", self._haproxy.read_slot_health)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue