use userdata only to seed ami bootstrap env

This commit is contained in:
Abel Luck 2026-02-27 16:03:00 +01:00
parent 3be933f16b
commit a7aabdff51
3 changed files with 30 additions and 56 deletions

View file

@ -1,71 +1,27 @@
"""EC2 user-data template rendering for builder instance bootstrap.
The generated script follows the NixOS AMI pattern: write config files
that existing systemd services (tailscale-autoconnect, nix-daemon) consume,
rather than calling ``tailscale up`` directly.
"""
"""EC2 user-data template rendering for builder instance bootstrap."""
from __future__ import annotations
import textwrap
def render_userdata(slot_id: str, ssm_param: str = "/nix-builder/ts-authkey") -> str:
    """Render user-data that seeds AMI bootstrap inputs only.

    The returned string is a complete bash script. It writes
    ``/etc/nix-builder-bootstrap-env`` (mode 600) containing ``SLOT_ID`` and
    ``TS_SSM_PARAM``. The AMI's buildbot-ami-bootstrap service consumes this
    env file and handles SSM fetch + tailscale-autoconnect config generation.

    Args:
        slot_id: Autoscaler slot identifier.
        ssm_param: SSM parameter path containing the Tailscale auth key.

    Returns:
        The user-data script text.
    """
    import shlex  # function-scope import: only this renderer needs it

    # Dedent the template *before* substituting values, so an interpolated
    # value (e.g. one containing a newline) can never change the common-indent
    # calculation and leave the shebang line indented.
    template = textwrap.dedent("""\
        #!/usr/bin/env bash
        set -euo pipefail

        SLOT_ID={slot_id}
        SSM_PARAM={ssm_param}

        # Seed AMI bootstrap inputs only; buildbot-ami-bootstrap reads this file.
        cat > /etc/nix-builder-bootstrap-env <<EOF
        SLOT_ID="$SLOT_ID"
        TS_SSM_PARAM="$SSM_PARAM"
        EOF
        chmod 600 /etc/nix-builder-bootstrap-env
    """)

    # shlex.quote makes each value a single shell word, so quotes, `$`, or
    # backticks in the inputs are assigned literally instead of being
    # interpreted by bash when the assignment lines run.
    return template.format(
        slot_id=shlex.quote(slot_id),
        ssm_param=shlex.quote(ssm_param),
    )

View file

@ -216,7 +216,7 @@ def _launch_slot(
) -> None:
"""Launch a single slot. Transition to LAUNCHING on success, ERROR on failure."""
slot_id = slot["slot_id"]
user_data = render_userdata(slot_id, config.aws.region)
user_data = render_userdata(slot_id)
try:
instance_id = runtime.launch_spot(slot_id, user_data)
metrics.counter("autoscaler_ec2_launch_total", {"result": "success"}, 1.0)

View file

@ -0,0 +1,18 @@
"""Unit tests for builder bootstrap user-data rendering."""
from nix_builder_autoscaler.bootstrap.userdata import render_userdata
def test_render_userdata_writes_bootstrap_env_inputs() -> None:
    """The rendered script writes the bootstrap env file with both inputs."""
    rendered = render_userdata("slot001", ssm_param="/nix-builder/ts-authkey")
    required_fragments = (
        "cat > /etc/nix-builder-bootstrap-env <<EOF",
        'SLOT_ID="$SLOT_ID"',
        'TS_SSM_PARAM="$SSM_PARAM"',
        "chmod 600 /etc/nix-builder-bootstrap-env",
    )
    # Every fragment must appear verbatim in the generated script.
    for fragment in required_fragments:
        assert fragment in rendered
def test_render_userdata_does_not_inline_tailscale_setup() -> None:
    """The rendered script contains no inline SSM fetch or Tailscale config."""
    rendered = render_userdata("slot001", ssm_param="/nix-builder/ts-authkey")
    forbidden_fragments = (
        "aws ssm get-parameter",
        "/etc/tailscale/autoconnect.conf",
        "TS_EXTRA_ARGS=",
    )
    # None of these fragments may appear anywhere in the generated script.
    for fragment in forbidden_fragments:
        assert fragment not in rendered