diff --git a/Makefile b/Makefile index d9c6c9a..c2d5961 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,7 @@ check: $(MAKE) lint $(MAKE) types $(MAKE) bandit + $(MAKE) test lint: $(POETRY) flake8 $(SRC) diff --git a/poetry.lock b/poetry.lock index 9aac367..07165c4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -987,6 +987,37 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "prometheus-client" +version = "0.18.0" +description = "Python client for the Prometheus monitoring system." +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "prometheus_client-0.18.0-py3-none-any.whl", hash = "sha256:8de3ae2755f890826f4b6479e5571d4f74ac17a81345fe69a6778fdb92579184"}, + {file = "prometheus_client-0.18.0.tar.gz", hash = "sha256:35f7a8c22139e2bb7ca5a698e92d38145bc8dc74c1c0bf56f25cca886a764e17"}, +] + +[package.extras] +twisted = ["twisted"] + +[[package]] +name = "prometheus-fastapi-instrumentator" +version = "6.1.0" +description = "Instrument your FastAPI with Prometheus metrics." 
+category = "main" +optional = false +python-versions = ">=3.7.0,<4.0.0" +files = [ + {file = "prometheus_fastapi_instrumentator-6.1.0-py3-none-any.whl", hash = "sha256:2279ac1cf5b9566a4c3a07f78c9c5ee19648ed90976ab87d73d672abc1bfa017"}, + {file = "prometheus_fastapi_instrumentator-6.1.0.tar.gz", hash = "sha256:1820d7a90389ce100f7d1285495ead388818ae0882e761c1f3e6e62a410bdf13"}, +] + +[package.dependencies] +fastapi = ">=0.38.1,<1.0.0" +prometheus-client = ">=0.8.0,<1.0.0" + [[package]] name = "pycodestyle" version = "2.11.1" @@ -1512,4 +1543,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "c48151b734ebe301d09fb06d15888e2b92fd11b80e710e36aa2552082c1637ad" +content-hash = "8712e625f6772d4b4126ad799b658caa72b103d756c35572d63dee3db88e1175" diff --git a/pyproject.toml b/pyproject.toml index df6f055..282103b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,8 @@ uvicorn = "^0.24.0" httpx = "^0.25.1" pydantic-settings = "^2.0.3" json-logging = "^1.3.0" +prometheus-client = "^0.18.0" +prometheus-fastapi-instrumentator = "^6.1.0" [tool.poetry.dev-dependencies] pytest = "*" diff --git a/tailscalesd/main.py b/tailscalesd/main.py index adfb8cb..57150ba 100644 --- a/tailscalesd/main.py +++ b/tailscalesd/main.py @@ -2,6 +2,7 @@ import asyncio import logging import os import sys +from contextlib import asynccontextmanager from ipaddress import ip_address from typing import Dict, List @@ -9,6 +10,8 @@ import httpx import json_logging # type: ignore import uvicorn from fastapi import FastAPI +from prometheus_client import Counter +from prometheus_fastapi_instrumentator import Instrumentator from pydantic import Field, SecretStr from pydantic_settings import BaseSettings, SettingsConfigDict @@ -20,6 +23,17 @@ log = logging.getLogger("tailscalesd") log.setLevel(logging.DEBUG if debug else logging.INFO) log.addHandler(logging.StreamHandler(sys.stdout)) +counter_unhandled_background_task_crashes = Counter( + 
"tailscalesd_unhandled_background_task_crashes", + "The number of unhandled background task crashes", +) + +counter_matrix_sd_down = Counter( + "tailscalesd_matrix_sd_down", + "The number of times a matrix sd host was unreachable", + ["device_hostname"], +) + def ipv4_only(addresses) -> List[str]: """Given a list of ip addresses, returns only the ipv4 ones""" @@ -38,11 +52,6 @@ class Settings(BaseSettings): settings = Settings() # type: ignore[call-arg] -app = FastAPI() - -json_logging.init_fastapi(enable_json=True) -json_logging.init_request_instrument(app) - CACHE_SD = [] @@ -60,6 +69,7 @@ async def tailscale_devices() -> List: "Polling tailscale devices failed!", exc_info=e, ) + counter_unhandled_background_task_crashes.inc() return [] @@ -132,6 +142,7 @@ async def matrix_sd(tailnet, devices) -> List: try: workers = await matrix_node_sd(device) except Exception as e: + counter_matrix_sd_down.labels(device_hostname=device["hostname"]).inc() log.error( f"Failed parsing matrix node sd for device={device['hostname']}", exc_info=e, @@ -169,15 +180,27 @@ async def poll_sd(): CACHE_SD = matrix_targets + device_targets await asyncio.sleep(settings.interval) except Exception as e: + counter_unhandled_background_task_crashes.inc() log.error( "Service Discovery poller failed", exc_info=e, ) -@app.on_event("startup") -async def start_sd(): - asyncio.create_task(poll_sd()) +@asynccontextmanager +async def lifespan(app: FastAPI): + instrumentator.expose(app) + # Hold a reference so the poller task is not garbage-collected mid-run. + poller_task = asyncio.create_task(poll_sd()) + yield + poller_task.cancel() + + +app = FastAPI(lifespan=lifespan) +instrumentator = Instrumentator().instrument(app) + +json_logging.init_fastapi(enable_json=True) +json_logging.init_request_instrument(app) @app.get("/")