Add Prometheus metrics to monitor tailscalesd
This commit is contained in:
parent
b080318748
commit
2db375820f
4 changed files with 63 additions and 8 deletions
1
Makefile
1
Makefile
|
|
@ -25,6 +25,7 @@ check:
|
||||||
$(MAKE) lint
|
$(MAKE) lint
|
||||||
$(MAKE) types
|
$(MAKE) types
|
||||||
$(MAKE) bandit
|
$(MAKE) bandit
|
||||||
|
$(MAKE) test
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
$(POETRY) flake8 $(SRC)
|
$(POETRY) flake8 $(SRC)
|
||||||
|
|
|
||||||
33
poetry.lock
generated
33
poetry.lock
generated
|
|
@ -987,6 +987,37 @@ files = [
|
||||||
dev = ["pre-commit", "tox"]
|
dev = ["pre-commit", "tox"]
|
||||||
testing = ["pytest", "pytest-benchmark"]
|
testing = ["pytest", "pytest-benchmark"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "prometheus-client"
|
||||||
|
version = "0.18.0"
|
||||||
|
description = "Python client for the Prometheus monitoring system."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "prometheus_client-0.18.0-py3-none-any.whl", hash = "sha256:8de3ae2755f890826f4b6479e5571d4f74ac17a81345fe69a6778fdb92579184"},
|
||||||
|
{file = "prometheus_client-0.18.0.tar.gz", hash = "sha256:35f7a8c22139e2bb7ca5a698e92d38145bc8dc74c1c0bf56f25cca886a764e17"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
twisted = ["twisted"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "prometheus-fastapi-instrumentator"
|
||||||
|
version = "6.1.0"
|
||||||
|
description = "Instrument your FastAPI with Prometheus metrics."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7.0,<4.0.0"
|
||||||
|
files = [
|
||||||
|
{file = "prometheus_fastapi_instrumentator-6.1.0-py3-none-any.whl", hash = "sha256:2279ac1cf5b9566a4c3a07f78c9c5ee19648ed90976ab87d73d672abc1bfa017"},
|
||||||
|
{file = "prometheus_fastapi_instrumentator-6.1.0.tar.gz", hash = "sha256:1820d7a90389ce100f7d1285495ead388818ae0882e761c1f3e6e62a410bdf13"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
fastapi = ">=0.38.1,<1.0.0"
|
||||||
|
prometheus-client = ">=0.8.0,<1.0.0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pycodestyle"
|
name = "pycodestyle"
|
||||||
version = "2.11.1"
|
version = "2.11.1"
|
||||||
|
|
@ -1512,4 +1543,4 @@ multidict = ">=4.0"
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.11"
|
python-versions = "^3.11"
|
||||||
content-hash = "c48151b734ebe301d09fb06d15888e2b92fd11b80e710e36aa2552082c1637ad"
|
content-hash = "8712e625f6772d4b4126ad799b658caa72b103d756c35572d63dee3db88e1175"
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,8 @@ uvicorn = "^0.24.0"
|
||||||
httpx = "^0.25.1"
|
httpx = "^0.25.1"
|
||||||
pydantic-settings = "^2.0.3"
|
pydantic-settings = "^2.0.3"
|
||||||
json-logging = "^1.3.0"
|
json-logging = "^1.3.0"
|
||||||
|
prometheus-client = "^0.18.0"
|
||||||
|
prometheus-fastapi-instrumentator = "^6.1.0"
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
pytest = "*"
|
pytest = "*"
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import asyncio
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
from ipaddress import ip_address
|
from ipaddress import ip_address
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
|
|
@ -9,6 +10,8 @@ import httpx
|
||||||
import json_logging # type: ignore
|
import json_logging # type: ignore
|
||||||
import uvicorn
|
import uvicorn
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
|
from prometheus_client import Counter
|
||||||
|
from prometheus_fastapi_instrumentator import Instrumentator
|
||||||
from pydantic import Field, SecretStr
|
from pydantic import Field, SecretStr
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
|
||||||
|
|
@ -20,6 +23,17 @@ log = logging.getLogger("tailscalesd")
|
||||||
log.setLevel(logging.DEBUG if debug else logging.INFO)
|
log.setLevel(logging.DEBUG if debug else logging.INFO)
|
||||||
log.addHandler(logging.StreamHandler(sys.stdout))
|
log.addHandler(logging.StreamHandler(sys.stdout))
|
||||||
|
|
||||||
|
counter_unhandled_background_task_crashes = Counter(
|
||||||
|
"tailscalesd_unhandled_background_task_crashes",
|
||||||
|
"The number of unhandled background task crashes",
|
||||||
|
)
|
||||||
|
|
||||||
|
counter_matrix_sd_down = Counter(
|
||||||
|
"tailscalesd_matrix_sd_down",
|
||||||
|
"The number of times a matrix sd host was unreachable",
|
||||||
|
["device_hostname"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def ipv4_only(addresses) -> List[str]:
|
def ipv4_only(addresses) -> List[str]:
|
||||||
"""Given a list of ip addresses, returns only the ipv4 ones"""
|
"""Given a list of ip addresses, returns only the ipv4 ones"""
|
||||||
|
|
@ -38,11 +52,6 @@ class Settings(BaseSettings):
|
||||||
|
|
||||||
|
|
||||||
settings = Settings() # type: ignore[call-arg]
|
settings = Settings() # type: ignore[call-arg]
|
||||||
app = FastAPI()
|
|
||||||
|
|
||||||
json_logging.init_fastapi(enable_json=True)
|
|
||||||
json_logging.init_request_instrument(app)
|
|
||||||
|
|
||||||
CACHE_SD = []
|
CACHE_SD = []
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -60,6 +69,7 @@ async def tailscale_devices() -> List:
|
||||||
"Polling tailscale devices failed!",
|
"Polling tailscale devices failed!",
|
||||||
exc_info=e,
|
exc_info=e,
|
||||||
)
|
)
|
||||||
|
counter_unhandled_background_task_crashes.inc()
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -132,6 +142,7 @@ async def matrix_sd(tailnet, devices) -> List:
|
||||||
try:
|
try:
|
||||||
workers = await matrix_node_sd(device)
|
workers = await matrix_node_sd(device)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
counter_matrix_sd_down.labels(device_hostname=device["hostname"]).inc()
|
||||||
log.error(
|
log.error(
|
||||||
f"Failed parsing matrix node sd for device={device['hostname']}",
|
f"Failed parsing matrix node sd for device={device['hostname']}",
|
||||||
exc_info=e,
|
exc_info=e,
|
||||||
|
|
@ -169,15 +180,25 @@ async def poll_sd():
|
||||||
CACHE_SD = matrix_targets + device_targets
|
CACHE_SD = matrix_targets + device_targets
|
||||||
await asyncio.sleep(settings.interval)
|
await asyncio.sleep(settings.interval)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
counter_unhandled_background_task_crashes.inc()
|
||||||
log.error(
|
log.error(
|
||||||
"Service Discovery poller failed",
|
"Service Discovery poller failed",
|
||||||
exc_info=e,
|
exc_info=e,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.on_event("startup")
|
@asynccontextmanager
|
||||||
async def start_sd():
|
async def lifespan(app: FastAPI):
|
||||||
|
instrumentator.expose(app)
|
||||||
asyncio.create_task(poll_sd())
|
asyncio.create_task(poll_sd())
|
||||||
|
yield
|
||||||
|
|
||||||
|
|
||||||
|
app = FastAPI(lifespan=lifespan)
|
||||||
|
instrumentator = Instrumentator().instrument(app)
|
||||||
|
|
||||||
|
json_logging.init_fastapi(enable_json=True)
|
||||||
|
json_logging.init_request_instrument(app)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue