feat: metrics collectors don't depend on database being ready

also adds new "database_collector" metric to show current collector status
This commit is contained in:
Iain Learmonth 2024-12-03 14:55:53 +00:00
parent 3c3a1485e7
commit 173eea2625

View file

@ -9,6 +9,7 @@ from prometheus_client import make_wsgi_app, Metric, CollectorRegistry
from prometheus_client.metrics_core import GaugeMetricFamily, CounterMetricFamily from prometheus_client.metrics_core import GaugeMetricFamily, CounterMetricFamily
from prometheus_client.registry import Collector from prometheus_client.registry import Collector
from sqlalchemy import text from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError
from werkzeug.middleware.dispatcher import DispatcherMiddleware from werkzeug.middleware.dispatcher import DispatcherMiddleware
from app.api import api from app.api import api
@ -44,65 +45,89 @@ def not_migrating() -> bool:
class DefinedProxiesCollector(Collector):
    """Prometheus collector counting proxies that have not been destroyed.

    Counts are grouped by origin group, provider and pool. A
    ``database_collector`` gauge labelled ``defined_proxies`` is always
    emitted so a failing database query is visible in the scrape output.
    """

    def collect(self) -> Iterator[Metric]:
        """Yield the ``defined_proxies`` gauge, then the collector status gauge.

        The status gauge is 1 when the query succeeded and 0 when a
        ``SQLAlchemyError`` was raised while talking to the database.
        """
        with app.app_context():
            status = GaugeMetricFamily(
                "database_collector",
                "Status of a database collector (0: bad, 1: good)",
                labels=["collector"],
            )
            try:
                with db.engine.connect() as connection:
                    query = text("""
                    SELECT origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
                    COUNT(proxy.id) FROM proxy, origin, pool, "group"
                    WHERE proxy.origin_id = origin.id
                    AND origin.group_id = "group".id
                    AND proxy.pool_id = pool.id
                    AND proxy.destroyed IS NULL
                    GROUP BY origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name;
                    """)
                    rows = connection.execute(query)
                    proxies = GaugeMetricFamily(
                        "defined_proxies",
                        "Number of proxies currently defined for deployment",
                        labels=["group_id", "group_name", "provider", "pool_id", "pool_name"],
                    )
                    for group_id, group_name, provider, pool_id, pool_name, total in rows:
                        # Numeric ids are stringified because label values must be text.
                        proxies.add_metric(
                            [str(group_id), group_name, provider, str(pool_id), pool_name],
                            total,
                        )
                    yield proxies
                    status.add_metric(["defined_proxies"], 1)
            except SQLAlchemyError:
                # Database unavailable or query failed: report it rather than break the scrape.
                status.add_metric(["defined_proxies"], 0)
            yield status
class BlockedProxiesCollector(Collector):
    """Prometheus collector counting proxies that have been deprecated.

    Counts are grouped by origin group, provider, pool and deprecation
    reason. A ``database_collector`` gauge labelled ``deprecated_proxies``
    is always emitted so a failing database query is visible in the scrape
    output.
    """

    def collect(self) -> Iterator[Metric]:
        """Yield the ``deprecated_proxies`` counter, then the collector status gauge.

        The status gauge is 1 when the query succeeded and 0 when a
        ``SQLAlchemyError`` was raised while talking to the database.
        """
        with app.app_context():
            ok = GaugeMetricFamily("database_collector",
                                   "Status of a database collector (0: bad, 1: good)",
                                   labels=["collector"])
            try:
                with db.engine.connect() as conn:
                    result = conn.execute(text("""
                    SELECT origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
                    proxy.deprecation_reason, COUNT(proxy.id) FROM proxy, origin, pool, "group"
                    WHERE proxy.origin_id = origin.id
                    AND origin.group_id = "group".id
                    AND proxy.pool_id = pool.id
                    AND proxy.deprecated IS NOT NULL
                    GROUP BY origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
                    proxy.deprecation_reason;
                    """))
                    c = CounterMetricFamily("deprecated_proxies",
                                            "Number of proxies deprecated",
                                            labels=['group_id', 'group_name', 'provider', 'pool_id', 'pool_name',
                                                    'deprecation_reason'])
                    for row in result:
                        # Numeric ids are stringified because label values must be text.
                        c.add_metric([str(row[0]), row[1], row[2], str(row[3]), row[4], row[5]],
                                     row[6])
                    yield c
                    # Success must be reported as 1 ("good"); it was previously 0,
                    # which made this collector look permanently broken.
                    ok.add_metric(["deprecated_proxies"], 1)
            except SQLAlchemyError:
                # Database unavailable or query failed: report it rather than break the scrape.
                ok.add_metric(["deprecated_proxies"], 0)
            yield ok
class AutomationCollector(Collector):
    """Prometheus collector exposing the state of each automation.

    Automations whose short name is listed in the ``HIDDEN_AUTOMATIONS``
    config entry are left out. A ``database_collector`` gauge labelled
    ``automation_state`` is always emitted so a failing database query is
    visible in the scrape output.
    """

    def collect(self) -> Iterator[Metric]:
        """Yield the ``automation_state`` gauge, then the collector status gauge.

        State values are 0 for idle, 1 for running and 2 for anything else.
        The status gauge is 1 when the query succeeded and 0 when a
        ``SQLAlchemyError`` was raised while talking to the database.
        """
        with app.app_context():
            status = GaugeMetricFamily(
                "database_collector",
                "Status of a database collector (0: bad, 1: good)",
                labels=["collector"],
            )
            try:
                states = GaugeMetricFamily(
                    "automation_state",
                    "The automation state (0: idle, 1: running, 2: error)",
                    labels=["automation_name"],
                )
                for automation in Automation.query.all():
                    if automation.short_name in app.config["HIDDEN_AUTOMATIONS"]:
                        continue
                    if automation.state == AutomationState.IDLE:
                        value = 0
                    elif automation.state == AutomationState.RUNNING:
                        value = 1
                    else:
                        # Every state other than idle/running is reported as 2.
                        value = 2
                    states.add_metric([automation.short_name], value)
                yield states
                status.add_metric(["automation_state"], 1)
            except SQLAlchemyError:
                # Database unavailable or query failed: report it rather than break the scrape.
                status.add_metric(["automation_state"], 0)
            yield status
if not_migrating() and 'DISABLE_METRICS' not in os.environ: if not_migrating() and 'DISABLE_METRICS' not in os.environ: