feat: metrics collectors don't depend on database being ready

Also adds a new "database_collector" metric to show the current status of each collector.
This commit is contained in:
Iain Learmonth 2024-12-03 14:55:53 +00:00
parent 3c3a1485e7
commit 173eea2625

View file

@ -9,6 +9,7 @@ from prometheus_client import make_wsgi_app, Metric, CollectorRegistry
from prometheus_client.metrics_core import GaugeMetricFamily, CounterMetricFamily
from prometheus_client.registry import Collector
from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError
from werkzeug.middleware.dispatcher import DispatcherMiddleware
from app.api import api
@ -44,65 +45,89 @@ def not_migrating() -> bool:
class DefinedProxiesCollector(Collector):
    """Collector for the ``defined_proxies`` gauge.

    Counts proxies whose ``destroyed`` column is NULL, grouped by origin
    group, provider and pool. Alongside it, a ``database_collector`` gauge
    labelled ``defined_proxies`` reports whether the query worked
    (1: good, 0: bad), so a scrape still succeeds when the database is not
    ready.
    """

    def collect(self) -> Iterator[Metric]:
        """Yield ``defined_proxies`` (only if the query succeeded), then the status gauge.

        ``SQLAlchemyError`` raised while querying is swallowed on purpose and
        reported through the status gauge instead of failing the scrape.
        """
        with app.app_context():
            ok = GaugeMetricFamily("database_collector",
                                   "Status of a database collector (0: bad, 1: good)",
                                   labels=["collector"])
            try:
                # Fetch everything up front so the connection is returned to
                # the pool before this generator is suspended at a ``yield``.
                with db.engine.connect() as conn:
                    rows = conn.execute(text("""
                    SELECT origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
                        COUNT(proxy.id) FROM proxy, origin, pool, "group"
                        WHERE proxy.origin_id = origin.id
                        AND origin.group_id = "group".id
                        AND proxy.pool_id = pool.id
                        AND proxy.destroyed IS NULL
                        GROUP BY origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name;
                    """)).fetchall()
                c = GaugeMetricFamily("defined_proxies", "Number of proxies currently defined for deployment",
                                      labels=['group_id', 'group_name', 'provider', 'pool_id',
                                              'pool_name'])
                for row in rows:
                    # Numeric ids are stringified because label values must be strings.
                    c.add_metric([str(row[0]), row[1], row[2], str(row[3]), row[4]],
                                 row[5])
            except SQLAlchemyError:
                ok.add_metric(["defined_proxies"], 0)
            else:
                yield c
                ok.add_metric(["defined_proxies"], 1)
            yield ok
class BlockedProxiesCollector(Collector):
    """Collector for the ``deprecated_proxies`` counter.

    Counts proxies whose ``deprecated`` column is not NULL, grouped by origin
    group, provider, pool and deprecation reason. Alongside it, a
    ``database_collector`` gauge labelled ``deprecated_proxies`` reports
    whether the query worked (1: good, 0: bad), so a scrape still succeeds
    when the database is not ready.
    """

    def collect(self) -> Iterator[Metric]:
        """Yield ``deprecated_proxies`` (only if the query succeeded), then the status gauge.

        ``SQLAlchemyError`` raised while querying is swallowed on purpose and
        reported through the status gauge instead of failing the scrape.
        """
        with app.app_context():
            ok = GaugeMetricFamily("database_collector",
                                   "Status of a database collector (0: bad, 1: good)",
                                   labels=["collector"])
            try:
                # Fetch everything up front so the connection is returned to
                # the pool before this generator is suspended at a ``yield``.
                with db.engine.connect() as conn:
                    rows = conn.execute(text("""
                    SELECT origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
                        proxy.deprecation_reason, COUNT(proxy.id) FROM proxy, origin, pool, "group"
                        WHERE proxy.origin_id = origin.id
                        AND origin.group_id = "group".id
                        AND proxy.pool_id = pool.id
                        AND proxy.deprecated IS NOT NULL
                        GROUP BY origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
                        proxy.deprecation_reason;
                    """)).fetchall()
                c = CounterMetricFamily("deprecated_proxies",
                                        "Number of proxies deprecated",
                                        labels=['group_id', 'group_name', 'provider', 'pool_id', 'pool_name',
                                                'deprecation_reason'])
                for row in rows:
                    # Numeric ids are stringified because label values must be strings.
                    c.add_metric([str(row[0]), row[1], row[2], str(row[3]), row[4], row[5]],
                                 row[6])
            except SQLAlchemyError:
                ok.add_metric(["deprecated_proxies"], 0)
            else:
                yield c
                # Success must be reported as 1; reporting 0 here would make
                # the collector look broken on every scrape.
                ok.add_metric(["deprecated_proxies"], 1)
            yield ok
class AutomationCollector(Collector):
    """Collector for the ``automation_state`` gauge.

    Emits one sample per automation that is not listed in
    ``app.config['HIDDEN_AUTOMATIONS']``: 0 for idle, 1 for running and 2 for
    any other state. Alongside it, a ``database_collector`` gauge labelled
    ``automation_state`` reports whether loading the automations worked
    (1: good, 0: bad), so a scrape still succeeds when the database is not
    ready.
    """

    def collect(self) -> Iterator[Metric]:
        """Yield ``automation_state`` (only if the query succeeded), then the status gauge.

        ``SQLAlchemyError`` raised while loading automations is swallowed on
        purpose and reported through the status gauge instead of failing the
        scrape.
        """
        with app.app_context():
            ok = GaugeMetricFamily("database_collector",
                                   "Status of a database collector (0: bad, 1: good)",
                                   labels=["collector"])
            try:
                c = GaugeMetricFamily("automation_state", "The automation state (0: idle, 1: running, 2: error)",
                                      labels=['automation_name'])
                automations = Automation.query.all()
                for automation in automations:
                    if automation.short_name in app.config['HIDDEN_AUTOMATIONS']:
                        continue
                    if automation.state == AutomationState.IDLE:
                        c.add_metric([automation.short_name], 0)
                    elif automation.state == AutomationState.RUNNING:
                        c.add_metric([automation.short_name], 1)
                    else:
                        # Every state other than idle/running is exported as 2.
                        c.add_metric([automation.short_name], 2)
            except SQLAlchemyError:
                ok.add_metric(["automation_state"], 0)
            else:
                yield c
                ok.add_metric(["automation_state"], 1)
            yield ok
if not_migrating() and 'DISABLE_METRICS' not in os.environ: