feat: metrics collectors don't depend on database being ready

Also adds a new "database_collector" metric to show the current status of each collector.
This commit is contained in:
Iain Learmonth 2024-12-03 14:55:53 +00:00
parent 3c3a1485e7
commit 173eea2625

View file

@ -9,6 +9,7 @@ from prometheus_client import make_wsgi_app, Metric, CollectorRegistry
from prometheus_client.metrics_core import GaugeMetricFamily, CounterMetricFamily from prometheus_client.metrics_core import GaugeMetricFamily, CounterMetricFamily
from prometheus_client.registry import Collector from prometheus_client.registry import Collector
from sqlalchemy import text from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError
from werkzeug.middleware.dispatcher import DispatcherMiddleware from werkzeug.middleware.dispatcher import DispatcherMiddleware
from app.api import api from app.api import api
@ -44,7 +45,11 @@ def not_migrating() -> bool:
class DefinedProxiesCollector(Collector): class DefinedProxiesCollector(Collector):
def collect(self) -> Iterator[Metric]: def collect(self) -> Iterator[Metric]:
with app.app_context(): with app.app_context():
conn = db.engine.connect() ok = GaugeMetricFamily("database_collector",
"Status of a database collector (0: bad, 1: good)",
labels=["collector"])
try:
with db.engine.connect() as conn:
result = conn.execute(text(""" result = conn.execute(text("""
SELECT origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name, SELECT origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
COUNT(proxy.id) FROM proxy, origin, pool, "group" COUNT(proxy.id) FROM proxy, origin, pool, "group"
@ -61,11 +66,18 @@ class DefinedProxiesCollector(Collector):
c.add_metric([str(row[0]), row[1], row[2], str(row[3]), row[4]], c.add_metric([str(row[0]), row[1], row[2], str(row[3]), row[4]],
row[5]) row[5])
yield c yield c
ok.add_metric(["defined_proxies"], 1)
except SQLAlchemyError:
ok.add_metric(["defined_proxies"], 0)
yield ok
class BlockedProxiesCollector(Collector): class BlockedProxiesCollector(Collector):
def collect(self) -> Iterator[Metric]: def collect(self) -> Iterator[Metric]:
with app.app_context(): with app.app_context():
ok = GaugeMetricFamily("database_collector",
"Status of a database collector (0: bad, 1: good)",
labels=["collector"])
try:
with db.engine.connect() as conn: with db.engine.connect() as conn:
result = conn.execute(text(""" result = conn.execute(text("""
SELECT origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name, SELECT origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
@ -85,11 +97,20 @@ class BlockedProxiesCollector(Collector):
c.add_metric([str(row[0]), row[1], row[2], str(row[3]), row[4], row[5]], c.add_metric([str(row[0]), row[1], row[2], str(row[3]), row[4], row[5]],
row[6]) row[6])
yield c yield c
# Reaching this point means the query succeeded, so report the collector as good (1), not bad (0).
ok.add_metric(["deprecated_proxies"], 1)
except SQLAlchemyError:
ok.add_metric(["deprecated_proxies"], 0)
yield ok
class AutomationCollector(Collector): class AutomationCollector(Collector):
def collect(self) -> Iterator[Metric]: def collect(self) -> Iterator[Metric]:
with app.app_context(): with app.app_context():
ok = GaugeMetricFamily("database_collector",
"Status of a database collector (0: bad, 1: good)",
labels=["collector"])
try:
c = GaugeMetricFamily("automation_state", "The automation state (0: idle, 1: running, 2: error)", c = GaugeMetricFamily("automation_state", "The automation state (0: idle, 1: running, 2: error)",
labels=['automation_name']) labels=['automation_name'])
automations = Automation.query.all() automations = Automation.query.all()
@ -103,6 +124,10 @@ class AutomationCollector(Collector):
else: else:
c.add_metric([automation.short_name], 2) c.add_metric([automation.short_name], 2)
yield c yield c
ok.add_metric(["automation_state"], 1)
except SQLAlchemyError:
ok.add_metric(["automation_state"], 0)
yield ok
if not_migrating() and 'DISABLE_METRICS' not in os.environ: if not_migrating() and 'DISABLE_METRICS' not in os.environ: