feat: metrics collectors don't depend on database being ready
Also adds a new "database_collector" metric that reports each collector's current status (0: bad, 1: good).
This commit is contained in:
parent
3c3a1485e7
commit
173eea2625
1 changed files with 75 additions and 50 deletions
125
app/__init__.py
125
app/__init__.py
|
@ -9,6 +9,7 @@ from prometheus_client import make_wsgi_app, Metric, CollectorRegistry
|
||||||
from prometheus_client.metrics_core import GaugeMetricFamily, CounterMetricFamily
|
from prometheus_client.metrics_core import GaugeMetricFamily, CounterMetricFamily
|
||||||
from prometheus_client.registry import Collector
|
from prometheus_client.registry import Collector
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
from werkzeug.middleware.dispatcher import DispatcherMiddleware
|
from werkzeug.middleware.dispatcher import DispatcherMiddleware
|
||||||
|
|
||||||
from app.api import api
|
from app.api import api
|
||||||
|
@ -44,65 +45,89 @@ def not_migrating() -> bool:
|
||||||
class DefinedProxiesCollector(Collector):
    """Prometheus collector for the number of proxies currently defined.

    Besides the ``defined_proxies`` gauge, every scrape also emits a
    ``database_collector`` gauge sample labelled ``defined_proxies`` so that
    a failing database query is visible as a metric instead of breaking the
    scrape.
    """

    def collect(self) -> Iterator[Metric]:
        """Yield the ``defined_proxies`` gauge followed by the collector status.

        Yields:
            The ``defined_proxies`` gauge (only when the query succeeded),
            then the ``database_collector`` gauge with value 1 on success or
            0 when SQLAlchemy raised an error.
        """
        with app.app_context():
            ok = GaugeMetricFamily("database_collector",
                                   "Status of a database collector (0: bad, 1: good)",
                                   labels=["collector"])
            try:
                # Fetch every row while the connection is open so that the
                # connection is released before this generator suspends at a
                # ``yield``; otherwise it stays checked out for as long as
                # the consumer holds the generator.
                with db.engine.connect() as conn:
                    rows = conn.execute(text("""
                    SELECT origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
                        COUNT(proxy.id) FROM proxy, origin, pool, "group"
                        WHERE proxy.origin_id = origin.id
                        AND origin.group_id = "group".id
                        AND proxy.pool_id = pool.id
                        AND proxy.destroyed IS NULL
                        GROUP BY origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name;
                    """)).fetchall()
            except SQLAlchemyError:
                # Database unavailable or query failed: report "bad" and skip
                # the data metric for this scrape.
                ok.add_metric(["defined_proxies"], 0)
            else:
                c = GaugeMetricFamily("defined_proxies", "Number of proxies currently defined for deployment",
                                      labels=['group_id', 'group_name', 'provider', 'pool_id',
                                              'pool_name'])
                for row in rows:
                    # The two id columns are stringified for use as label
                    # values; the last column is the proxy count.
                    c.add_metric([str(row[0]), row[1], row[2], str(row[3]), row[4]],
                                 row[5])
                yield c
                ok.add_metric(["defined_proxies"], 1)
            yield ok
|
||||||
|
|
||||||
class BlockedProxiesCollector(Collector):
    """Prometheus collector for the number of deprecated proxies.

    Besides the ``deprecated_proxies`` counter, every scrape also emits a
    ``database_collector`` gauge sample labelled ``deprecated_proxies`` so
    that a failing database query is visible as a metric instead of breaking
    the scrape.
    """

    def collect(self) -> Iterator[Metric]:
        """Yield the ``deprecated_proxies`` counter followed by the collector status.

        Yields:
            The ``deprecated_proxies`` counter (only when the query
            succeeded), then the ``database_collector`` gauge with value 1 on
            success or 0 when SQLAlchemy raised an error.
        """
        with app.app_context():
            ok = GaugeMetricFamily("database_collector",
                                   "Status of a database collector (0: bad, 1: good)",
                                   labels=["collector"])
            try:
                # Fetch every row while the connection is open so that the
                # connection is released before this generator suspends at a
                # ``yield``.
                with db.engine.connect() as conn:
                    rows = conn.execute(text("""
                    SELECT origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
                        proxy.deprecation_reason, COUNT(proxy.id) FROM proxy, origin, pool, "group"
                        WHERE proxy.origin_id = origin.id
                        AND origin.group_id = "group".id
                        AND proxy.pool_id = pool.id
                        AND proxy.deprecated IS NOT NULL
                        GROUP BY origin.group_id, "group".group_name, proxy.provider, proxy.pool_id, pool.pool_name,
                        proxy.deprecation_reason;
                    """)).fetchall()
            except SQLAlchemyError:
                # Database unavailable or query failed: report "bad" and skip
                # the data metric for this scrape.
                ok.add_metric(["deprecated_proxies"], 0)
            else:
                c = CounterMetricFamily("deprecated_proxies",
                                        "Number of proxies deprecated",
                                        labels=['group_id', 'group_name', 'provider', 'pool_id', 'pool_name',
                                                'deprecation_reason'])
                for row in rows:
                    # The two id columns are stringified for use as label
                    # values; the last column is the proxy count.
                    c.add_metric([str(row[0]), row[1], row[2], str(row[3]), row[4], row[5]],
                                 row[6])
                yield c
                # Success must be reported as 1 ("good"); reporting 0 here
                # made this collector indistinguishable from a failed one.
                ok.add_metric(["deprecated_proxies"], 1)
            yield ok
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class AutomationCollector(Collector):
    """Prometheus collector reporting the state of each automation.

    Emits an ``automation_state`` gauge per automation (0: idle, 1: running,
    2: anything else) and a ``database_collector`` gauge sample labelled
    ``automation_state`` that tells whether the database lookup worked.
    """

    def collect(self) -> Iterator[Metric]:
        """Yield the ``automation_state`` gauge followed by the collector status.

        Automations whose short name is listed in the
        ``HIDDEN_AUTOMATIONS`` application setting are left out.
        """
        with app.app_context():
            status = GaugeMetricFamily(
                "database_collector",
                "Status of a database collector (0: bad, 1: good)",
                labels=["collector"],
            )
            try:
                state_gauge = GaugeMetricFamily(
                    "automation_state",
                    "The automation state (0: idle, 1: running, 2: error)",
                    labels=['automation_name'],
                )
                for automation in Automation.query.all():
                    if automation.short_name in app.config['HIDDEN_AUTOMATIONS']:
                        continue
                    # Map the state onto the numeric code advertised in the
                    # metric's help text; unknown states fall through to 2.
                    code = (
                        0 if automation.state == AutomationState.IDLE
                        else 1 if automation.state == AutomationState.RUNNING
                        else 2
                    )
                    state_gauge.add_metric([automation.short_name], code)
                yield state_gauge
                status.add_metric(["automation_state"], 1)
            except SQLAlchemyError:
                # The database could not be queried: flag this collector as bad.
                status.add_metric(["automation_state"], 0)
            yield status
|
||||||
|
|
||||||
|
|
||||||
if not_migrating() and 'DISABLE_METRICS' not in os.environ:
|
if not_migrating() and 'DISABLE_METRICS' not in os.environ:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue