"""Prometheus metric definitions and label helpers.

All metrics are prefixed ``matrix_ops_bot_``.  The helper functions map
free-form inputs (source names, exceptions) onto small fixed sets of label
values so callers do not put arbitrary strings into metric labels.
"""

import json
from typing import Optional

from prometheus_client import Counter, Histogram

WEBHOOK_EVENTS_TOTAL = Counter(
    "matrix_ops_bot_webhook_events_total",
    "Incoming webhook events by source and processing result.",
    ["source", "result"],
)

MESSAGES_SENT_TOTAL = Counter(
    "matrix_ops_bot_messages_sent_total",
    "Messages successfully sent to Matrix by source.",
    ["source"],
)

MESSAGE_SEND_FAILURES_TOTAL = Counter(
    "matrix_ops_bot_message_send_failures_total",
    "Failures while sending messages to Matrix by source and reason.",
    ["source", "reason"],
)

MATRIX_AUTH_TOTAL = Counter(
    "matrix_ops_bot_matrix_auth_total",
    "Matrix authentication and credential-restore attempts by mode and result.",
    ["mode", "result"],
)

MATRIX_SYNC_ERRORS_TOTAL = Counter(
    "matrix_ops_bot_matrix_sync_errors_total",
    "Matrix sync loop errors by coarse reason.",
    ["reason"],
)

EVENT_TO_SEND_SECONDS = Histogram(
    "matrix_ops_bot_event_to_send_seconds",
    "Time from webhook receipt to first Matrix send attempt by source.",
    ["source"],
)

CONFIG_LOADED_TOTAL = Counter(
    "matrix_ops_bot_config_loaded_total",
    "Bot config load outcomes at startup.",
    ["result"],
)

# Source names accepted by source_label(); anything else becomes "unknown".
_KNOWN_SOURCES = frozenset({"gitlab", "pagerduty", "aws_sns", "alertmanager"})

# Substrings treated as a network-level problem by both the send-failure and
# the sync-error classifiers.
_NETWORK_MARKERS = ("timeout", "connection", "network", "dns", "refused")

# Ordered (label, substrings) rules: the first rule with a matching substring
# wins, so the order here is part of the behavior.
_SEND_FAILURE_RULES = (
    ("forbidden", ("forbidden", "403")),
    ("ratelimit", ("rate limit", "too many requests", "429")),
    ("unknown_room", ("unknown room", "room not found")),
    ("network", _NETWORK_MARKERS),
)

_SYNC_ERROR_RULES = (
    ("auth", ("401", "403", "unauthorized", "forbidden")),
    ("network", _NETWORK_MARKERS),
    ("crypto", ("megolm", "olm", "decrypt")),
)


def _exception_text(exc: Exception) -> str:
    """Return the lower-cased ``"<TypeName> <message>"`` text for *exc*."""
    return f"{type(exc).__name__} {exc}".lower()


def _first_matching_label(text: str, rules, default: str) -> str:
    """Return the label of the first rule with a substring in *text*."""
    for label, markers in rules:
        if any(marker in text for marker in markers):
            return label
    return default


def source_label(source: Optional[str]) -> str:
    """Normalize a webhook source name into a fixed label value.

    The input is stripped, lower-cased and has ``-`` replaced by ``_``.

    Returns:
        One of ``"gitlab"``, ``"pagerduty"``, ``"aws_sns"``,
        ``"alertmanager"``; or ``"unknown"`` when *source* is ``None`` or
        not one of those names.
    """
    if source is None:
        return "unknown"
    normalized = source.strip().lower().replace("-", "_")
    return normalized if normalized in _KNOWN_SOURCES else "unknown"


def classify_send_failure(exc: Exception) -> str:
    """Map a message-send exception onto a coarse ``reason`` label.

    Classification is a case-insensitive substring search over the exception
    type name and message, checked in this order: forbidden, rate limit,
    unknown room, network.

    Returns:
        ``"forbidden"``, ``"ratelimit"``, ``"unknown_room"``, ``"network"``,
        or ``"exception"`` when nothing matches.
    """
    return _first_matching_label(
        _exception_text(exc), _SEND_FAILURE_RULES, "exception"
    )


def classify_sync_error(exc: Exception) -> str:
    """Map a sync-loop exception onto a coarse ``reason`` label.

    Classification is a case-insensitive substring search over the exception
    type name and message, checked in this order: auth, network, crypto.

    Returns:
        ``"auth"``, ``"network"``, ``"crypto"``, or ``"unknown"`` when
        nothing matches.
    """
    return _first_matching_label(
        _exception_text(exc), _SYNC_ERROR_RULES, "unknown"
    )


def classify_payload_error(exc: Exception) -> str:
    """Classify an exception raised while handling a webhook payload.

    Returns:
        ``"invalid_payload"`` for ``ValueError``, ``TypeError``, ``KeyError``
        and ``json.JSONDecodeError``; ``"handler_error"`` for anything else.
    """
    # json.JSONDecodeError is a ValueError subclass; it is listed explicitly
    # to make the intent clear.
    if isinstance(exc, (ValueError, TypeError, KeyError, json.JSONDecodeError)):
        return "invalid_payload"
    return "handler_error"