alarms: refactor the alarms subsystem
also include eotk alarms now
This commit is contained in:
parent
a935055083
commit
e2ce24bf3b
17 changed files with 288 additions and 152 deletions
|
@ -1,48 +1,40 @@
|
|||
import datetime
|
||||
from typing import Optional
|
||||
from typing import Optional, List
|
||||
|
||||
from app.extensions import db
|
||||
from app.models.alarms import Alarm
|
||||
|
||||
|
||||
def alarms_for(target: str) -> List[Alarm]:
    """Return every alarm recorded against the given target.

    :param target: value to match against ``Alarm.target``.
    :return: list of matching alarms; empty when none exist.
    """
    matching = Alarm.query.filter(Alarm.target == target)
    return list(matching.all())
|
||||
|
||||
|
||||
def _get_alarm(target: str,
|
||||
alarm_type: str,
|
||||
*,
|
||||
proxy_id: Optional[int] = None,
|
||||
origin_id: Optional[int] = None,
|
||||
aspect: str,
|
||||
create_if_missing: bool = True) -> Optional[Alarm]:
|
||||
alarm: Optional[Alarm]
|
||||
if target == "proxy":
|
||||
alarm = Alarm.query.filter(
|
||||
Alarm.target == "proxy",
|
||||
Alarm.alarm_type == alarm_type,
|
||||
Alarm.proxy_id == proxy_id
|
||||
).first()
|
||||
elif target == "origin":
|
||||
alarm = Alarm.query.filter(
|
||||
Alarm.target == "origin",
|
||||
Alarm.alarm_type == alarm_type,
|
||||
Alarm.proxy_id == origin_id
|
||||
).first()
|
||||
else:
|
||||
return None
|
||||
alarm: Optional[Alarm] = Alarm.query.filter(
|
||||
Alarm.aspect == aspect,
|
||||
Alarm.target == target
|
||||
).first()
|
||||
if create_if_missing and alarm is None:
|
||||
alarm = Alarm()
|
||||
alarm.aspect = aspect
|
||||
alarm.target = target
|
||||
alarm.alarm_type = alarm_type
|
||||
alarm.text = "New alarm"
|
||||
alarm.state_changed = datetime.datetime.utcnow()
|
||||
if target == "proxy":
|
||||
alarm.proxy_id = proxy_id
|
||||
if target == "origin":
|
||||
alarm.origin_id = origin_id
|
||||
alarm.last_updated = datetime.datetime.utcnow()
|
||||
db.session.add(alarm)
|
||||
db.session.commit()
|
||||
return alarm
|
||||
|
||||
|
||||
def get_proxy_alarm(proxy_id: int, alarm_type: str) -> Alarm:
|
||||
alarm = _get_alarm("proxy", alarm_type, proxy_id=proxy_id)
|
||||
def get_alarm(target: str, aspect: str) -> Optional[Alarm]:
|
||||
return _get_alarm(target, aspect, create_if_missing=False)
|
||||
|
||||
|
||||
def get_or_create_alarm(target: str, aspect: str) -> Alarm:
|
||||
alarm = _get_alarm(target, aspect, create_if_missing=True)
|
||||
if alarm is None:
|
||||
# mypy can't tell that this will never be reached
|
||||
raise RuntimeError("Creating an alarm must have failed.")
|
||||
raise RuntimeError("Asked for an alarm to be created but got None.")
|
||||
return alarm
|
||||
|
|
|
@ -15,6 +15,7 @@ from app.terraform.block_external import BlockExternalAutomation
|
|||
from app.terraform.block_ooni import BlockOONIAutomation
|
||||
from app.terraform.block_roskomsvoboda import BlockRoskomsvobodaAutomation
|
||||
from app.terraform.eotk.aws import EotkAWSAutomation
|
||||
from app.terraform.alarms.eotk_aws import AlarmEotkAwsAutomation
|
||||
from app.terraform.alarms.proxy_azure_cdn import AlarmProxyAzureCdnAutomation
|
||||
from app.terraform.alarms.proxy_cloudfront import AlarmProxyCloudfrontAutomation
|
||||
from app.terraform.alarms.proxy_http_status import AlarmProxyHTTPStatusAutomation
|
||||
|
@ -37,6 +38,7 @@ else:
|
|||
jobs = {
|
||||
x.short_name: x
|
||||
for x in [
|
||||
AlarmEotkAwsAutomation,
|
||||
AlarmProxyAzureCdnAutomation,
|
||||
AlarmProxyCloudfrontAutomation,
|
||||
AlarmProxyHTTPStatusAutomation,
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
from abc import abstractmethod
|
||||
from datetime import datetime
|
||||
from typing import Union, List, Optional, Any
|
||||
|
||||
from app.alarms import alarms_for
|
||||
from app.extensions import db
|
||||
from app.models.alarms import Alarm
|
||||
|
||||
|
||||
class AbstractConfiguration(db.Model): # type: ignore
|
||||
|
@ -13,6 +16,15 @@ class AbstractConfiguration(db.Model): # type: ignore
|
|||
updated = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
|
||||
destroyed = db.Column(db.DateTime(), nullable=True)
|
||||
|
||||
@property
|
||||
def alarms(self) -> List[Alarm]:
|
||||
return alarms_for(self.brn)
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def brn(self) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
def destroy(self) -> None:
|
||||
self.destroyed = datetime.utcnow()
|
||||
self.updated = datetime.utcnow()
|
||||
|
@ -59,6 +71,11 @@ class AbstractResource(db.Model): # type: ignore
|
|||
if self.updated is None:
|
||||
self.updated = datetime.utcnow()
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def brn(self) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
def deprecate(self, *, reason: str) -> None:
|
||||
self.deprecated = datetime.utcnow()
|
||||
self.deprecation_reason = reason
|
||||
|
|
|
@ -14,38 +14,30 @@ class AlarmState(enum.Enum):
|
|||
|
||||
class Alarm(db.Model): # type: ignore
|
||||
id = db.Column(db.Integer, primary_key=True)
|
||||
target = db.Column(db.String(60), nullable=False)
|
||||
group_id = db.Column(db.Integer, db.ForeignKey("group.id"))
|
||||
origin_id = db.Column(db.Integer, db.ForeignKey("origin.id"))
|
||||
proxy_id = db.Column(db.Integer, db.ForeignKey("proxy.id"))
|
||||
bridge_id = db.Column(db.Integer, db.ForeignKey("bridge.id"))
|
||||
alarm_type = db.Column(db.String(255), nullable=False)
|
||||
target = db.Column(db.String(255), nullable=False)
|
||||
aspect = db.Column(db.String(255), nullable=False)
|
||||
alarm_state = db.Column(db.Enum(AlarmState), default=AlarmState.UNKNOWN, nullable=False)
|
||||
state_changed = db.Column(db.DateTime(), nullable=False)
|
||||
last_updated = db.Column(db.DateTime())
|
||||
text = db.Column(db.String(255))
|
||||
|
||||
group = db.relationship("Group", back_populates="alarms")
|
||||
origin = db.relationship("Origin", back_populates="alarms")
|
||||
proxy = db.relationship("Proxy", back_populates="alarms")
|
||||
bridge = db.relationship("Bridge", back_populates="alarms")
|
||||
last_updated = db.Column(db.DateTime(), nullable=False)
|
||||
text = db.Column(db.String(255), nullable=False)
|
||||
|
||||
@classmethod
|
||||
def csv_header(cls) -> List[str]:
|
||||
return [
|
||||
"id", "target", "group_id", "origin_id", "proxy_id", "bridge_id", "alarm_type",
|
||||
"alarm_state", "state_changed", "last_updated", "text"
|
||||
]
|
||||
return ["id", "target", "alarm_type", "alarm_state", "state_changed", "last_updated", "text"]
|
||||
|
||||
def csv_row(self) -> List[Any]:
|
||||
return [
|
||||
getattr(self, x) for x in self.csv_header()
|
||||
]
|
||||
return [getattr(self, x) for x in self.csv_header()]
|
||||
|
||||
def update_state(self, state: AlarmState, text: str) -> None:
|
||||
from app.models.activity import Activity
|
||||
|
||||
if self.alarm_state != state or self.state_changed is None:
|
||||
self.state_changed = datetime.utcnow()
|
||||
activity = Activity(activity_type="alarm_state",
|
||||
text=f"{self.alarm_state.name}->{state.name}! State changed for "
|
||||
f"{self.aspect} on {self.target}: {text}")
|
||||
activity.notify()
|
||||
db.session.add(activity)
|
||||
self.alarm_state = state
|
||||
self.text = text
|
||||
self.last_updated = datetime.utcnow()
|
||||
db.session.commit()
|
||||
|
|
|
@ -13,7 +13,6 @@ class Group(AbstractConfiguration):
|
|||
bridgeconfs = db.relationship("BridgeConf", back_populates="group")
|
||||
eotks = db.relationship("Eotk", back_populates="group")
|
||||
onions = db.relationship("Onion", back_populates="group")
|
||||
alarms = db.relationship("Alarm", back_populates="group")
|
||||
|
||||
@classmethod
|
||||
def csv_header(cls) -> List[str]:
|
||||
|
|
|
@ -39,7 +39,6 @@ class Bridge(AbstractResource):
|
|||
bridgeline = db.Column(db.String(255), nullable=True)
|
||||
|
||||
conf = db.relationship("BridgeConf", back_populates="bridges")
|
||||
alarms = db.relationship("Alarm", back_populates="bridge")
|
||||
|
||||
@classmethod
|
||||
def csv_header(cls) -> List[str]:
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from typing import Optional, List
|
||||
|
||||
from flask import current_app
|
||||
from tldextract import extract
|
||||
|
||||
from app.extensions import db
|
||||
|
@ -14,7 +15,10 @@ class Origin(AbstractConfiguration):
|
|||
|
||||
group = db.relationship("Group", back_populates="origins")
|
||||
proxies = db.relationship("Proxy", back_populates="origin")
|
||||
alarms = db.relationship("Alarm", back_populates="origin")
|
||||
|
||||
@property
|
||||
def brn(self) -> str:
|
||||
return f"brn:{current_app.config['GLOBAL_NAMESPACE']}:{self.group_id}:mirror:conf:origin/{self.domain_name}"
|
||||
|
||||
@classmethod
|
||||
def csv_header(cls) -> List[str]:
|
||||
|
@ -45,7 +49,10 @@ class Proxy(AbstractResource):
|
|||
url = db.Column(db.String(255), nullable=True)
|
||||
|
||||
origin = db.relationship("Origin", back_populates="proxies")
|
||||
alarms = db.relationship("Alarm", back_populates="proxy")
|
||||
|
||||
@property
|
||||
def brn(self) -> str:
|
||||
return f"brn:{current_app.config['GLOBAL_NAMESPACE']}:{self.origin.group_id}:mirror:{self.provider}:proxy/{self.id}"
|
||||
|
||||
@classmethod
|
||||
def csv_header(cls) -> List[str]:
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
from flask import current_app
|
||||
|
||||
from app.extensions import db
|
||||
from app.models import AbstractConfiguration, AbstractResource
|
||||
|
||||
|
@ -17,3 +19,7 @@ class Eotk(AbstractResource):
|
|||
region = db.Column(db.String(20), nullable=False)
|
||||
|
||||
group = db.relationship("Group", back_populates="eotks")
|
||||
|
||||
@property
|
||||
def brn(self) -> str:
|
||||
return f"brn:{current_app.config['GLOBAL_NAMESPACE']}:{self.group_id}:eotk:{self.provider}:instance/{self.region}"
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
from typing import Optional, Union
|
||||
|
||||
from flask import Blueprint, render_template, request
|
||||
from flask.typing import ResponseReturnValue
|
||||
from jinja2 import Markup
|
||||
from sqlalchemy import desc, or_
|
||||
|
||||
from app.models.activity import Activity
|
||||
|
@ -10,6 +11,7 @@ from app.models.alarms import Alarm, AlarmState
|
|||
from app.models.bridges import Bridge
|
||||
from app.models.mirrors import Origin, Proxy
|
||||
from app.models.base import Group
|
||||
from app.models.onions import Eotk
|
||||
from app.portal.automation import bp as automation
|
||||
from app.portal.bridgeconf import bp as bridgeconf
|
||||
from app.portal.bridge import bp as bridge
|
||||
|
@ -50,11 +52,44 @@ def format_datetime(s: Optional[datetime]) -> str:
|
|||
return s.strftime("%a, %d %b %Y %H:%M:%S")
|
||||
|
||||
|
||||
@portal.app_template_filter("describe_brn")
def describe_brn(s: str) -> Union[str, Markup]:
    """Render an alarm target BRN as a human-readable description.

    The BRN is split on ``:`` and the 4th, 5th and 6th fields are used as
    product, provider and resource respectively. Recognised resources are
    ``mirror`` origins, proxies and CloudFront quotas, and ``eotk`` instances.

    :param s: the alarm target string.
    :return: a description of the resource, or ``s`` unchanged when the
        string is not a recognised BRN or the resource cannot be found.
        The proxy description is returned as ``Markup`` because it contains
        a ``<br>`` element.
    """
    parts = s.split(":")
    if len(parts) < 6:
        # Not a full BRN; show it verbatim instead of raising IndexError
        # while the template is rendering.
        return s
    group_part, product, provider, resource = parts[2], parts[3], parts[4], parts[5]
    if product == "mirror":
        if resource.startswith("origin/"):
            origin = Origin.query.filter(
                Origin.domain_name == resource[len("origin/"):]
            ).first()
            if not origin:
                return s
            return f"Origin: {origin.domain_name} ({origin.group.group_name})"
        if resource.startswith("proxy/"):
            try:
                proxy_id = int(resource[len("proxy/"):])
            except ValueError:
                # Malformed proxy id: fall back to the raw string.
                return s
            proxy = Proxy.query.filter(Proxy.id == proxy_id).first()
            if not proxy:
                return s
            # Markup.format() escapes its arguments, so only the literal
            # <br> is emitted as HTML; database values cannot inject markup.
            return Markup("Proxy: {}<br>({}: {})").format(
                proxy.url,
                proxy.origin.group.group_name,
                proxy.origin.domain_name
            )
        if resource.startswith("quota/"):
            if provider == "cloudfront":
                return f"Quota: CloudFront {resource[len('quota/'):]}"
    if product == "eotk":
        if resource.startswith("instance/"):
            try:
                group_id = int(group_part)
            except ValueError:
                # Group field is not a numeric id: fall back to the raw string.
                return s
            eotk = Eotk.query.filter(
                Eotk.group_id == group_id,
                Eotk.region == resource[len("instance/"):]
            ).first()
            if not eotk:
                return s
            return f"EOTK Instance: {eotk.group.group_name} in {eotk.provider} {eotk.region}"
    return s
|
||||
|
||||
|
||||
def total_origins_blocked() -> int:
|
||||
count = 0
|
||||
for o in Origin.query.filter(Origin.destroyed.is_(None)).all():
|
||||
for a in o.alarms:
|
||||
if a.alarm_type.startswith("origin-block-ooni-"):
|
||||
if a.aspect.startswith("origin-block-ooni-"):
|
||||
if a.alarm_state == AlarmState.WARNING:
|
||||
count += 1
|
||||
break
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<thead>
|
||||
<tr>
|
||||
<th scope="col">Resource</th>
|
||||
<th scope="col">Type</th>
|
||||
<th scope="col">Aspect</th>
|
||||
<th scope="col">State</th>
|
||||
<th scope="col">Message</th>
|
||||
<th scope="col">Last Update</th>
|
||||
|
@ -37,14 +37,8 @@
|
|||
<tbody>
|
||||
{% for alarm in alarms %}
|
||||
<tr class="bg-{% if alarm.alarm_state.name == "OK" %}success{% elif alarm.alarm_state.name == "UNKNOWN" %}dark{% elif alarm.alarm_state.name == "WARNING" %}warning{% else %}danger{% endif %} text-{% if alarm.alarm_state.name == "WARNING" %}dark{% else %}light{% endif %}">
|
||||
{% if alarm.target == "proxy" %}
|
||||
<td>Proxy: {{ alarm.proxy.url }}<br />({{ alarm.proxy.origin.domain_name }})</td>
|
||||
{% elif alarm.target == "origin" %}
|
||||
<td>Origin: {{ alarm.origin.domain_name }}</td>
|
||||
{% elif alarm.target == "service/cloudfront" %}
|
||||
<td>AWS CloudFront</td>
|
||||
{% endif %}
|
||||
<td>{{ alarm.alarm_type }}</td>
|
||||
<td>{{ alarm.target | describe_brn }}</td>
|
||||
<td>{{ alarm.aspect }}</td>
|
||||
<td>{{ alarm.alarm_state.name }}</td>
|
||||
<td>{{ alarm.text }}</td>
|
||||
<td>{{ alarm.last_updated | format_datetime }}</td>
|
||||
|
|
54
app/terraform/alarms/eotk_aws.py
Normal file
54
app/terraform/alarms/eotk_aws.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
from typing import Tuple, Optional
|
||||
|
||||
import boto3
|
||||
from sqlalchemy import func
|
||||
|
||||
from app import app
|
||||
from app.alarms import get_or_create_alarm
|
||||
from app.extensions import db
|
||||
from app.models.base import Group
|
||||
from app.models.alarms import AlarmState
|
||||
from app.models.onions import Eotk
|
||||
from app.terraform import BaseAutomation
|
||||
|
||||
|
||||
def alarms_in_region(region: str, prefix: str, aspect: str) -> None:
    """Import CloudWatch alarm states for EOTK instances in one AWS region.

    Every CloudWatch metric alarm whose name starts with ``prefix`` is mapped
    to a group (by the second ``-``-separated token after the prefix) and then
    to the EOTK instance of that group in ``region``. The matching portal
    alarm is created if needed and its state is updated.

    :param region: AWS region name to query.
    :param prefix: CloudWatch alarm name prefix to list.
    :param aspect: aspect name under which the portal alarm is stored.
    """
    cloudwatch = boto3.client('cloudwatch',
                              aws_access_key_id=app.config['AWS_ACCESS_KEY'],
                              aws_secret_access_key=app.config['AWS_SECRET_KEY'],
                              region_name=region)
    dist_paginator = cloudwatch.get_paginator('describe_alarms')
    page_iterator = dist_paginator.paginate(AlarmNamePrefix=prefix)
    for page in page_iterator:
        for cw_alarm in page['MetricAlarms']:
            eotk_id = cw_alarm["AlarmName"][len(prefix):].split("-")
            if len(eotk_id) < 2:
                # An alarm that merely shares the prefix but has no group
                # token must not abort the whole import with IndexError.
                print("Unable to find group for " + cw_alarm['AlarmName'])
                continue
            # NOTE(review): assumes the group name itself contains no "-";
            # confirm against the alarm naming used by the Terraform module.
            group: Optional[Group] = Group.query.filter(
                func.lower(Group.group_name) == eotk_id[1].lower()
            ).first()
            if group is None:
                print("Unable to find group for " + cw_alarm['AlarmName'])
                continue
            eotk = Eotk.query.filter(
                Eotk.group_id == group.id,
                Eotk.region == region
            ).first()
            if eotk is None:
                print("Skipping unknown instance " + cw_alarm['AlarmName'])
                continue
            alarm = get_or_create_alarm(eotk.brn, aspect)
            if cw_alarm['StateValue'] == "OK":
                alarm.update_state(AlarmState.OK, "CloudWatch alarm OK")
            elif cw_alarm['StateValue'] == "ALARM":
                alarm.update_state(AlarmState.CRITICAL, "CloudWatch alarm ALARM")
            else:
                alarm.update_state(AlarmState.UNKNOWN, f"CloudWatch alarm {cw_alarm['StateValue']}")
|
||||
|
||||
|
||||
class AlarmEotkAwsAutomation(BaseAutomation):
    """Automation job that imports CloudWatch alarms for AWS EOTK instances."""

    short_name = "monitor_eotk_aws"
    description = "Import alarms for AWS EOTK instances"

    def automate(self, full: bool = False) -> Tuple[bool, str]:
        """Import the bandwidth and CPU alarms for each monitored region.

        :param full: not used by this job.
        :return: ``(True, "")`` once all regions have been processed.
        """
        regions = ("us-east-2", "eu-central-1")
        # (CloudWatch alarm name prefix, portal alarm aspect), in call order.
        checks = (
            ("eotk-bw-out-high-", "bandwidth-out-high"),
            ("eotk-cpu-high-", "instance-cpu"),
        )
        for region in regions:
            for name_prefix, aspect in checks:
                alarms_in_region(region, name_prefix, aspect)
        db.session.commit()
        return True, ""
|
|
@ -4,7 +4,7 @@ from azure.identity import ClientSecretCredential
|
|||
from azure.mgmt.alertsmanagement import AlertsManagementClient
|
||||
|
||||
from app import app
|
||||
from app.alarms import get_proxy_alarm
|
||||
from app.alarms import get_or_create_alarm
|
||||
from app.models.alarms import AlarmState
|
||||
from app.models.mirrors import Proxy
|
||||
from app.terraform import BaseAutomation
|
||||
|
@ -30,7 +30,7 @@ class AlarmProxyAzureCdnAutomation(BaseAutomation):
|
|||
Proxy.provider == "azure_cdn",
|
||||
Proxy.destroyed.is_(None)
|
||||
):
|
||||
alarm = get_proxy_alarm(proxy.id, "bandwidth-out-high")
|
||||
alarm = get_or_create_alarm(proxy.brn, "bandwidth-out-high")
|
||||
if proxy.origin.group.group_name.lower() not in firing:
|
||||
alarm.update_state(AlarmState.OK, "Azure monitor alert not firing")
|
||||
else:
|
||||
|
|
|
@ -2,60 +2,62 @@ import datetime
|
|||
from typing import Tuple
|
||||
|
||||
import boto3
|
||||
from flask import current_app
|
||||
|
||||
from app import app
|
||||
from app.alarms import get_proxy_alarm
|
||||
from app.alarms import get_or_create_alarm
|
||||
from app.extensions import db
|
||||
from app.models.mirrors import Proxy
|
||||
from app.models.alarms import AlarmState, Alarm
|
||||
from app.models.alarms import AlarmState
|
||||
from app.terraform import BaseAutomation
|
||||
|
||||
|
||||
def _cloudfront_quota(warning_threshold: int = 320, critical_threshold: int = 370) -> None:
    """Update the CloudFront distribution quota alarm from the deployed proxy count.

    :param warning_threshold: counts above this value raise a WARNING.
    :param critical_threshold: counts above this value raise a CRITICAL.
    """
    alarm = get_or_create_alarm(
        f"brn:{current_app.config['GLOBAL_NAMESPACE']}:0:mirror:cloudfront:quota/distributions",
        "quota-usage"
    )
    # Count in the database instead of loading every proxy row just to
    # take len() of the result list.
    # NOTE(review): this counts non-destroyed proxies of every provider, not
    # only CloudFront ones; confirm whether a provider filter is wanted.
    deployed_count = Proxy.query.filter(Proxy.destroyed.is_(None)).count()
    message = f"{deployed_count} distributions deployed"
    if deployed_count > critical_threshold:
        alarm.update_state(AlarmState.CRITICAL, message)
    elif deployed_count > warning_threshold:
        alarm.update_state(AlarmState.WARNING, message)
    else:
        alarm.update_state(AlarmState.OK, message)
|
||||
|
||||
|
||||
def _proxy_alarms() -> None:
    """Import CloudWatch bandwidth alarm states for proxies.

    Lists the CloudWatch alarms in ``us-east-2`` whose names start with
    ``bandwidth-out-high-``, matches the remainder of each name against
    ``Proxy.slug`` and updates the proxy's ``bandwidth-out-high`` alarm.
    Alarms with no matching proxy are reported and skipped.
    """
    name_prefix = "bandwidth-out-high-"
    client = boto3.client('cloudwatch',
                          aws_access_key_id=app.config['AWS_ACCESS_KEY'],
                          aws_secret_access_key=app.config['AWS_SECRET_KEY'],
                          region_name='us-east-2')
    pages = client.get_paginator('describe_alarms').paginate(AlarmNamePrefix=name_prefix)
    for page in pages:
        for cw_alarm in page['MetricAlarms']:
            slug = cw_alarm["AlarmName"][len(name_prefix):]
            proxy = Proxy.query.filter(Proxy.slug == slug).first()
            if proxy is None:
                print("Skipping unknown proxy " + slug)
                continue
            alarm = get_or_create_alarm(proxy.brn, "bandwidth-out-high")
            cw_state = cw_alarm['StateValue']
            # OK and ALARM map directly; anything else is reported as UNKNOWN.
            if cw_state == "OK":
                new_state = AlarmState.OK
            elif cw_state == "ALARM":
                new_state = AlarmState.CRITICAL
            else:
                new_state = AlarmState.UNKNOWN
            alarm.update_state(new_state, f"CloudWatch alarm {cw_state}")
|
||||
|
||||
|
||||
class AlarmProxyCloudfrontAutomation(BaseAutomation):
|
||||
short_name = "monitor_proxy_cloudfront"
|
||||
description = "Import alarms for AWS CloudFront proxies"
|
||||
|
||||
def automate(self, full: bool = False) -> Tuple[bool, str]:
|
||||
cloudwatch = boto3.client('cloudwatch',
|
||||
aws_access_key_id=app.config['AWS_ACCESS_KEY'],
|
||||
aws_secret_access_key=app.config['AWS_SECRET_KEY'],
|
||||
region_name='us-east-2')
|
||||
dist_paginator = cloudwatch.get_paginator('describe_alarms')
|
||||
page_iterator = dist_paginator.paginate(AlarmNamePrefix="bandwidth-out-high-")
|
||||
for page in page_iterator:
|
||||
for cw_alarm in page['MetricAlarms']:
|
||||
dist_id = cw_alarm["AlarmName"][len("bandwidth-out-high-"):]
|
||||
proxy = Proxy.query.filter(Proxy.slug == dist_id).first()
|
||||
if proxy is None:
|
||||
print("Skipping unknown proxy " + dist_id)
|
||||
continue
|
||||
alarm = get_proxy_alarm(proxy.id, "bandwidth-out-high")
|
||||
if cw_alarm['StateValue'] == "OK":
|
||||
alarm.update_state(AlarmState.OK, "CloudWatch alarm OK")
|
||||
elif cw_alarm['StateValue'] == "ALARM":
|
||||
alarm.update_state(AlarmState.CRITICAL, "CloudWatch alarm ALARM")
|
||||
else:
|
||||
alarm.update_state(AlarmState.UNKNOWN, f"CloudWatch alarm {cw_alarm['StateValue']}")
|
||||
alarm = Alarm.query.filter(
|
||||
Alarm.alarm_type == "cloudfront-quota"
|
||||
).first()
|
||||
if alarm is None:
|
||||
alarm = Alarm() # type: ignore
|
||||
alarm.target = "service/cloudfront"
|
||||
alarm.alarm_type = "cloudfront-quota"
|
||||
alarm.state_changed = datetime.datetime.utcnow()
|
||||
db.session.add(alarm)
|
||||
alarm.last_updated = datetime.datetime.utcnow()
|
||||
deployed_count = len(Proxy.query.filter(
|
||||
Proxy.destroyed.is_(None)).all())
|
||||
old_state = alarm.alarm_state
|
||||
if deployed_count > 370:
|
||||
alarm.alarm_state = AlarmState.CRITICAL
|
||||
elif deployed_count > 320:
|
||||
alarm.alarm_state = AlarmState.WARNING
|
||||
else:
|
||||
alarm.alarm_state = AlarmState.OK
|
||||
if alarm.alarm_state != old_state:
|
||||
alarm.state_changed = datetime.datetime.utcnow()
|
||||
_proxy_alarms()
|
||||
_cloudfront_quota()
|
||||
db.session.commit()
|
||||
return True, ""
|
||||
|
|
|
@ -3,26 +3,13 @@ from typing import Tuple
|
|||
import requests
|
||||
from requests import RequestException
|
||||
|
||||
from app.alarms import get_or_create_alarm
|
||||
from app.extensions import db
|
||||
from app.models.alarms import Alarm, AlarmState
|
||||
from app.models.alarms import AlarmState
|
||||
from app.models.mirrors import Proxy
|
||||
from app.terraform import BaseAutomation
|
||||
|
||||
|
||||
def set_http_alarm(proxy_id: int, state: AlarmState, text: str) -> None:
|
||||
alarm = Alarm.query.filter(
|
||||
Alarm.proxy_id == proxy_id,
|
||||
Alarm.alarm_type == "http-status"
|
||||
).first()
|
||||
if alarm is None:
|
||||
alarm = Alarm()
|
||||
alarm.proxy_id = proxy_id
|
||||
alarm.alarm_type = "http-status"
|
||||
alarm.target = "proxy"
|
||||
db.session.add(alarm)
|
||||
alarm.update_state(state, text)
|
||||
|
||||
|
||||
class AlarmProxyHTTPStatusAutomation(BaseAutomation):
|
||||
short_name = "alarm_http_status"
|
||||
description = "Check all deployed proxies for HTTP status code"
|
||||
|
@ -40,28 +27,26 @@ class AlarmProxyHTTPStatusAutomation(BaseAutomation):
|
|||
allow_redirects=False,
|
||||
timeout=5)
|
||||
r.raise_for_status()
|
||||
alarm = get_or_create_alarm(proxy.brn, "http-status")
|
||||
if r.is_redirect:
|
||||
set_http_alarm(
|
||||
proxy.id,
|
||||
alarm.update_state(
|
||||
AlarmState.CRITICAL,
|
||||
f"{r.status_code} {r.reason}"
|
||||
)
|
||||
else:
|
||||
set_http_alarm(
|
||||
proxy.id,
|
||||
alarm.update_state(
|
||||
AlarmState.OK,
|
||||
f"{r.status_code} {r.reason}"
|
||||
)
|
||||
except requests.HTTPError:
|
||||
set_http_alarm(
|
||||
proxy.id,
|
||||
alarm.update_state(
|
||||
AlarmState.CRITICAL,
|
||||
f"{r.status_code} {r.reason}"
|
||||
)
|
||||
except RequestException as e:
|
||||
set_http_alarm(
|
||||
proxy.id,
|
||||
alarm.update_state(
|
||||
AlarmState.CRITICAL,
|
||||
repr(e)
|
||||
)
|
||||
db.session.commit()
|
||||
return True, ""
|
||||
|
|
|
@ -5,8 +5,9 @@ from typing import Dict, Tuple, Any
|
|||
|
||||
import requests
|
||||
|
||||
from app.alarms import get_or_create_alarm
|
||||
from app.extensions import db
|
||||
from app.models.alarms import Alarm, AlarmState
|
||||
from app.models.alarms import AlarmState
|
||||
from app.models.mirrors import Origin
|
||||
from app.terraform import BaseAutomation
|
||||
|
||||
|
@ -58,20 +59,6 @@ def threshold_origin(domain_name: str) -> Dict[str, Any]:
|
|||
return ooni
|
||||
|
||||
|
||||
def set_ooni_alarm(origin_id: int, country: str, state: AlarmState, text: str) -> None:
|
||||
alarm = Alarm.query.filter(
|
||||
Alarm.origin_id == origin_id,
|
||||
Alarm.alarm_type == f"origin-block-ooni-{country}"
|
||||
).first()
|
||||
if alarm is None:
|
||||
alarm = Alarm()
|
||||
alarm.origin_id = origin_id
|
||||
alarm.alarm_type = f"origin-block-ooni-{country}"
|
||||
alarm.target = "origin"
|
||||
db.session.add(alarm)
|
||||
alarm.update_state(state, text)
|
||||
|
||||
|
||||
class BlockOONIAutomation(BaseAutomation):
|
||||
short_name = "block_ooni"
|
||||
description = "Import origin and/or proxy reachability results from OONI"
|
||||
|
@ -82,5 +69,8 @@ class BlockOONIAutomation(BaseAutomation):
|
|||
for origin in origins:
|
||||
ooni = threshold_origin(origin.domain_name)
|
||||
for country in ooni:
|
||||
set_ooni_alarm(origin.id, country.lower(), ooni[country]["state"], ooni[country]["message"])
|
||||
alarm = get_or_create_alarm(origin.brn,
|
||||
f"origin-block-ooni-{country.lower()}")
|
||||
alarm.update_state(ooni[country]["state"], ooni[country]["message"])
|
||||
db.session.commit()
|
||||
return True, ""
|
||||
|
|
|
@ -65,8 +65,7 @@ class EotkAWSAutomation(TerraformAutomation):
|
|||
aws = aws,
|
||||
aws.second_region = aws.second_region
|
||||
}
|
||||
source = "sr2c/eotk/aws"
|
||||
version = "0.0.5"
|
||||
source = "/Users/irl/PycharmProjects/bc-dashboard/terraform/terraform-aws-eotk"
|
||||
namespace = "{{ global_namespace }}"
|
||||
tenant = "{{ group.group_name }}"
|
||||
name = "eotk"
|
||||
|
|
63
migrations/versions/31aec2f86c40_new_alarm_schema.py
Normal file
63
migrations/versions/31aec2f86c40_new_alarm_schema.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
"""new alarm schema
|
||||
|
||||
Revision ID: 31aec2f86c40
|
||||
Revises: 1842ba85a5c7
|
||||
Create Date: 2022-05-18 14:22:51.028405
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '31aec2f86c40'
|
||||
down_revision = '1842ba85a5c7'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Move the alarm table to the target/aspect schema.

    Existing alarm rows are discarded because they cannot be expressed in the
    new schema; the per-resource foreign keys and ``alarm_type`` are dropped
    and ``aspect`` is added.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('alarm', schema=None) as batch_op:
        # Empty the table first so the new NOT NULL columns can be added.
        batch_op.execute("TRUNCATE alarm")
        batch_op.add_column(sa.Column('aspect', sa.String(length=255), nullable=False))
        # The model widens target from 60 to 255 characters; autogenerate did
        # not emit this change, so it is applied explicitly here.
        batch_op.alter_column('target',
                              existing_type=sa.VARCHAR(length=60),
                              type_=sa.String(length=255),
                              existing_nullable=False)
        batch_op.alter_column('last_updated',
                              existing_type=postgresql.TIMESTAMP(),
                              nullable=False)
        batch_op.alter_column('text',
                              existing_type=sa.VARCHAR(length=255),
                              nullable=False)
        batch_op.drop_constraint('fk_alarm_group_id_group', type_='foreignkey')
        batch_op.drop_constraint('fk_alarm_proxy_id_proxy', type_='foreignkey')
        batch_op.drop_constraint('fk_alarm_origin_id_origin', type_='foreignkey')
        batch_op.drop_constraint('fk_alarm_bridge_id_bridge', type_='foreignkey')
        batch_op.drop_column('origin_id')
        batch_op.drop_column('alarm_type')
        batch_op.drop_column('bridge_id')
        batch_op.drop_column('group_id')
        batch_op.drop_column('proxy_id')

    # ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade():
    """Restore the previous alarm schema with per-resource foreign keys.

    Alarm rows are discarded first: ``alarm_type`` is re-added as NOT NULL
    without a default, which cannot succeed while rows exist.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('alarm', schema=None) as batch_op:
        # Mirror upgrade(): empty the table so the NOT NULL column can be added.
        batch_op.execute("TRUNCATE alarm")
        batch_op.add_column(sa.Column('proxy_id', sa.INTEGER(), autoincrement=False, nullable=True))
        batch_op.add_column(sa.Column('group_id', sa.INTEGER(), autoincrement=False, nullable=True))
        batch_op.add_column(sa.Column('bridge_id', sa.INTEGER(), autoincrement=False, nullable=True))
        batch_op.add_column(sa.Column('alarm_type', sa.VARCHAR(length=255), autoincrement=False, nullable=False))
        batch_op.add_column(sa.Column('origin_id', sa.INTEGER(), autoincrement=False, nullable=True))
        batch_op.create_foreign_key('fk_alarm_bridge_id_bridge', 'bridge', ['bridge_id'], ['id'])
        batch_op.create_foreign_key('fk_alarm_origin_id_origin', 'origin', ['origin_id'], ['id'])
        batch_op.create_foreign_key('fk_alarm_proxy_id_proxy', 'proxy', ['proxy_id'], ['id'])
        batch_op.create_foreign_key('fk_alarm_group_id_group', 'group', ['group_id'], ['id'])
        batch_op.alter_column('text',
                              existing_type=sa.VARCHAR(length=255),
                              nullable=True)
        batch_op.alter_column('last_updated',
                              existing_type=postgresql.TIMESTAMP(),
                              nullable=True)
        batch_op.drop_column('aspect')

    # ### end Alembic commands ###
|
Loading…
Add table
Add a link
Reference in a new issue