alarms: refactor the alarms subsystem

also include eotk alarms now
This commit is contained in:
Iain Learmonth 2022-05-18 15:49:36 +01:00
parent a935055083
commit e2ce24bf3b
17 changed files with 288 additions and 152 deletions

View file

@ -1,48 +1,40 @@
import datetime import datetime
from typing import Optional from typing import Optional, List
from app.extensions import db from app.extensions import db
from app.models.alarms import Alarm from app.models.alarms import Alarm
def alarms_for(target: str) -> List[Alarm]:
    """Return every alarm recorded against *target*.

    :param target: value compared for equality with ``Alarm.target``.
    :return: a new list holding the matching alarms.
    """
    matching = Alarm.query.filter(Alarm.target == target)
    return list(matching.all())
def _get_alarm(target: str, def _get_alarm(target: str,
alarm_type: str, aspect: str,
*,
proxy_id: Optional[int] = None,
origin_id: Optional[int] = None,
create_if_missing: bool = True) -> Optional[Alarm]: create_if_missing: bool = True) -> Optional[Alarm]:
alarm: Optional[Alarm] alarm: Optional[Alarm] = Alarm.query.filter(
if target == "proxy": Alarm.aspect == aspect,
alarm = Alarm.query.filter( Alarm.target == target
Alarm.target == "proxy", ).first()
Alarm.alarm_type == alarm_type,
Alarm.proxy_id == proxy_id
).first()
elif target == "origin":
alarm = Alarm.query.filter(
Alarm.target == "origin",
Alarm.alarm_type == alarm_type,
Alarm.proxy_id == origin_id
).first()
else:
return None
if create_if_missing and alarm is None: if create_if_missing and alarm is None:
alarm = Alarm() alarm = Alarm()
alarm.aspect = aspect
alarm.target = target alarm.target = target
alarm.alarm_type = alarm_type alarm.text = "New alarm"
alarm.state_changed = datetime.datetime.utcnow() alarm.state_changed = datetime.datetime.utcnow()
if target == "proxy": alarm.last_updated = datetime.datetime.utcnow()
alarm.proxy_id = proxy_id
if target == "origin":
alarm.origin_id = origin_id
db.session.add(alarm) db.session.add(alarm)
db.session.commit()
return alarm return alarm
def get_proxy_alarm(proxy_id: int, alarm_type: str) -> Alarm: def get_alarm(target: str, aspect: str) -> Optional[Alarm]:
alarm = _get_alarm("proxy", alarm_type, proxy_id=proxy_id) return _get_alarm(target, aspect, create_if_missing=False)
def get_or_create_alarm(target: str, aspect: str) -> Alarm:
alarm = _get_alarm(target, aspect, create_if_missing=True)
if alarm is None: if alarm is None:
# mypy can't tell that this will never be reached raise RuntimeError("Asked for an alarm to be created but got None.")
raise RuntimeError("Creating an alarm must have failed.")
return alarm return alarm

View file

@ -15,6 +15,7 @@ from app.terraform.block_external import BlockExternalAutomation
from app.terraform.block_ooni import BlockOONIAutomation from app.terraform.block_ooni import BlockOONIAutomation
from app.terraform.block_roskomsvoboda import BlockRoskomsvobodaAutomation from app.terraform.block_roskomsvoboda import BlockRoskomsvobodaAutomation
from app.terraform.eotk.aws import EotkAWSAutomation from app.terraform.eotk.aws import EotkAWSAutomation
from app.terraform.alarms.eotk_aws import AlarmEotkAwsAutomation
from app.terraform.alarms.proxy_azure_cdn import AlarmProxyAzureCdnAutomation from app.terraform.alarms.proxy_azure_cdn import AlarmProxyAzureCdnAutomation
from app.terraform.alarms.proxy_cloudfront import AlarmProxyCloudfrontAutomation from app.terraform.alarms.proxy_cloudfront import AlarmProxyCloudfrontAutomation
from app.terraform.alarms.proxy_http_status import AlarmProxyHTTPStatusAutomation from app.terraform.alarms.proxy_http_status import AlarmProxyHTTPStatusAutomation
@ -37,6 +38,7 @@ else:
jobs = { jobs = {
x.short_name: x x.short_name: x
for x in [ for x in [
AlarmEotkAwsAutomation,
AlarmProxyAzureCdnAutomation, AlarmProxyAzureCdnAutomation,
AlarmProxyCloudfrontAutomation, AlarmProxyCloudfrontAutomation,
AlarmProxyHTTPStatusAutomation, AlarmProxyHTTPStatusAutomation,

View file

@ -1,7 +1,10 @@
from abc import abstractmethod
from datetime import datetime from datetime import datetime
from typing import Union, List, Optional, Any from typing import Union, List, Optional, Any
from app.alarms import alarms_for
from app.extensions import db from app.extensions import db
from app.models.alarms import Alarm
class AbstractConfiguration(db.Model): # type: ignore class AbstractConfiguration(db.Model): # type: ignore
@ -13,6 +16,15 @@ class AbstractConfiguration(db.Model): # type: ignore
updated = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False) updated = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
destroyed = db.Column(db.DateTime(), nullable=True) destroyed = db.Column(db.DateTime(), nullable=True)
@property
def alarms(self) -> List[Alarm]:
    """Alarms whose target is this object's BRN, queried on each access."""
    own_brn = self.brn
    return alarms_for(own_brn)
@property
@abstractmethod
def brn(self) -> str:
    """Identifier string for this configuration object.

    Concrete models must override this property; the base implementation
    always raises.

    :raises NotImplementedError: when the subclass provides no override.
    """
    raise NotImplementedError
def destroy(self) -> None: def destroy(self) -> None:
self.destroyed = datetime.utcnow() self.destroyed = datetime.utcnow()
self.updated = datetime.utcnow() self.updated = datetime.utcnow()
@ -59,6 +71,11 @@ class AbstractResource(db.Model): # type: ignore
if self.updated is None: if self.updated is None:
self.updated = datetime.utcnow() self.updated = datetime.utcnow()
@property
@abstractmethod
def brn(self) -> str:
    """Identifier string for this resource.

    Concrete resource models must override this property; the base
    implementation always raises.

    :raises NotImplementedError: when the subclass provides no override.
    """
    raise NotImplementedError
def deprecate(self, *, reason: str) -> None: def deprecate(self, *, reason: str) -> None:
self.deprecated = datetime.utcnow() self.deprecated = datetime.utcnow()
self.deprecation_reason = reason self.deprecation_reason = reason

View file

@ -14,38 +14,30 @@ class AlarmState(enum.Enum):
class Alarm(db.Model): # type: ignore class Alarm(db.Model): # type: ignore
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
target = db.Column(db.String(60), nullable=False) target = db.Column(db.String(255), nullable=False)
group_id = db.Column(db.Integer, db.ForeignKey("group.id")) aspect = db.Column(db.String(255), nullable=False)
origin_id = db.Column(db.Integer, db.ForeignKey("origin.id"))
proxy_id = db.Column(db.Integer, db.ForeignKey("proxy.id"))
bridge_id = db.Column(db.Integer, db.ForeignKey("bridge.id"))
alarm_type = db.Column(db.String(255), nullable=False)
alarm_state = db.Column(db.Enum(AlarmState), default=AlarmState.UNKNOWN, nullable=False) alarm_state = db.Column(db.Enum(AlarmState), default=AlarmState.UNKNOWN, nullable=False)
state_changed = db.Column(db.DateTime(), nullable=False) state_changed = db.Column(db.DateTime(), nullable=False)
last_updated = db.Column(db.DateTime()) last_updated = db.Column(db.DateTime(), nullable=False)
text = db.Column(db.String(255)) text = db.Column(db.String(255), nullable=False)
group = db.relationship("Group", back_populates="alarms")
origin = db.relationship("Origin", back_populates="alarms")
proxy = db.relationship("Proxy", back_populates="alarms")
bridge = db.relationship("Bridge", back_populates="alarms")
@classmethod @classmethod
def csv_header(cls) -> List[str]: def csv_header(cls) -> List[str]:
return [ return ["id", "target", "alarm_type", "alarm_state", "state_changed", "last_updated", "text"]
"id", "target", "group_id", "origin_id", "proxy_id", "bridge_id", "alarm_type",
"alarm_state", "state_changed", "last_updated", "text"
]
def csv_row(self) -> List[Any]: def csv_row(self) -> List[Any]:
return [ return [getattr(self, x) for x in self.csv_header()]
getattr(self, x) for x in self.csv_header()
]
def update_state(self, state: AlarmState, text: str) -> None: def update_state(self, state: AlarmState, text: str) -> None:
from app.models.activity import Activity
if self.alarm_state != state or self.state_changed is None: if self.alarm_state != state or self.state_changed is None:
self.state_changed = datetime.utcnow() self.state_changed = datetime.utcnow()
activity = Activity(activity_type="alarm_state",
text=f"{self.alarm_state.name}->{state.name}! State changed for "
f"{self.aspect} on {self.target}: {text}")
activity.notify()
db.session.add(activity)
self.alarm_state = state self.alarm_state = state
self.text = text self.text = text
self.last_updated = datetime.utcnow() self.last_updated = datetime.utcnow()
db.session.commit()

View file

@ -13,7 +13,6 @@ class Group(AbstractConfiguration):
bridgeconfs = db.relationship("BridgeConf", back_populates="group") bridgeconfs = db.relationship("BridgeConf", back_populates="group")
eotks = db.relationship("Eotk", back_populates="group") eotks = db.relationship("Eotk", back_populates="group")
onions = db.relationship("Onion", back_populates="group") onions = db.relationship("Onion", back_populates="group")
alarms = db.relationship("Alarm", back_populates="group")
@classmethod @classmethod
def csv_header(cls) -> List[str]: def csv_header(cls) -> List[str]:

View file

@ -39,7 +39,6 @@ class Bridge(AbstractResource):
bridgeline = db.Column(db.String(255), nullable=True) bridgeline = db.Column(db.String(255), nullable=True)
conf = db.relationship("BridgeConf", back_populates="bridges") conf = db.relationship("BridgeConf", back_populates="bridges")
alarms = db.relationship("Alarm", back_populates="bridge")
@classmethod @classmethod
def csv_header(cls) -> List[str]: def csv_header(cls) -> List[str]:

View file

@ -1,5 +1,6 @@
from typing import Optional, List from typing import Optional, List
from flask import current_app
from tldextract import extract from tldextract import extract
from app.extensions import db from app.extensions import db
@ -14,7 +15,10 @@ class Origin(AbstractConfiguration):
group = db.relationship("Group", back_populates="origins") group = db.relationship("Group", back_populates="origins")
proxies = db.relationship("Proxy", back_populates="origin") proxies = db.relationship("Proxy", back_populates="origin")
alarms = db.relationship("Alarm", back_populates="origin")
@property
def brn(self) -> str:
    """BRN of this origin, built from the global namespace, group id and domain name."""
    namespace = current_app.config['GLOBAL_NAMESPACE']
    return f"brn:{namespace}:{self.group_id}:mirror:conf:origin/{self.domain_name}"
@classmethod @classmethod
def csv_header(cls) -> List[str]: def csv_header(cls) -> List[str]:
@ -45,7 +49,10 @@ class Proxy(AbstractResource):
url = db.Column(db.String(255), nullable=True) url = db.Column(db.String(255), nullable=True)
origin = db.relationship("Origin", back_populates="proxies") origin = db.relationship("Origin", back_populates="proxies")
alarms = db.relationship("Alarm", back_populates="proxy")
@property
def brn(self) -> str:
    """BRN of this proxy, built from the global namespace, the origin's group id, the provider and the proxy id."""
    namespace = current_app.config['GLOBAL_NAMESPACE']
    group_id = self.origin.group_id
    return f"brn:{namespace}:{group_id}:mirror:{self.provider}:proxy/{self.id}"
@classmethod @classmethod
def csv_header(cls) -> List[str]: def csv_header(cls) -> List[str]:

View file

@ -1,3 +1,5 @@
from flask import current_app
from app.extensions import db from app.extensions import db
from app.models import AbstractConfiguration, AbstractResource from app.models import AbstractConfiguration, AbstractResource
@ -17,3 +19,7 @@ class Eotk(AbstractResource):
region = db.Column(db.String(20), nullable=False) region = db.Column(db.String(20), nullable=False)
group = db.relationship("Group", back_populates="eotks") group = db.relationship("Group", back_populates="eotks")
@property
def brn(self) -> str:
    """BRN of this EOTK instance, built from the global namespace, group id, provider and region."""
    namespace = current_app.config['GLOBAL_NAMESPACE']
    return f"brn:{namespace}:{self.group_id}:eotk:{self.provider}:instance/{self.region}"

View file

@ -1,8 +1,9 @@
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Optional from typing import Optional, Union
from flask import Blueprint, render_template, request from flask import Blueprint, render_template, request
from flask.typing import ResponseReturnValue from flask.typing import ResponseReturnValue
from jinja2 import Markup
from sqlalchemy import desc, or_ from sqlalchemy import desc, or_
from app.models.activity import Activity from app.models.activity import Activity
@ -10,6 +11,7 @@ from app.models.alarms import Alarm, AlarmState
from app.models.bridges import Bridge from app.models.bridges import Bridge
from app.models.mirrors import Origin, Proxy from app.models.mirrors import Origin, Proxy
from app.models.base import Group from app.models.base import Group
from app.models.onions import Eotk
from app.portal.automation import bp as automation from app.portal.automation import bp as automation
from app.portal.bridgeconf import bp as bridgeconf from app.portal.bridgeconf import bp as bridgeconf
from app.portal.bridge import bp as bridge from app.portal.bridge import bp as bridge
@ -50,11 +52,44 @@ def format_datetime(s: Optional[datetime]) -> str:
return s.strftime("%a, %d %b %Y %H:%M:%S") return s.strftime("%a, %d %b %Y %H:%M:%S")
@portal.app_template_filter("describe_brn")
def describe_brn(s: str) -> Union[str, Markup]:
    """Template filter turning a BRN into a human-readable description.

    The BRN is split on ``:``. Field 2 is a group id, field 3 the product
    (``mirror`` or ``eotk``), field 4 the provider and field 5 a
    ``kind/identifier`` pair naming the resource.

    :param s: the BRN to describe.
    :return: a description of the origin, proxy, quota or EOTK instance the
        BRN names. The proxy description is ``Markup`` because it contains a
        ``<br>``; every other result is a plain string. When the BRN is
        malformed, unrecognised, or names a row that does not exist, ``s`` is
        returned unchanged.
    """
    parts = s.split(":")
    if len(parts) < 6:
        # Too few fields to index below; show the raw value rather than
        # failing the whole template render with an IndexError.
        return s
    product, provider, resource = parts[3], parts[4], parts[5]
    if product == "mirror":
        if resource.startswith("origin/"):
            origin = Origin.query.filter(
                Origin.domain_name == resource[len("origin/"):]
            ).first()
            if not origin:
                return s
            return f"Origin: {origin.domain_name} ({origin.group.group_name})"
        if resource.startswith("proxy/"):
            try:
                proxy_id = int(resource[len("proxy/"):])
            except ValueError:
                # A non-numeric id cannot match any proxy.
                return s
            proxy = Proxy.query.filter(Proxy.id == proxy_id).first()
            if not proxy:
                return s
            # Markup.format() escapes its arguments, so only the literal <br>
            # is emitted as HTML; the database values are rendered as text.
            return Markup("Proxy: {}<br>({}: {})").format(  # type: ignore
                proxy.url,
                proxy.origin.group.group_name,
                proxy.origin.domain_name,
            )
        if resource.startswith("quota/"):
            if provider == "cloudfront":
                return f"Quota: CloudFront {resource[len('quota/'):]}"
    if product == "eotk":
        if resource.startswith("instance/"):
            eotk = Eotk.query.filter(
                Eotk.group_id == parts[2],
                Eotk.region == resource[len("instance/"):]
            ).first()
            if not eotk:
                return s
            return f"EOTK Instance: {eotk.group.group_name} in {eotk.provider} {eotk.region}"
    return s
def total_origins_blocked() -> int: def total_origins_blocked() -> int:
count = 0 count = 0
for o in Origin.query.filter(Origin.destroyed.is_(None)).all(): for o in Origin.query.filter(Origin.destroyed.is_(None)).all():
for a in o.alarms: for a in o.alarms:
if a.alarm_type.startswith("origin-block-ooni-"): if a.aspect.startswith("origin-block-ooni-"):
if a.alarm_state == AlarmState.WARNING: if a.alarm_state == AlarmState.WARNING:
count += 1 count += 1
break break

View file

@ -28,7 +28,7 @@
<thead> <thead>
<tr> <tr>
<th scope="col">Resource</th> <th scope="col">Resource</th>
<th scope="col">Type</th> <th scope="col">Aspect</th>
<th scope="col">State</th> <th scope="col">State</th>
<th scope="col">Message</th> <th scope="col">Message</th>
<th scope="col">Last Update</th> <th scope="col">Last Update</th>
@ -37,14 +37,8 @@
<tbody> <tbody>
{% for alarm in alarms %} {% for alarm in alarms %}
<tr class="bg-{% if alarm.alarm_state.name == "OK" %}success{% elif alarm.alarm_state.name == "UNKNOWN" %}dark{% elif alarm.alarm_state.name == "WARNING" %}warning{% else %}danger{% endif %} text-{% if alarm.alarm_state.name == "WARNING" %}dark{% else %}light{% endif %}"> <tr class="bg-{% if alarm.alarm_state.name == "OK" %}success{% elif alarm.alarm_state.name == "UNKNOWN" %}dark{% elif alarm.alarm_state.name == "WARNING" %}warning{% else %}danger{% endif %} text-{% if alarm.alarm_state.name == "WARNING" %}dark{% else %}light{% endif %}">
{% if alarm.target == "proxy" %} <td>{{ alarm.target | describe_brn }}</td>
<td>Proxy: {{ alarm.proxy.url }}<br />({{ alarm.proxy.origin.domain_name }})</td> <td>{{ alarm.aspect }}</td>
{% elif alarm.target == "origin" %}
<td>Origin: {{ alarm.origin.domain_name }}</td>
{% elif alarm.target == "service/cloudfront" %}
<td>AWS CloudFront</td>
{% endif %}
<td>{{ alarm.alarm_type }}</td>
<td>{{ alarm.alarm_state.name }}</td> <td>{{ alarm.alarm_state.name }}</td>
<td>{{ alarm.text }}</td> <td>{{ alarm.text }}</td>
<td>{{ alarm.last_updated | format_datetime }}</td> <td>{{ alarm.last_updated | format_datetime }}</td>

View file

@ -0,0 +1,54 @@
from typing import Tuple, Optional
import boto3
from sqlalchemy import func
from app import app
from app.alarms import get_or_create_alarm
from app.extensions import db
from app.models.base import Group
from app.models.alarms import AlarmState
from app.models.onions import Eotk
from app.terraform import BaseAutomation
def alarms_in_region(region: str, prefix: str, aspect: str) -> None:
    """Copy the state of CloudWatch alarms in one region onto EOTK alarms.

    Every CloudWatch metric alarm whose name starts with *prefix* is matched
    to an EOTK instance: the second ``-``-separated field after the prefix is
    compared with the lower-cased group name, and the instance is looked up
    by that group and *region*. Alarms that cannot be matched are reported on
    stdout and skipped.

    :param region: AWS region to query; also used to select the EOTK instance.
    :param prefix: CloudWatch alarm name prefix to list.
    :param aspect: aspect under which the state is recorded on the instance.
    """
    cloudwatch = boto3.client('cloudwatch',
                              aws_access_key_id=app.config['AWS_ACCESS_KEY'],
                              aws_secret_access_key=app.config['AWS_SECRET_KEY'],
                              region_name=region)
    # CloudWatch state -> (dashboard state, message); any other state maps to UNKNOWN.
    known_states = {
        "OK": (AlarmState.OK, "CloudWatch alarm OK"),
        "ALARM": (AlarmState.CRITICAL, "CloudWatch alarm ALARM"),
    }
    dist_paginator = cloudwatch.get_paginator('describe_alarms')
    for page in dist_paginator.paginate(AlarmNamePrefix=prefix):
        for cw_alarm in page['MetricAlarms']:
            alarm_name = cw_alarm["AlarmName"]
            eotk_id = alarm_name[len(prefix):].split("-")
            if len(eotk_id) < 2:
                # No group field in the name: skip this alarm instead of
                # letting an IndexError abort the remaining alarms.
                print("Unable to find group for " + alarm_name)
                continue
            group: Optional[Group] = Group.query.filter(
                func.lower(Group.group_name) == eotk_id[1]
            ).first()
            if group is None:
                print("Unable to find group for " + alarm_name)
                continue
            eotk = Eotk.query.filter(
                Eotk.group_id == group.id,
                Eotk.region == region
            ).first()
            if eotk is None:
                print("Skipping unknown instance " + alarm_name)
                continue
            alarm = get_or_create_alarm(eotk.brn, aspect)
            cw_state = cw_alarm['StateValue']
            state, text = known_states.get(
                cw_state, (AlarmState.UNKNOWN, f"CloudWatch alarm {cw_state}")
            )
            alarm.update_state(state, text)
class AlarmEotkAwsAutomation(BaseAutomation):
    """Automation job that imports CloudWatch alarm states for AWS EOTK instances."""

    short_name = "monitor_eotk_aws"
    description = "Import alarms for AWS EOTK instances"

    def automate(self, full: bool = False) -> Tuple[bool, str]:
        """Import both alarm families for each region, then commit.

        :param full: accepted for interface compatibility; not used here.
        :return: ``(True, "")`` once the session has been committed.
        """
        # (CloudWatch alarm name prefix, aspect recorded on the dashboard alarm)
        checks = (
            ("eotk-bw-out-high-", "bandwidth-out-high"),
            ("eotk-cpu-high-", "instance-cpu"),
        )
        for region in ("us-east-2", "eu-central-1"):
            for prefix, aspect in checks:
                alarms_in_region(region, prefix, aspect)
        db.session.commit()
        return True, ""

View file

@ -4,7 +4,7 @@ from azure.identity import ClientSecretCredential
from azure.mgmt.alertsmanagement import AlertsManagementClient from azure.mgmt.alertsmanagement import AlertsManagementClient
from app import app from app import app
from app.alarms import get_proxy_alarm from app.alarms import get_or_create_alarm
from app.models.alarms import AlarmState from app.models.alarms import AlarmState
from app.models.mirrors import Proxy from app.models.mirrors import Proxy
from app.terraform import BaseAutomation from app.terraform import BaseAutomation
@ -30,7 +30,7 @@ class AlarmProxyAzureCdnAutomation(BaseAutomation):
Proxy.provider == "azure_cdn", Proxy.provider == "azure_cdn",
Proxy.destroyed.is_(None) Proxy.destroyed.is_(None)
): ):
alarm = get_proxy_alarm(proxy.id, "bandwidth-out-high") alarm = get_or_create_alarm(proxy.brn, "bandwidth-out-high")
if proxy.origin.group.group_name.lower() not in firing: if proxy.origin.group.group_name.lower() not in firing:
alarm.update_state(AlarmState.OK, "Azure monitor alert not firing") alarm.update_state(AlarmState.OK, "Azure monitor alert not firing")
else: else:

View file

@ -2,60 +2,62 @@ import datetime
from typing import Tuple from typing import Tuple
import boto3 import boto3
from flask import current_app
from app import app from app import app
from app.alarms import get_proxy_alarm from app.alarms import get_or_create_alarm
from app.extensions import db from app.extensions import db
from app.models.mirrors import Proxy from app.models.mirrors import Proxy
from app.models.alarms import AlarmState, Alarm from app.models.alarms import AlarmState
from app.terraform import BaseAutomation from app.terraform import BaseAutomation
def _cloudfront_quota() -> None:
    """Update the CloudFront distribution quota alarm.

    Counts the proxies that have not been destroyed and records the count on
    the ``quota-usage`` alarm: CRITICAL above 370, WARNING above 320,
    otherwise OK. ``update_state`` also refreshes the alarm's timestamps.
    """
    alarm = get_or_create_alarm(
        f"brn:{current_app.config['GLOBAL_NAMESPACE']}:0:mirror:cloudfront:quota/distributions",
        "quota-usage"
    )
    # Count in the database rather than loading every row just to take len().
    # NOTE(review): this counts live proxies of every provider, not only
    # CloudFront ones - confirm whether a provider filter is wanted here.
    deployed_count = Proxy.query.filter(Proxy.destroyed.is_(None)).count()
    message = f"{deployed_count} distributions deployed"
    if deployed_count > 370:
        alarm.update_state(AlarmState.CRITICAL, message)
    elif deployed_count > 320:
        alarm.update_state(AlarmState.WARNING, message)
    else:
        alarm.update_state(AlarmState.OK, message)
def _proxy_alarms() -> None:
    """Copy CloudWatch ``bandwidth-out-high-*`` alarm states onto proxy alarms.

    The part of each CloudWatch alarm name after the prefix is matched against
    ``Proxy.slug``. Alarms with no matching proxy are reported on stdout and
    skipped.
    """
    name_prefix = "bandwidth-out-high-"
    # CloudWatch state -> (dashboard state, message); any other state maps to UNKNOWN.
    known_states = {
        "OK": (AlarmState.OK, "CloudWatch alarm OK"),
        "ALARM": (AlarmState.CRITICAL, "CloudWatch alarm ALARM"),
    }
    client = boto3.client('cloudwatch',
                          aws_access_key_id=app.config['AWS_ACCESS_KEY'],
                          aws_secret_access_key=app.config['AWS_SECRET_KEY'],
                          region_name='us-east-2')
    pages = client.get_paginator('describe_alarms').paginate(AlarmNamePrefix=name_prefix)
    for page in pages:
        for cw_alarm in page['MetricAlarms']:
            dist_id = cw_alarm["AlarmName"][len(name_prefix):]
            proxy = Proxy.query.filter(Proxy.slug == dist_id).first()
            if proxy is None:
                print("Skipping unknown proxy " + dist_id)
                continue
            alarm = get_or_create_alarm(proxy.brn, "bandwidth-out-high")
            cw_state = cw_alarm['StateValue']
            if cw_state in known_states:
                new_state, text = known_states[cw_state]
            else:
                new_state, text = AlarmState.UNKNOWN, f"CloudWatch alarm {cw_state}"
            alarm.update_state(new_state, text)
class AlarmProxyCloudfrontAutomation(BaseAutomation): class AlarmProxyCloudfrontAutomation(BaseAutomation):
short_name = "monitor_proxy_cloudfront" short_name = "monitor_proxy_cloudfront"
description = "Import alarms for AWS CloudFront proxies" description = "Import alarms for AWS CloudFront proxies"
def automate(self, full: bool = False) -> Tuple[bool, str]: def automate(self, full: bool = False) -> Tuple[bool, str]:
cloudwatch = boto3.client('cloudwatch', _proxy_alarms()
aws_access_key_id=app.config['AWS_ACCESS_KEY'], _cloudfront_quota()
aws_secret_access_key=app.config['AWS_SECRET_KEY'],
region_name='us-east-2')
dist_paginator = cloudwatch.get_paginator('describe_alarms')
page_iterator = dist_paginator.paginate(AlarmNamePrefix="bandwidth-out-high-")
for page in page_iterator:
for cw_alarm in page['MetricAlarms']:
dist_id = cw_alarm["AlarmName"][len("bandwidth-out-high-"):]
proxy = Proxy.query.filter(Proxy.slug == dist_id).first()
if proxy is None:
print("Skipping unknown proxy " + dist_id)
continue
alarm = get_proxy_alarm(proxy.id, "bandwidth-out-high")
if cw_alarm['StateValue'] == "OK":
alarm.update_state(AlarmState.OK, "CloudWatch alarm OK")
elif cw_alarm['StateValue'] == "ALARM":
alarm.update_state(AlarmState.CRITICAL, "CloudWatch alarm ALARM")
else:
alarm.update_state(AlarmState.UNKNOWN, f"CloudWatch alarm {cw_alarm['StateValue']}")
alarm = Alarm.query.filter(
Alarm.alarm_type == "cloudfront-quota"
).first()
if alarm is None:
alarm = Alarm() # type: ignore
alarm.target = "service/cloudfront"
alarm.alarm_type = "cloudfront-quota"
alarm.state_changed = datetime.datetime.utcnow()
db.session.add(alarm)
alarm.last_updated = datetime.datetime.utcnow()
deployed_count = len(Proxy.query.filter(
Proxy.destroyed.is_(None)).all())
old_state = alarm.alarm_state
if deployed_count > 370:
alarm.alarm_state = AlarmState.CRITICAL
elif deployed_count > 320:
alarm.alarm_state = AlarmState.WARNING
else:
alarm.alarm_state = AlarmState.OK
if alarm.alarm_state != old_state:
alarm.state_changed = datetime.datetime.utcnow()
db.session.commit() db.session.commit()
return True, "" return True, ""

View file

@ -3,26 +3,13 @@ from typing import Tuple
import requests import requests
from requests import RequestException from requests import RequestException
from app.alarms import get_or_create_alarm
from app.extensions import db from app.extensions import db
from app.models.alarms import Alarm, AlarmState from app.models.alarms import AlarmState
from app.models.mirrors import Proxy from app.models.mirrors import Proxy
from app.terraform import BaseAutomation from app.terraform import BaseAutomation
def set_http_alarm(proxy_id: int, state: AlarmState, text: str) -> None:
alarm = Alarm.query.filter(
Alarm.proxy_id == proxy_id,
Alarm.alarm_type == "http-status"
).first()
if alarm is None:
alarm = Alarm()
alarm.proxy_id = proxy_id
alarm.alarm_type = "http-status"
alarm.target = "proxy"
db.session.add(alarm)
alarm.update_state(state, text)
class AlarmProxyHTTPStatusAutomation(BaseAutomation): class AlarmProxyHTTPStatusAutomation(BaseAutomation):
short_name = "alarm_http_status" short_name = "alarm_http_status"
description = "Check all deployed proxies for HTTP status code" description = "Check all deployed proxies for HTTP status code"
@ -40,28 +27,26 @@ class AlarmProxyHTTPStatusAutomation(BaseAutomation):
allow_redirects=False, allow_redirects=False,
timeout=5) timeout=5)
r.raise_for_status() r.raise_for_status()
alarm = get_or_create_alarm(proxy.brn, "http-status")
if r.is_redirect: if r.is_redirect:
set_http_alarm( alarm.update_state(
proxy.id,
AlarmState.CRITICAL, AlarmState.CRITICAL,
f"{r.status_code} {r.reason}" f"{r.status_code} {r.reason}"
) )
else: else:
set_http_alarm( alarm.update_state(
proxy.id,
AlarmState.OK, AlarmState.OK,
f"{r.status_code} {r.reason}" f"{r.status_code} {r.reason}"
) )
except requests.HTTPError: except requests.HTTPError:
set_http_alarm( alarm.update_state(
proxy.id,
AlarmState.CRITICAL, AlarmState.CRITICAL,
f"{r.status_code} {r.reason}" f"{r.status_code} {r.reason}"
) )
except RequestException as e: except RequestException as e:
set_http_alarm( alarm.update_state(
proxy.id,
AlarmState.CRITICAL, AlarmState.CRITICAL,
repr(e) repr(e)
) )
db.session.commit()
return True, "" return True, ""

View file

@ -5,8 +5,9 @@ from typing import Dict, Tuple, Any
import requests import requests
from app.alarms import get_or_create_alarm
from app.extensions import db from app.extensions import db
from app.models.alarms import Alarm, AlarmState from app.models.alarms import AlarmState
from app.models.mirrors import Origin from app.models.mirrors import Origin
from app.terraform import BaseAutomation from app.terraform import BaseAutomation
@ -58,20 +59,6 @@ def threshold_origin(domain_name: str) -> Dict[str, Any]:
return ooni return ooni
def set_ooni_alarm(origin_id: int, country: str, state: AlarmState, text: str) -> None:
alarm = Alarm.query.filter(
Alarm.origin_id == origin_id,
Alarm.alarm_type == f"origin-block-ooni-{country}"
).first()
if alarm is None:
alarm = Alarm()
alarm.origin_id = origin_id
alarm.alarm_type = f"origin-block-ooni-{country}"
alarm.target = "origin"
db.session.add(alarm)
alarm.update_state(state, text)
class BlockOONIAutomation(BaseAutomation): class BlockOONIAutomation(BaseAutomation):
short_name = "block_ooni" short_name = "block_ooni"
description = "Import origin and/or proxy reachability results from OONI" description = "Import origin and/or proxy reachability results from OONI"
@ -82,5 +69,8 @@ class BlockOONIAutomation(BaseAutomation):
for origin in origins: for origin in origins:
ooni = threshold_origin(origin.domain_name) ooni = threshold_origin(origin.domain_name)
for country in ooni: for country in ooni:
set_ooni_alarm(origin.id, country.lower(), ooni[country]["state"], ooni[country]["message"]) alarm = get_or_create_alarm(origin.brn,
f"origin-block-ooni-{country.lower()}")
alarm.update_state(ooni[country]["state"], ooni[country]["message"])
db.session.commit()
return True, "" return True, ""

View file

@ -65,8 +65,7 @@ class EotkAWSAutomation(TerraformAutomation):
aws = aws, aws = aws,
aws.second_region = aws.second_region aws.second_region = aws.second_region
} }
source = "sr2c/eotk/aws" source = "/Users/irl/PycharmProjects/bc-dashboard/terraform/terraform-aws-eotk"
version = "0.0.5"
namespace = "{{ global_namespace }}" namespace = "{{ global_namespace }}"
tenant = "{{ group.group_name }}" tenant = "{{ group.group_name }}"
name = "eotk" name = "eotk"

View file

@ -0,0 +1,63 @@
"""new alarm schema
Revision ID: 31aec2f86c40
Revises: 1842ba85a5c7
Create Date: 2022-05-18 14:22:51.028405
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '31aec2f86c40'
down_revision = '1842ba85a5c7'
branch_labels = None
depends_on = None
def upgrade():
    """Move the ``alarm`` table to the target/aspect schema.

    Existing rows are discarded, the ``aspect`` column is added, ``target`` is
    widened, ``last_updated`` and ``text`` become NOT NULL, and the per-model
    foreign keys and the ``alarm_type`` column are dropped.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('alarm', schema=None) as batch_op:
        # Old rows have no value for the new NOT NULL 'aspect' column, so the
        # table is emptied before the schema changes.
        batch_op.execute("TRUNCATE alarm")
        batch_op.add_column(sa.Column('aspect', sa.String(length=255), nullable=False))
        # The Alarm model now declares target as String(255) (previously
        # String(60)); widen the column so values longer than 60 characters fit.
        batch_op.alter_column('target',
                              existing_type=sa.VARCHAR(length=60),
                              type_=sa.String(length=255),
                              existing_nullable=False)
        batch_op.alter_column('last_updated',
                              existing_type=postgresql.TIMESTAMP(),
                              nullable=False)
        batch_op.alter_column('text',
                              existing_type=sa.VARCHAR(length=255),
                              nullable=False)
        batch_op.drop_constraint('fk_alarm_group_id_group', type_='foreignkey')
        batch_op.drop_constraint('fk_alarm_proxy_id_proxy', type_='foreignkey')
        batch_op.drop_constraint('fk_alarm_origin_id_origin', type_='foreignkey')
        batch_op.drop_constraint('fk_alarm_bridge_id_bridge', type_='foreignkey')
        batch_op.drop_column('origin_id')
        batch_op.drop_column('alarm_type')
        batch_op.drop_column('bridge_id')
        batch_op.drop_column('group_id')
        batch_op.drop_column('proxy_id')
    # ### end Alembic commands ###
def downgrade():
    """Restore the previous ``alarm`` schema.

    Existing rows are discarded, the per-model id columns, ``alarm_type`` and
    their foreign keys are re-created, ``text`` and ``last_updated`` become
    nullable again, ``target`` returns to its old width and ``aspect`` is
    dropped.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('alarm', schema=None) as batch_op:
        # 'alarm_type' is re-added as NOT NULL with no default, which fails on
        # a non-empty table, so the table is emptied first (as upgrade() does).
        batch_op.execute("TRUNCATE alarm")
        batch_op.add_column(sa.Column('proxy_id', sa.INTEGER(), autoincrement=False, nullable=True))
        batch_op.add_column(sa.Column('group_id', sa.INTEGER(), autoincrement=False, nullable=True))
        batch_op.add_column(sa.Column('bridge_id', sa.INTEGER(), autoincrement=False, nullable=True))
        batch_op.add_column(sa.Column('alarm_type', sa.VARCHAR(length=255), autoincrement=False, nullable=False))
        batch_op.add_column(sa.Column('origin_id', sa.INTEGER(), autoincrement=False, nullable=True))
        batch_op.create_foreign_key('fk_alarm_bridge_id_bridge', 'bridge', ['bridge_id'], ['id'])
        batch_op.create_foreign_key('fk_alarm_origin_id_origin', 'origin', ['origin_id'], ['id'])
        batch_op.create_foreign_key('fk_alarm_proxy_id_proxy', 'proxy', ['proxy_id'], ['id'])
        batch_op.create_foreign_key('fk_alarm_group_id_group', 'group', ['group_id'], ['id'])
        batch_op.alter_column('text',
                              existing_type=sa.VARCHAR(length=255),
                              nullable=True)
        batch_op.alter_column('last_updated',
                              existing_type=postgresql.TIMESTAMP(),
                              nullable=True)
        # The old model declared target as String(60). The table is empty at
        # this point, so no stored value can be too long for the narrower type.
        batch_op.alter_column('target',
                              existing_type=sa.VARCHAR(length=255),
                              type_=sa.String(length=60),
                              existing_nullable=False)
        batch_op.drop_column('aspect')
    # ### end Alembic commands ###