automation: establish an automation framework
This commit is contained in:
parent
1b53bf451c
commit
8abe5d60fa
31 changed files with 586 additions and 274 deletions
0
app/terraform/alarms/__init__.py
Normal file
0
app/terraform/alarms/__init__.py
Normal file
36
app/terraform/alarms/proxy_azure_cdn.py
Normal file
36
app/terraform/alarms/proxy_azure_cdn.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
from azure.identity import ClientSecretCredential
|
||||
from azure.mgmt.alertsmanagement import AlertsManagementClient
|
||||
|
||||
from app import app
|
||||
from app.alarms import get_proxy_alarm
|
||||
from app.models.alarms import AlarmState
|
||||
from app.models.mirrors import Proxy
|
||||
from app.terraform import BaseAutomation
|
||||
|
||||
|
||||
class AlarmProxyAzureCdnAutomation(BaseAutomation):
|
||||
short_name = "monitor_proxy_azure_cdn"
|
||||
description = "Import alarms for Azure CDN proxies"
|
||||
|
||||
def automate(self):
|
||||
credential = ClientSecretCredential(
|
||||
tenant_id=app.config['AZURE_TENANT_ID'],
|
||||
client_id=app.config['AZURE_CLIENT_ID'],
|
||||
client_secret=app.config['AZURE_CLIENT_SECRET'])
|
||||
client = AlertsManagementClient(
|
||||
credential,
|
||||
app.config['AZURE_SUBSCRIPTION_ID']
|
||||
)
|
||||
firing = [x.name[len("bandwidth-out-high-bc-"):]
|
||||
for x in client.alerts.get_all()
|
||||
if x.name.startswith("bandwidth-out-high-bc-") and x.properties.essentials.monitor_condition == "Fired"]
|
||||
for proxy in Proxy.query.filter(
|
||||
Proxy.provider == "azure_cdn",
|
||||
Proxy.destroyed == None
|
||||
):
|
||||
alarm = get_proxy_alarm(proxy.id, "bandwidth-out-high")
|
||||
if proxy.origin.group.group_name.lower() not in firing:
|
||||
alarm.update_state(AlarmState.OK, "Azure monitor alert not firing")
|
||||
else:
|
||||
alarm.update_state(AlarmState.CRITICAL, "Azure monitor alert firing")
|
||||
return True, []
|
60
app/terraform/alarms/proxy_cloudfront.py
Normal file
60
app/terraform/alarms/proxy_cloudfront.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
import datetime
|
||||
|
||||
import boto3
|
||||
|
||||
from app import app
|
||||
from app.alarms import get_proxy_alarm
|
||||
from app.extensions import db
|
||||
from app.models.mirrors import Proxy
|
||||
from app.models.alarms import AlarmState, Alarm
|
||||
from app.terraform import BaseAutomation
|
||||
|
||||
|
||||
class AlarmProxyCloudfrontAutomation(BaseAutomation):
|
||||
short_name = "monitor_proxy_cloudfront"
|
||||
description = "Import alarms for AWS CloudFront proxies"
|
||||
|
||||
def automate(self):
|
||||
cloudwatch = boto3.client('cloudwatch',
|
||||
aws_access_key_id=app.config['AWS_ACCESS_KEY'],
|
||||
aws_secret_access_key=app.config['AWS_SECRET_KEY'],
|
||||
region_name='us-east-2')
|
||||
dist_paginator = cloudwatch.get_paginator('describe_alarms')
|
||||
page_iterator = dist_paginator.paginate(AlarmNamePrefix="bandwidth-out-high-")
|
||||
for page in page_iterator:
|
||||
for cw_alarm in page['MetricAlarms']:
|
||||
dist_id = cw_alarm["AlarmName"][len("bandwidth-out-high-"):]
|
||||
proxy = Proxy.query.filter(Proxy.slug == dist_id).first()
|
||||
if proxy is None:
|
||||
print("Skipping unknown proxy " + dist_id)
|
||||
continue
|
||||
alarm = get_proxy_alarm(proxy.id, "bandwidth-out-high")
|
||||
if cw_alarm['StateValue'] == "OK":
|
||||
alarm.update_state(AlarmState.OK, "CloudWatch alarm OK")
|
||||
elif cw_alarm['StateValue'] == "ALARM":
|
||||
alarm.update_state(AlarmState.CRITICAL, "CloudWatch alarm ALARM")
|
||||
else:
|
||||
alarm.update_state(AlarmState.UNKNOWN, f"CloudWatch alarm {cw_alarm['StateValue']}")
|
||||
alarm = Alarm.query.filter(
|
||||
Alarm.alarm_type == "cloudfront-quota"
|
||||
).first()
|
||||
if alarm is None:
|
||||
alarm = Alarm()
|
||||
alarm.target = "service/cloudfront"
|
||||
alarm.alarm_type = "cloudfront-quota"
|
||||
alarm.state_changed = datetime.datetime.utcnow()
|
||||
db.session.add(alarm)
|
||||
alarm.last_updated = datetime.datetime.utcnow()
|
||||
deployed_count = len(Proxy.query.filter(
|
||||
Proxy.destroyed == None).all())
|
||||
old_state = alarm.alarm_state
|
||||
if deployed_count > 370:
|
||||
alarm.alarm_state = AlarmState.CRITICAL
|
||||
elif deployed_count > 320:
|
||||
alarm.alarm_state = AlarmState.WARNING
|
||||
else:
|
||||
alarm.alarm_state = AlarmState.OK
|
||||
if alarm.alarm_state != old_state:
|
||||
alarm.state_changed = datetime.datetime.utcnow()
|
||||
db.session.commit()
|
||||
return True, []
|
64
app/terraform/alarms/proxy_http_status.py
Normal file
64
app/terraform/alarms/proxy_http_status.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
from typing import Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from app.extensions import db
|
||||
from app.models.alarms import Alarm, AlarmState
|
||||
from app.models.mirrors import Proxy
|
||||
from app.terraform import BaseAutomation
|
||||
|
||||
|
||||
def set_http_alarm(proxy_id: int, state: AlarmState, text: str):
|
||||
alarm = Alarm.query.filter(
|
||||
Alarm.proxy_id == proxy_id,
|
||||
Alarm.alarm_type == "http-status"
|
||||
).first()
|
||||
if alarm is None:
|
||||
alarm = Alarm()
|
||||
alarm.proxy_id = proxy_id
|
||||
alarm.alarm_type = "http-status"
|
||||
alarm.target = "proxy"
|
||||
db.session.add(alarm)
|
||||
alarm.update_state(state, text)
|
||||
|
||||
|
||||
class AlarmProxyHTTPStatusAutomation(BaseAutomation):
|
||||
short_name = "alarm_http_status"
|
||||
description = "Check all deployed proxies for HTTP status code"
|
||||
|
||||
def automate(self, full: bool = False) -> Tuple[bool, str]:
|
||||
proxies = Proxy.query.filter(
|
||||
Proxy.destroyed == None
|
||||
)
|
||||
for proxy in proxies:
|
||||
try:
|
||||
if proxy.url is None:
|
||||
continue
|
||||
r = requests.get(proxy.url,
|
||||
allow_redirects=False,
|
||||
timeout=5)
|
||||
r.raise_for_status()
|
||||
if r.is_redirect:
|
||||
set_http_alarm(
|
||||
proxy.id,
|
||||
AlarmState.CRITICAL,
|
||||
f"{r.status_code} {r.reason}"
|
||||
)
|
||||
else:
|
||||
set_http_alarm(
|
||||
proxy.id,
|
||||
AlarmState.OK,
|
||||
f"{r.status_code} {r.reason}"
|
||||
)
|
||||
except (requests.ConnectionError, requests.Timeout):
|
||||
set_http_alarm(
|
||||
proxy.id,
|
||||
AlarmState.CRITICAL,
|
||||
f"Connection failure")
|
||||
except requests.HTTPError:
|
||||
set_http_alarm(
|
||||
proxy.id,
|
||||
AlarmState.CRITICAL,
|
||||
f"{r.status_code} {r.reason}"
|
||||
)
|
||||
return True, []
|
Loading…
Add table
Add a link
Reference in a new issue