automation: establish an automation framework

This commit is contained in:
Iain Learmonth 2022-05-08 17:20:04 +01:00
parent 1b53bf451c
commit 8abe5d60fa
31 changed files with 586 additions and 274 deletions

View file

@ -1,4 +1,5 @@
import os
from typing import Tuple
from app import app
@ -10,7 +11,7 @@ class BaseAutomation:
the portal system.
"""
def automate(self):
def automate(self, full: bool = False) -> Tuple[bool, str]:
raise NotImplementedError()
def working_directory(self, filename=None) -> str:

View file

View file

@ -0,0 +1,36 @@
from azure.identity import ClientSecretCredential
from azure.mgmt.alertsmanagement import AlertsManagementClient
from app import app
from app.alarms import get_proxy_alarm
from app.models.alarms import AlarmState
from app.models.mirrors import Proxy
from app.terraform import BaseAutomation
class AlarmProxyAzureCdnAutomation(BaseAutomation):
    """
    Copy the state of Azure "bandwidth-out-high-bc-*" alerts onto the
    "bandwidth-out-high" alarm of each Azure CDN proxy.
    """

    short_name = "monitor_proxy_azure_cdn"
    description = "Import alarms for Azure CDN proxies"

    def automate(self, full: bool = False):
        """
        Update the bandwidth alarm of every Azure CDN proxy that is not destroyed.

        :param full: accepted so the signature matches ``BaseAutomation.automate``;
            this automation does not use it.
        :return: ``(True, [])`` once all proxies have been processed.
        """
        alert_prefix = "bandwidth-out-high-bc-"
        credential = ClientSecretCredential(
            tenant_id=app.config['AZURE_TENANT_ID'],
            client_id=app.config['AZURE_CLIENT_ID'],
            client_secret=app.config['AZURE_CLIENT_SECRET'])
        client = AlertsManagementClient(
            credential,
            app.config['AZURE_SUBSCRIPTION_ID']
        )
        # The part of the alert name after the prefix is what gets compared with
        # the lower-cased group name below. A set keeps the per-proxy lookup cheap.
        firing = {
            alert.name[len(alert_prefix):]
            for alert in client.alerts.get_all()
            if alert.name.startswith(alert_prefix)
            and alert.properties.essentials.monitor_condition == "Fired"
        }
        for proxy in Proxy.query.filter(
            Proxy.provider == "azure_cdn",
            Proxy.destroyed == None  # noqa: E711 - SQLAlchemy column comparison
        ):
            alarm = get_proxy_alarm(proxy.id, "bandwidth-out-high")
            if proxy.origin.group.group_name.lower() in firing:
                alarm.update_state(AlarmState.CRITICAL, "Azure monitor alert firing")
            else:
                alarm.update_state(AlarmState.OK, "Azure monitor alert not firing")
        return True, []

View file

@ -0,0 +1,60 @@
import datetime
import boto3
from app import app
from app.alarms import get_proxy_alarm
from app.extensions import db
from app.models.mirrors import Proxy
from app.models.alarms import AlarmState, Alarm
from app.terraform import BaseAutomation
class AlarmProxyCloudfrontAutomation(BaseAutomation):
    """
    Import CloudWatch "bandwidth-out-high-*" alarm states for proxies and
    maintain the service-wide "cloudfront-quota" alarm.
    """

    short_name = "monitor_proxy_cloudfront"
    description = "Import alarms for AWS CloudFront proxies"

    # Deployed-proxy counts above which the quota alarm becomes WARNING / CRITICAL.
    quota_warning_threshold = 320
    quota_critical_threshold = 370

    def automate(self, full: bool = False):
        """
        Refresh the per-proxy bandwidth alarms and then the quota alarm.

        :param full: accepted so the signature matches ``BaseAutomation.automate``;
            this automation does not use it.
        :return: ``(True, [])`` once both steps have completed.
        """
        self._import_cloudwatch_alarms()
        self._update_quota_alarm()
        return True, []

    def _import_cloudwatch_alarms(self):
        """Map every CloudWatch alarm with the bandwidth prefix onto its proxy's alarm."""
        alarm_prefix = "bandwidth-out-high-"
        cloudwatch = boto3.client('cloudwatch',
                                  aws_access_key_id=app.config['AWS_ACCESS_KEY'],
                                  aws_secret_access_key=app.config['AWS_SECRET_KEY'],
                                  region_name='us-east-2')
        paginator = cloudwatch.get_paginator('describe_alarms')
        for page in paginator.paginate(AlarmNamePrefix=alarm_prefix):
            for cw_alarm in page['MetricAlarms']:
                # The remainder of the alarm name is matched against Proxy.slug.
                dist_id = cw_alarm["AlarmName"][len(alarm_prefix):]
                proxy = Proxy.query.filter(Proxy.slug == dist_id).first()
                if proxy is None:
                    print("Skipping unknown proxy " + dist_id)
                    continue
                alarm = get_proxy_alarm(proxy.id, "bandwidth-out-high")
                state_value = cw_alarm['StateValue']
                if state_value == "OK":
                    alarm.update_state(AlarmState.OK, "CloudWatch alarm OK")
                elif state_value == "ALARM":
                    alarm.update_state(AlarmState.CRITICAL, "CloudWatch alarm ALARM")
                else:
                    alarm.update_state(AlarmState.UNKNOWN, f"CloudWatch alarm {state_value}")

    def _update_quota_alarm(self):
        """Create or update the "cloudfront-quota" alarm from the deployed proxy count."""
        alarm = Alarm.query.filter(
            Alarm.alarm_type == "cloudfront-quota"
        ).first()
        if alarm is None:
            alarm = Alarm()
            alarm.target = "service/cloudfront"
            alarm.alarm_type = "cloudfront-quota"
            alarm.state_changed = datetime.datetime.utcnow()
            db.session.add(alarm)
        alarm.last_updated = datetime.datetime.utcnow()
        # NOTE(review): this counts live proxies of every provider, not only
        # CloudFront ones - confirm that is intended for a CloudFront quota.
        deployed_count = Proxy.query.filter(
            Proxy.destroyed == None  # noqa: E711 - SQLAlchemy column comparison
        ).count()
        old_state = alarm.alarm_state
        if deployed_count > self.quota_critical_threshold:
            alarm.alarm_state = AlarmState.CRITICAL
        elif deployed_count > self.quota_warning_threshold:
            alarm.alarm_state = AlarmState.WARNING
        else:
            alarm.alarm_state = AlarmState.OK
        # Only move the state-change timestamp when the state really changed.
        if alarm.alarm_state != old_state:
            alarm.state_changed = datetime.datetime.utcnow()
        db.session.commit()

View file

@ -0,0 +1,64 @@
from typing import Tuple
import requests
from app.extensions import db
from app.models.alarms import Alarm, AlarmState
from app.models.mirrors import Proxy
from app.terraform import BaseAutomation
def set_http_alarm(proxy_id: int, state: AlarmState, text: str):
    """
    Apply an HTTP check result to a proxy's "http-status" alarm.

    The alarm for ``proxy_id`` is looked up first. If there is none, a new
    alarm with target "proxy" is created and added to the session. In both
    cases ``update_state(state, text)`` is then called on the alarm.

    :param proxy_id: id of the proxy the alarm belongs to
    :param state: alarm state to record
    :param text: text passed along with the state
    """
    existing = Alarm.query.filter(
        Alarm.proxy_id == proxy_id,
        Alarm.alarm_type == "http-status"
    ).first()
    if existing is not None:
        existing.update_state(state, text)
        return
    created = Alarm()
    created.proxy_id = proxy_id
    created.alarm_type = "http-status"
    created.target = "proxy"
    db.session.add(created)
    created.update_state(state, text)
class AlarmProxyHTTPStatusAutomation(BaseAutomation):
    """Request every live proxy's URL and record the result as an "http-status" alarm."""

    short_name = "alarm_http_status"
    description = "Check all deployed proxies for HTTP status code"

    def automate(self, full: bool = False) -> Tuple[bool, str]:
        """
        Check each proxy that is not destroyed and has a URL.

        A response that passes ``raise_for_status`` and is not a redirect sets
        the alarm to OK. Redirects, HTTP error statuses and request failures
        set it to CRITICAL.

        :param full: not used by this automation
        :return: ``(True, [])`` once every proxy has been checked
        """
        # NOTE(review): the annotation says ``str`` but an empty list is
        # returned; the value is kept unchanged for existing callers.
        proxies = Proxy.query.filter(
            Proxy.destroyed == None  # noqa: E711 - SQLAlchemy column comparison
        )
        for proxy in proxies:
            if proxy.url is None:
                continue
            try:
                r = requests.get(proxy.url,
                                 allow_redirects=False,
                                 timeout=5)
                r.raise_for_status()
            except (requests.ConnectionError, requests.Timeout):
                set_http_alarm(
                    proxy.id,
                    AlarmState.CRITICAL,
                    "Connection failure")
            except requests.HTTPError:
                set_http_alarm(
                    proxy.id,
                    AlarmState.CRITICAL,
                    f"{r.status_code} {r.reason}"
                )
            except requests.RequestException as exc:
                # Any other request failure (e.g. a malformed URL) must not stop
                # the remaining proxies from being checked.
                set_http_alarm(
                    proxy.id,
                    AlarmState.CRITICAL,
                    f"Request failure: {type(exc).__name__}"
                )
            else:
                # Redirects are not followed (allow_redirects=False) and are
                # treated as a failure.
                state = AlarmState.CRITICAL if r.is_redirect else AlarmState.OK
                set_http_alarm(
                    proxy.id,
                    state,
                    f"{r.status_code} {r.reason}"
                )
        return True, []

View file

@ -1,14 +1,14 @@
import datetime
from typing import Iterable
from typing import Iterable, Optional, Any
from app import app
from app.extensions import db
from app.models.bridges import BridgeConf, Bridge
from app.models.base import Group
from app.terraform import BaseAutomation
from app.terraform.terraform import TerraformAutomation
class BridgeAutomation(BaseAutomation):
class BridgeAutomation(TerraformAutomation):
def create_missing(self):
bridgeconfs: Iterable[BridgeConf] = BridgeConf.query.filter(
BridgeConf.provider == self.provider,
@ -45,8 +45,12 @@ class BridgeAutomation(BaseAutomation):
bridge.destroy()
db.session.commit()
def generate_terraform(self):
self.write_terraform_config(
def tf_prehook(self) -> Optional[Any]:
self.create_missing()
self.destroy_expired()
def tf_generate(self):
self.tf_write(
self.template,
groups=Group.query.all(),
bridgeconfs=BridgeConf.query.filter(
@ -60,8 +64,8 @@ class BridgeAutomation(BaseAutomation):
}
)
def import_terraform(self):
outputs = self.terraform_output()
def tf_posthook(self, *, prehook_result: Any = None) -> None:
outputs = self.tf_output()
for output in outputs:
if output.startswith('bridge_hashed_fingerprint_'):
parts = outputs[output]['value'].split(" ")

View file

@ -1,9 +1,9 @@
from app import app
from app.terraform.bridge import BridgeAutomation
class BridgeAWSAutomation(BridgeAutomation):
short_name = "bridge_aws"
description = "Deploy Tor bridges on AWS Lightsail"
provider = "aws"
template_parameters = [
@ -67,18 +67,3 @@ class BridgeAWSAutomation(BridgeAutomation):
{% endfor %}
{% endfor %}
"""
def automate():
auto = BridgeAWSAutomation()
auto.destroy_expired()
auto.create_missing()
auto.generate_terraform()
auto.terraform_init()
auto.terraform_apply()
auto.import_terraform()
if __name__ == "__main__":
with app.app_context():
automate()

View file

@ -1,9 +1,9 @@
from app import app
from app.terraform.bridge import BridgeAutomation
class BridgeGandiAutomation(BridgeAutomation):
short_name = "bridge_gandi"
description = "Deploy Tor bridges on GandiCloud VPS"
provider = "gandi"
template_parameters = [
@ -78,18 +78,3 @@ class BridgeGandiAutomation(BridgeAutomation):
{% endfor %}
{% endfor %}
"""
def automate():
auto = BridgeGandiAutomation()
auto.destroy_expired()
auto.create_missing()
auto.generate_terraform()
auto.terraform_init()
auto.terraform_apply()
auto.import_terraform()
if __name__ == "__main__":
with app.app_context():
automate()

View file

@ -1,9 +1,9 @@
from app import app
from app.terraform.bridge import BridgeAutomation
class BridgeHcloudAutomation(BridgeAutomation):
short_name = "bridge_hcloud"
description = "Deploy Tor bridges on Hetzner Cloud"
provider = "hcloud"
template_parameters = [
@ -81,18 +81,3 @@ class BridgeHcloudAutomation(BridgeAutomation):
{% endfor %}
{% endfor %}
"""
def automate():
auto = BridgeHcloudAutomation()
auto.destroy_expired()
auto.create_missing()
auto.generate_terraform()
auto.terraform_init()
auto.terraform_apply()
auto.import_terraform()
if __name__ == "__main__":
with app.app_context():
automate()

View file

@ -1,9 +1,9 @@
from app import app
from app.terraform.bridge import BridgeAutomation
class BridgeOvhAutomation(BridgeAutomation):
short_name = "bridge_ovh"
description = "Deploy Tor bridges on OVH Public Cloud"
provider = "ovh"
template_parameters = [
@ -104,18 +104,3 @@ class BridgeOvhAutomation(BridgeAutomation):
{% endfor %}
{% endfor %}
"""
def automate():
auto = BridgeOvhAutomation()
auto.destroy_expired()
auto.create_missing()
auto.generate_terraform()
auto.terraform_init()
auto.terraform_apply()
auto.import_terraform()
if __name__ == "__main__":
with app.app_context():
automate()

View file

@ -5,12 +5,12 @@ from app.lists.mirror_mapping import mirror_mapping
from app.lists.bc2 import mirror_sites
from app.lists.bridgelines import bridgelines
from app.models.base import MirrorList
from app.terraform import BaseAutomation
from app.terraform.terraform import TerraformAutomation
class ListAutomation(BaseAutomation):
def generate_terraform(self):
self.write_terraform_config(
class ListAutomation(TerraformAutomation):
def tf_generate(self):
self.tf_write(
self.template,
lists=MirrorList.query.filter(
MirrorList.destroyed == None,

View file

@ -1,9 +1,9 @@
from app import app
from app.terraform.list import ListAutomation
class ListGithubAutomation(ListAutomation):
short_name = "list_github"
description = "Update mirror lists in GitHub repositories"
provider = "github"
template_parameters = [
@ -45,11 +45,3 @@ class ListGithubAutomation(ListAutomation):
}
{% endfor %}
"""
if __name__ == "__main__":
with app.app_context():
auto = ListGithubAutomation()
auto.generate_terraform()
auto.terraform_init()
auto.terraform_apply()

View file

@ -1,9 +1,9 @@
from app import app
from app.terraform.list import ListAutomation
class ListGitlabAutomation(ListAutomation):
short_name = "list_gitlab"
description = "Update mirror lists in GitLab repositories"
provider = "gitlab"
template_parameters = [
@ -44,11 +44,3 @@ class ListGitlabAutomation(ListAutomation):
{% endfor %}
"""
if __name__ == "__main__":
with app.app_context():
auto = ListGitlabAutomation()
auto.generate_terraform()
auto.terraform_init()
auto.terraform_apply()

View file

@ -1,9 +1,9 @@
from app import app
from app.terraform.list import ListAutomation
class ListGithubAutomation(ListAutomation):
class ListS3Automation(ListAutomation):
short_name = "list_s3"
description = "Update mirror lists in AWS S3 buckets"
provider = "s3"
template_parameters = [
@ -36,11 +36,3 @@ class ListGithubAutomation(ListAutomation):
}
{% endfor %}
"""
if __name__ == "__main__":
with app.app_context():
auto = ListGithubAutomation()
auto.generate_terraform()
auto.terraform_init()
auto.terraform_apply()

View file

@ -37,6 +37,8 @@ class ProxyAutomation(TerraformAutomation):
for group in groups:
subgroup = 0
for origin in group.origins:
if origin.destroyed is not None:
continue
while True:
if subgroups[group.id][subgroup] >= self.subgroup_max:
subgroup += 1
@ -87,7 +89,7 @@ class ProxyAutomation(TerraformAutomation):
self.deprecate_orphaned_proxies()
self.destroy_expired_proxies()
def tf_posthook(self):
def tf_posthook(self, *, prehook_result):
self.import_state(self.tf_show())
def tf_generate(self):

View file

@ -1,16 +1,11 @@
from azure.identity import ClientSecretCredential
from azure.mgmt.alertsmanagement import AlertsManagementClient
from app import app
from app.alarms import get_proxy_alarm
from app.extensions import db
from app.models.mirrors import Proxy
from app.models.alarms import AlarmState
from app.terraform.proxy import ProxyAutomation
class ProxyAzureCdnAutomation(ProxyAutomation):
short_name = "proxy_azure_cdn"
description = "Deploy proxies to Azure CDN"
provider = "azure_cdn"
subgroup_max = 25
parallelism = 1
@ -170,33 +165,3 @@ class ProxyAzureCdnAutomation(ProxyAutomation):
for proxy in proxies:
proxy.url = f"https://{proxy.slug}.azureedge.net"
db.session.commit()
def import_monitor_alerts():
credential = ClientSecretCredential(
tenant_id=app.config['AZURE_TENANT_ID'],
client_id=app.config['AZURE_CLIENT_ID'],
client_secret=app.config['AZURE_CLIENT_SECRET'])
client = AlertsManagementClient(
credential,
app.config['AZURE_SUBSCRIPTION_ID']
)
firing = [x.name[len("bandwidth-out-high-bc-"):]
for x in client.alerts.get_all()
if x.name.startswith("bandwidth-out-high-bc-") and x.properties.essentials.monitor_condition == "Fired"]
for proxy in Proxy.query.filter(
Proxy.provider == "azure_cdn",
Proxy.destroyed == None
):
alarm = get_proxy_alarm(proxy.id, "bandwidth-out-high")
if proxy.origin.group.group_name.lower() not in firing:
alarm.update_state(AlarmState.OK, "Azure monitor alert not firing")
else:
alarm.update_state(AlarmState.CRITICAL, "Azure monitor alert firing")
if __name__ == "__main__":
with app.app_context():
auto = ProxyAzureCdnAutomation()
auto.automate()
import_monitor_alerts()

View file

@ -1,17 +1,13 @@
import datetime
import boto3
from app import app
from app.alarms import get_proxy_alarm
from app.extensions import db
from app.models.alarms import Alarm, AlarmState
from app.models.mirrors import Proxy
from app.terraform.proxy import ProxyAutomation
class ProxyCloudfrontAutomation(ProxyAutomation):
short_name = "proxy_cloudfront"
description = "Deploy proxies to AWS CloudFront"
provider = "cloudfront"
template_parameters = [
@ -87,55 +83,3 @@ class ProxyCloudfrontAutomation(ProxyAutomation):
proxy.terraform_updated = datetime.datetime.utcnow()
break
db.session.commit()
def import_cloudwatch_alarms():
cloudwatch = boto3.client('cloudwatch',
aws_access_key_id=app.config['AWS_ACCESS_KEY'],
aws_secret_access_key=app.config['AWS_SECRET_KEY'],
region_name='us-east-2')
dist_paginator = cloudwatch.get_paginator('describe_alarms')
page_iterator = dist_paginator.paginate(AlarmNamePrefix="bandwidth-out-high-")
for page in page_iterator:
for cw_alarm in page['MetricAlarms']:
dist_id = cw_alarm["AlarmName"][len("bandwidth-out-high-"):]
proxy = Proxy.query.filter(Proxy.slug == dist_id).first()
if proxy is None:
print("Skipping unknown proxy " + dist_id)
continue
alarm = get_proxy_alarm(proxy.id, "bandwidth-out-high")
if cw_alarm['StateValue'] == "OK":
alarm.update_state(AlarmState.OK, "CloudWatch alarm OK")
elif cw_alarm['StateValue'] == "ALARM":
alarm.update_state(AlarmState.CRITICAL, "CloudWatch alarm ALARM")
else:
alarm.update_state(AlarmState.UNKNOWN, f"CloudWatch alarm {cw_alarm['StateValue']}")
alarm = Alarm.query.filter(
Alarm.alarm_type == "cloudfront-quota"
).first()
if alarm is None:
alarm = Alarm()
alarm.target = "service/cloudfront"
alarm.alarm_type = "cloudfront-quota"
alarm.state_changed = datetime.datetime.utcnow()
db.session.add(alarm)
alarm.last_updated = datetime.datetime.utcnow()
deployed_count = len(Proxy.query.filter(
Proxy.destroyed == None).all())
old_state = alarm.alarm_state
if deployed_count > 370:
alarm.alarm_state = AlarmState.CRITICAL
elif deployed_count > 320:
alarm.alarm_state = AlarmState.WARNING
else:
alarm.alarm_state = AlarmState.OK
if alarm.alarm_state != old_state:
alarm.state_changed = datetime.datetime.utcnow()
db.session.commit()
if __name__ == "__main__":
with app.app_context():
auto = ProxyCloudfrontAutomation()
auto.automate()
import_cloudwatch_alarms()

View file

@ -1,62 +0,0 @@
import requests
from app import app
from app.extensions import db
from app.models.alarms import Alarm, AlarmState
from app.models.mirrors import Proxy
def set_http_alarm(proxy_id: int, state: AlarmState, text: str):
    """
    Store ``state`` and ``text`` on the "http-status" alarm of a proxy.

    If the proxy has no such alarm yet, one is created with target "proxy" and
    added to the database session before the state is applied.

    :param proxy_id: id of the proxy that was checked
    :param state: alarm state to record
    :param text: text passed along with the state
    """
    lookup = Alarm.query.filter(
        Alarm.proxy_id == proxy_id,
        Alarm.alarm_type == "http-status"
    )
    alarm = lookup.first()
    if alarm is None:
        alarm = Alarm()
        # Initial field values of a freshly created alarm, applied in order.
        initial_fields = (
            ("proxy_id", proxy_id),
            ("alarm_type", "http-status"),
            ("target", "proxy"),
        )
        for field_name, field_value in initial_fields:
            setattr(alarm, field_name, field_value)
        db.session.add(alarm)
    alarm.update_state(state, text)
def check_http():
    """
    Request the URL of every proxy that is not destroyed and record the result
    as an "http-status" alarm.

    A response that passes ``raise_for_status`` and is not a redirect sets the
    alarm to OK. Redirects, HTTP error statuses and request failures set it to
    CRITICAL. Proxies without a URL are skipped.
    """
    proxies = Proxy.query.filter(
        Proxy.destroyed == None  # noqa: E711 - SQLAlchemy column comparison
    )
    for proxy in proxies:
        if proxy.url is None:
            continue
        try:
            r = requests.get(proxy.url,
                             allow_redirects=False,
                             timeout=5)
            r.raise_for_status()
        except (requests.ConnectionError, requests.Timeout):
            set_http_alarm(
                proxy.id,
                AlarmState.CRITICAL,
                "Connection failure")
        except requests.HTTPError:
            set_http_alarm(
                proxy.id,
                AlarmState.CRITICAL,
                f"{r.status_code} {r.reason}"
            )
        except requests.RequestException as exc:
            # Any other request failure (e.g. a malformed URL) must not stop
            # the remaining proxies from being checked.
            set_http_alarm(
                proxy.id,
                AlarmState.CRITICAL,
                f"Request failure: {type(exc).__name__}"
            )
        else:
            # Redirects are not followed (allow_redirects=False) and are
            # treated as a failure.
            state = AlarmState.CRITICAL if r.is_redirect else AlarmState.OK
            set_http_alarm(
                proxy.id,
                state,
                f"{r.status_code} {r.reason}"
            )
if __name__ == "__main__":
    # Script entry point: run the HTTP checks inside an application context.
    context = app.app_context()
    with context:
        check_http()

View file

@ -1,6 +1,6 @@
import json
import subprocess
from typing import Dict, Any, Optional
from typing import Any, Dict, List, Optional, Tuple
import jinja2
@ -18,20 +18,27 @@ class TerraformAutomation(BaseAutomation):
Default parallelism for remote API calls.
"""
def automate(self):
self.tf_prehook()
def automate(self, full: bool = False):
prehook_result = self.tf_prehook()
self.tf_generate()
self.tf_init()
self.tf_apply(refresh=False)
self.tf_posthook()
returncode, logs = self.tf_apply(refresh=full)
self.tf_posthook(prehook_result=prehook_result)
return True if returncode == 0 else False, logs
def tf_apply(self, refresh: bool = True, parallelism: Optional[int] = None):
def tf_apply(self, refresh: bool = True, parallelism: Optional[int] = None) -> Tuple[int, List[Dict[str, Any]]]:
if not parallelism:
parallelism = self.parallelism
subprocess.run(
tf = subprocess.run(
['terraform', 'apply', f'-refresh={str(refresh).lower()}', '-auto-approve',
f'-parallelism={str(parallelism)}'],
cwd=self.working_directory())
f'-parallelism={str(parallelism)}', '-json'],
cwd=self.working_directory(),
stdout=subprocess.PIPE)
logs = []
for line in tf.stdout.decode('utf-8').split('\n'):
if line.strip():
logs.append(json.loads(line))
return tf.returncode, logs
def tf_generate(self):
raise NotImplementedError()
@ -56,7 +63,7 @@ class TerraformAutomation(BaseAutomation):
# more like JSON-ND, task is to figure out how to yield those records
# as plan runs, the same is probably also true for apply
def tf_posthook(self, prehook_result: Any = None) -> None:
def tf_posthook(self, *, prehook_result: Any = None) -> None:
"""
This hook function is called as part of normal automation, after the
completion of :func:`tf_apply`.