majuna/app/cli/automate.py

156 lines
6.8 KiB
Python

import datetime
import logging
from traceback import TracebackException
from typing import Type
from app import app
from app.cli import _SubparserType, BaseCliHandler
from app.extensions import db
from app.models.activity import Activity
from app.models.automation import Automation, AutomationState, AutomationLogs
from app.terraform import BaseAutomation
from app.terraform.block_bridge_github import BlockBridgeGitHubAutomation
from app.terraform.block_external import BlockExternalAutomation
from app.terraform.block_ooni import BlockOONIAutomation
from app.terraform.block_roskomsvoboda import BlockRoskomsvobodaAutomation
from app.terraform.eotk.aws import EotkAWSAutomation
from app.terraform.alarms.eotk_aws import AlarmEotkAwsAutomation
from app.terraform.alarms.proxy_azure_cdn import AlarmProxyAzureCdnAutomation
from app.terraform.alarms.proxy_cloudfront import AlarmProxyCloudfrontAutomation
from app.terraform.alarms.proxy_http_status import AlarmProxyHTTPStatusAutomation
from app.terraform.alarms.smart_aws import AlarmSmartAwsAutomation
from app.terraform.bridge.aws import BridgeAWSAutomation
from app.terraform.bridge.gandi import BridgeGandiAutomation
from app.terraform.bridge.hcloud import BridgeHcloudAutomation
from app.terraform.bridge.ovh import BridgeOvhAutomation
from app.terraform.list.github import ListGithubAutomation
from app.terraform.list.gitlab import ListGitlabAutomation
from app.terraform.list.s3 import ListS3Automation
from app.terraform.proxy.azure_cdn import ProxyAzureCdnAutomation
from app.terraform.proxy.cloudfront import ProxyCloudfrontAutomation
from app.terraform.proxy.fastly import ProxyFastlyAutomation
# Registry of every automation task the CLI can run, keyed by the task's
# ``short_name``. The keys double as the accepted values for ``--job``.
jobs = {
    automation_cls.short_name: automation_cls
    for automation_cls in (
        AlarmEotkAwsAutomation,
        AlarmProxyAzureCdnAutomation,
        AlarmProxyCloudfrontAutomation,
        AlarmProxyHTTPStatusAutomation,
        AlarmSmartAwsAutomation,
        BlockBridgeGitHubAutomation,
        BlockExternalAutomation,
        BlockOONIAutomation,
        BlockRoskomsvobodaAutomation,
        BridgeAWSAutomation,
        BridgeGandiAutomation,
        BridgeHcloudAutomation,
        BridgeOvhAutomation,
        EotkAWSAutomation,
        ListGithubAutomation,
        ListGitlabAutomation,
        ListS3Automation,
        ProxyAzureCdnAutomation,
        ProxyCloudfrontAutomation,
        ProxyFastlyAutomation,
    )
}
def run_all(**kwargs: bool) -> None:
    """
    Run every registered automation task, one after another.

    :param kwargs: keyword options accepted by :func:`run_job`; the same options are forwarded
        unchanged to each task
    :return: None
    """
    for automation_cls in jobs.values():
        run_job(automation_cls, **kwargs)  # type: ignore
def run_job(job_cls: Type[BaseAutomation], *,
            force: bool = False, ignore_schedule: bool = False) -> None:
    """
    Run a single automation task and record its outcome in the database.

    The :class:`Automation` record for the task is looked up by ``short_name``; when none exists
    a new, disabled record is created first. The task is then skipped (with a logged warning) if
    an existing record is marked as running, if its next scheduled run is still in the future, or
    if it is disabled.

    When the task reports success the record returns to the idle state and the next run is
    scheduled ``frequency`` minutes ahead (7 when the task defines no ``frequency`` attribute).
    When it reports failure or raises, the record is put into the error state and disabled, the
    logs are stored, and an activity entry is created and notified.

    :param job_cls: the automation class to instantiate and run
    :param force: run the task even if it is marked as running, is not yet due, or is disabled
    :param ignore_schedule: run the task even if its next scheduled run is still in the future
    :return: None
    """
    automation = Automation.query.filter(Automation.short_name == job_cls.short_name).first()
    if automation is None:
        # First time this task has been seen: register it, disabled by default.
        automation = Automation()
        automation.short_name = job_cls.short_name
        automation.description = job_cls.description
        automation.enabled = False
        automation.next_is_full = False
        automation.added = datetime.datetime.utcnow()
        automation.updated = automation.added
        db.session.add(automation)
        db.session.commit()
    elif automation.state == AutomationState.RUNNING and not force:
        logging.warning("Not running an already running automation")
        return
    if not ignore_schedule and not force:
        if automation.next_run is not None and automation.next_run > datetime.datetime.utcnow():
            logging.warning("Not time to run this job yet")
            return
    if not automation.enabled and not force:
        logging.warning("job %s is disabled and --force not specified", job_cls.short_name)
        return
    # Persist the running state before starting so that a concurrent invocation sees it.
    automation.state = AutomationState.RUNNING
    db.session.commit()
    job: BaseAutomation = job_cls()
    # We want to catch any and all exceptions that would cause problems here, because
    # the error handling process isn't really handling the error, but rather causing it
    # to be logged for investigation. Catching more specific exceptions would just mean that
    # others go unrecorded and are difficult to debug.
    try:
        success, logs = job.automate()
    except Exception as exc:  # pylint: disable=broad-except
        trace = TracebackException.from_exception(exc)
        success = False
        # Each string yielded by format() already ends with a newline, so join without a
        # separator to avoid a blank line after every traceback entry.
        logs = "".join(trace.format())
    if success:
        automation.state = AutomationState.IDLE
        automation.next_run = datetime.datetime.utcnow() + datetime.timedelta(
            minutes=getattr(job, "frequency", 7))
    else:
        automation.state = AutomationState.ERROR
        automation.enabled = False
        automation.next_run = None
        # Use a single timestamp so that "added" and "updated" agree on a fresh log entry.
        logged_at = datetime.datetime.utcnow()
        log = AutomationLogs()
        log.automation_id = automation.id
        log.added = logged_at
        log.updated = logged_at
        log.logs = str(logs)
        db.session.add(log)
        activity = Activity(
            activity_type="automation",
            text=(f"[{automation.short_name}] 🚨 Automation failure: It was not possible to handle this failure safely "
                  "and so the automation task has been automatically disabled. It may be possible to simply re-enable "
                  "the task, but repeated failures will usually require deeper investigation. See logs for full "
                  "details.")
        )
        db.session.add(activity)
        activity.notify()  # Notify before commit because the failure occurred even if we can't commit.
    automation.last_run = datetime.datetime.utcnow()
    db.session.commit()
class AutomateCliHandler(BaseCliHandler):
    """CLI handler for the ``automate`` subcommand, which runs one or all automation tasks."""

    @classmethod
    def add_subparser_to(cls, subparsers: _SubparserType) -> None:
        """
        Register the ``automate`` subcommand and its options.

        :param subparsers: the subparser collection to add the ``automate`` parser to
        :return: None
        """
        automate_parser = subparsers.add_parser("automate", help="automation operations")
        automate_parser.add_argument(
            "-a", "--all", dest="all", help="run all automation jobs", action="store_true")
        automate_parser.add_argument(
            "-j", "--job", dest="job", choices=sorted(jobs),
            help="run a specific automation job")
        automate_parser.add_argument(
            "--force", help="run job even if disabled and it's not time yet", action="store_true")
        automate_parser.add_argument(
            "--ignore-schedule", help="run job even if it's not time yet", action="store_true")
        automate_parser.set_defaults(cls=cls)

    def run(self) -> None:
        """
        Run the requested automation task(s) inside the application context.

        A specific ``--job`` takes precedence over ``--all``; when neither is given an error is
        logged and nothing is run.

        :return: None
        """
        with app.app_context():
            args = self.args
            if not args.job and not args.all:
                logging.error("No action requested")
                return
            options = {"force": args.force, "ignore_schedule": args.ignore_schedule}
            if args.job:
                run_job(jobs[args.job], **options)  # type: ignore
            else:
                run_all(**options)