import os.path
from abc import abstractmethod
from collections import defaultdict
import datetime
import math
import string
import random
from typing import Dict, Optional, Any, List

from sqlalchemy import text
from tldextract import tldextract

from app import app
from app.extensions import db
from app.models.base import Group
from app.models.mirrors import Proxy, Origin, SmartProxy
from app.terraform.proxy.lib import all_cdn_prefixes
from app.terraform.terraform import TerraformAutomation


def update_smart_proxy_instance(group_id: int,
                                provider: str,
                                region: str,
                                instance_id: str) -> None:
    """
    Record the instance ID of the smart proxy for a group/provider/region.

    Looks up the non-destroyed ``SmartProxy`` row matching the given group,
    provider and region. If there is none, a new row is created and added to
    the session. In both cases the ``updated`` timestamp and ``instance_id``
    are set. The session is not committed here.

    :param group_id: ID of the group the smart proxy belongs to
    :param provider: provider name to match/store
    :param region: region name to match/store
    :param instance_id: instance identifier to store on the row
    """
    instance = SmartProxy.query.filter(
        SmartProxy.group_id == group_id,
        SmartProxy.region == region,
        SmartProxy.provider == provider,
        SmartProxy.destroyed.is_(None)
    ).first()
    if instance is None:
        instance = SmartProxy()
        instance.added = datetime.datetime.utcnow()
        instance.group_id = group_id
        instance.provider = provider
        instance.region = region
        db.session.add(instance)
    instance.updated = datetime.datetime.utcnow()
    instance.instance_id = instance_id


class ProxyAutomation(TerraformAutomation):
    """
    Terraform automation that maintains ``Proxy`` records for one provider.

    The pre-hook creates missing proxies, deprecates proxies whose origin has
    been destroyed and marks long-deprecated proxies as destroyed; the
    post-hook hands the Terraform state to :meth:`import_state`.
    """

    subgroup_max = math.inf
    """
    Maximum number of proxies to deploy per sub-group. This is required for some providers
    where the number of origins per group may exceed the number of proxies that can be
    configured in a single "configuration block", e.g. Azure CDN's profiles.
    """

    template: str
    """
    Terraform configuration template using Jinja 2.
    """

    template_parameters: List[str]
    """
    List of parameters to be read from the application configuration for use
    in the templating of the Terraform configuration.
    """

    smart_proxies = False
    """
    Whether this provider supports "smart" proxies.
    """

    def get_subgroups(self) -> Dict[int, Dict[int, int]]:
        """
        Count the non-destroyed proxies of this provider per group and sub-group.

        :return: nested mapping ``{group_id: {psg: proxy_count}}``; missing
            keys default to ``0`` because both levels are ``defaultdict``
        """
        # Adjacent string literals are concatenated at compile time, so the
        # pieces below form one continuous statement; every piece carries its
        # own leading space.
        query = text(
            ' SELECT origin.group_id, proxy.psg, COUNT(proxy.id)'
            ' FROM proxy, origin'
            ' WHERE proxy.origin_id = origin.id'
            ' AND proxy.destroyed IS NULL'
            ' AND proxy.provider = :provider'
            ' GROUP BY origin.group_id, proxy.psg; '
        )
        subgroups: Dict[int, Dict[int, int]] = defaultdict(lambda: defaultdict(int))
        # The context manager returns the connection to the pool even if the
        # query or the row iteration raises.
        with db.engine.connect() as conn:
            # Bind parameters are passed as a dictionary, which is accepted by
            # both the 1.x and the 2.0 style of Connection.execute().
            result = conn.execute(query, {"provider": self.provider})
            for row in result:
                subgroups[row[0]][row[1]] = row[2]
        return subgroups

    def create_missing_proxies(self) -> None:
        """
        Create a proxy for every live origin that has no active proxy.

        An origin is skipped when it is destroyed. A proxy counts as active
        when it belongs to this provider and is neither deprecated nor
        destroyed. New proxies are assigned to the first sub-group of the
        origin's group that still has fewer than ``subgroup_max`` proxies.
        """
        groups = Group.query.all()
        subgroups = self.get_subgroups()
        for group in groups:
            subgroup = 0
            for origin in group.origins:
                if origin.destroyed is not None:
                    continue
                # Advance to the first sub-group that still has capacity.
                while subgroups[group.id][subgroup] >= self.subgroup_max:
                    subgroup += 1
                proxies = [
                    x for x in origin.proxies
                    if x.provider == self.provider and x.deprecated is None and x.destroyed is None
                ]
                if not proxies:
                    subgroups[group.id][subgroup] += 1
                    proxy = Proxy()
                    proxy.origin_id = origin.id
                    proxy.provider = self.provider
                    proxy.psg = subgroup
                    # The random usage below is good enough for its purpose: to create a slug that
                    # hasn't been used before.
                    proxy.slug = tldextract.extract(origin.domain_name).domain[:5] + ''.join(
                        random.choices(string.ascii_lowercase, k=12))  # nosec
                    proxy.added = datetime.datetime.utcnow()
                    proxy.updated = datetime.datetime.utcnow()
                    db.session.add(proxy)
            # Persist the proxies created for this group.
            db.session.commit()

    def deprecate_orphaned_proxies(self) -> None:
        """
        Deprecate this provider's active proxies whose origin has been destroyed.
        """
        proxies = Proxy.query.filter(
            Proxy.deprecated.is_(None),
            Proxy.destroyed.is_(None),
            Proxy.provider == self.provider
        ).all()
        for proxy in proxies:
            if proxy.origin.destroyed is not None:
                proxy.deprecate(reason="origin_destroyed")
        db.session.commit()

    def destroy_expired_proxies(self) -> None:
        """
        Mark as destroyed this provider's proxies deprecated more than 3 days ago.
        """
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=3)
        proxies = Proxy.query.filter(
            Proxy.destroyed.is_(None),
            Proxy.provider == self.provider,
            Proxy.deprecated < cutoff
        ).all()
        for proxy in proxies:
            proxy.destroyed = datetime.datetime.utcnow()
            proxy.updated = datetime.datetime.utcnow()
        db.session.commit()

    @abstractmethod
    def import_state(self, state: Any) -> None:
        """
        Import the Terraform state; must be implemented by subclasses.

        :param state: the value returned by ``self.tf_show()``
        """
        raise NotImplementedError()

    def tf_prehook(self) -> Optional[Any]:  # pylint: disable=useless-return
        """
        Bring the proxy records up to date before Terraform runs.

        :return: always ``None``
        """
        self.create_missing_proxies()
        self.deprecate_orphaned_proxies()
        self.destroy_expired_proxies()
        return None

    def tf_posthook(self, *, prehook_result: Any = None) -> None:
        """
        Pass the output of ``tf_show()`` to :meth:`import_state`.

        :param prehook_result: accepted for interface compatibility; unused
        """
        self.import_state(self.tf_show())

    def tf_generate(self) -> None:
        """
        Write the Terraform configuration and, if enabled, smart proxy configs.

        The template receives all groups, this provider's non-destroyed
        proxies, the sub-group counts, selected application configuration
        values and every key listed in ``template_parameters`` (looked up
        upper-cased in the application configuration).
        """
        groups = Group.query.all()
        # The modules directory is a sibling of the application root.
        modules_path = os.path.join(os.path.dirname(app.root_path), 'terraform-modules')
        self.tf_write(
            self.template,
            groups=groups,
            proxies=Proxy.query.filter(
                Proxy.provider == self.provider, Proxy.destroyed.is_(None)).all(),
            subgroups=self.get_subgroups(),
            global_namespace=app.config['GLOBAL_NAMESPACE'],
            bypass_token=app.config['BYPASS_TOKEN'],
            terraform_modules_path=modules_path,
            **{k: app.config[k.upper()] for k in self.template_parameters})
        if self.smart_proxies:
            for group in groups:
                self.sp_config(group)

    def sp_trusted_prefixes(self) -> str:
        """
        Build one ``geoip2_proxy <prefix>;`` directive per CDN prefix.

        :return: the directives joined with newlines
        """
        return "\n".join([f"geoip2_proxy {p};" for p in all_cdn_prefixes()])

    def sp_config(self, group: Group) -> None:
        """
        Write the smart proxy nginx configuration for one group.

        The file ``smart_proxy.<group id>.conf`` gets one ``server`` block per
        non-destroyed origin of the group that is flagged as smart.

        :param group: group whose smart origins are rendered
        """
        group_origins: List[Origin] = Origin.query.filter(
            Origin.group_id == group.id,
            Origin.destroyed.is_(None),
            Origin.smart.is_(True)
        ).all()
        # Adjacent string literals are concatenated at compile time, so each
        # parenthesised group below is one continuous string; every piece
        # carries its own leading space.
        geoip_block = (
            ' geoip2 /usr/share/GeoIP/GeoIP2-City.mmdb {'
            ' auto_reload 5m;'
            ' $geoip2_metadata_country_build metadata build_epoch;'
            ' $geoip2_data_country_code default=US country iso_code;'
            ' } '
        )
        trusted_prefixes = self.sp_trusted_prefixes()
        country_map_head = (
            ' geoip2_proxy_recursive on;'
            ' map $geoip2_data_country_code $redirect_country {'
            ' default yes; '
        )
        # Countries listed in the configuration are mapped to "no".
        country_rules = "\n".join([f" {cc} no;" for cc in app.config['CENSORED_COUNTRIES']])
        server_blocks = (
            ' }'
            ' {% for origin in origins %}'
            ' server {'
            ' listen 443 ssl;'
            ' server_name origin-{{ origin.id }}.{{ provider }}.smart.{{ smart_zone[:-1] }};'
            ' if ($redirect_country = yes) { set $redirect_test 1; }'
            ' if ($arg_redirect = "false") { set $redirect_test 0; }'
            ' if ($redirect_test = 1) {'
            ' rewrite ^ https://{{ origin.domain_name }}$request_uri? break;'
            ' }'
            ' location / {'
            ' proxy_set_header Accept-Encoding "";'
            ' proxy_ssl_server_name on;'
            ' proxy_pass https://{{ origin.domain_name }}/;'
            ' subs_filter_types text/html text/css text/xml;'
            ' subs_filter https://{{ origin.domain_name }}/ /;'
            ' subs_filter "([^:]|)\\\"https://{{ origin.domain_name }}\\\"" \\1\\\"/\\\";'
            ' {%- for asset_origin in origin.group.origins | selectattr("assets") -%}'
            ' {%- for asset_proxy in asset_origin.proxies'
            ' | selectattr("provider", "equalto", provider)'
            ' | selectattr("deprecated", "none")'
            ' | selectattr("destroyed", "none") -%}'
            ' {%- if loop.first %}'
            ' subs_filter https://{{ asset_origin.domain_name }}/ {{ asset_proxy.url }}/;'
            ' {%- endif -%}'
            ' {%- endfor -%}'
            ' {%- endfor %}'
            ' }'
            ' ssl_certificate /etc/ssl/smart_proxy.crt;'
            ' ssl_certificate_key /etc/ssl/private/smart_proxy.key;'
            ' }'
            ' {% endfor %} '
        )
        self.tmpl_write(
            f"smart_proxy.{group.id}.conf",
            geoip_block + trusted_prefixes + country_map_head + country_rules + server_blocks,
            provider=self.provider,
            origins=group_origins,
            smart_zone=app.config['SMART_ZONE'])