majuna/app/terraform/proxy/__init__.py

213 lines
8.2 KiB
Python

import os.path
from abc import abstractmethod
from collections import defaultdict
import datetime
import math
import string
import random
from typing import Dict, Optional, Any, List
from sqlalchemy import text
from tldextract import tldextract
from app import app
from app.extensions import db
from app.models.base import Group
from app.models.mirrors import Proxy, Origin, SmartProxy
from app.terraform.proxy.lib import all_cdn_prefixes
from app.terraform.terraform import TerraformAutomation
def update_smart_proxy_instance(group_id: int,
                                provider: str,
                                region: str,
                                instance_id: str) -> None:
    """
    Record the instance backing a group's smart proxy, creating the row if needed.

    Looks up the non-destroyed :class:`SmartProxy` for the given group, provider
    and region. When none exists a new one is created and added to the session.
    In both cases the ``updated`` timestamp and ``instance_id`` are refreshed.

    The session is not committed here; that is left to the caller.

    :param group_id: ID of the group the smart proxy serves
    :param provider: provider the instance is hosted with
    :param region: provider region the instance runs in
    :param instance_id: provider-side identifier of the instance
    """
    live_instances = SmartProxy.query.filter(
        SmartProxy.group_id == group_id,
        SmartProxy.region == region,
        SmartProxy.provider == provider,
        SmartProxy.destroyed.is_(None),
    )
    smart_proxy = live_instances.first()
    if smart_proxy is None:
        # First sighting of this group/provider/region: start a fresh record.
        smart_proxy = SmartProxy()
        smart_proxy.added = datetime.datetime.utcnow()
        smart_proxy.group_id = group_id
        smart_proxy.provider = provider
        smart_proxy.region = region
        db.session.add(smart_proxy)
    smart_proxy.updated = datetime.datetime.utcnow()
    smart_proxy.instance_id = instance_id
class ProxyAutomation(TerraformAutomation):
    """
    Shared Terraform automation for proxy providers.

    Before Terraform runs, proxies are created for origins that lack one,
    proxies of destroyed origins are deprecated, and proxies that have been
    deprecated for more than three days are marked destroyed. The Terraform
    configuration is then rendered from :attr:`template`, and after the run
    the resulting state is handed to :meth:`import_state`.

    ``self.provider``, ``tf_write``, ``tf_show`` and ``tmpl_write`` are used
    here but not defined in this class; presumably they come from
    :class:`TerraformAutomation` or the concrete subclass.
    """

    subgroup_max = math.inf
    """
    Maximum number of proxies to deploy per sub-group. This is required for some providers
    where the number of origins per group may exceed the number of proxies that can be
    configured in a single "configuration block", e.g. Azure CDN's profiles.
    """

    template: str
    """
    Terraform configuration template using Jinja 2.
    """

    template_parameters: List[str]
    """
    List of parameters to be read from the application configuration for use
    in the templating of the Terraform configuration.
    """

    smart_proxies = False
    """
    Whether this provider supports "smart" proxies.
    """

    def get_subgroups(self) -> Dict[int, Dict[int, int]]:
        """
        Count this provider's non-destroyed proxies per group and sub-group.

        :return: nested mapping ``{group_id: {psg: proxy_count}}``; missing
            keys default to a count of ``0``
        """
        subgroups: Dict[int, Dict[int, int]] = defaultdict(lambda: defaultdict(int))
        # The connection is released when the block exits, so the result has
        # to be consumed while still inside it.
        with db.engine.connect() as conn:
            # Bind parameters are passed as a dict: this form is accepted by
            # both SQLAlchemy 1.x and 2.0, unlike keyword arguments.
            result = conn.execute(text("""
SELECT origin.group_id, proxy.psg, COUNT(proxy.id) FROM proxy, origin
WHERE proxy.origin_id = origin.id
AND proxy.destroyed IS NULL
AND proxy.provider = :provider
GROUP BY origin.group_id, proxy.psg;
"""), {"provider": self.provider})
            for group_id, psg, count in result:
                subgroups[group_id][psg] = count
        return subgroups

    def create_missing_proxies(self) -> None:
        """
        Create a proxy for every live origin that has no active proxy with this provider.

        An active proxy is one that is neither deprecated nor destroyed. New
        proxies are placed in the first sub-group of their group that still
        has fewer than :attr:`subgroup_max` proxies.
        """
        groups = Group.query.all()
        subgroups = self.get_subgroups()
        for group in groups:
            subgroup = 0
            for origin in group.origins:
                if origin.destroyed is not None:
                    continue
                # Skip past sub-groups that are already full.
                while subgroups[group.id][subgroup] >= self.subgroup_max:
                    subgroup += 1
                has_active_proxy = any(
                    x.provider == self.provider and x.deprecated is None and x.destroyed is None
                    for x in origin.proxies
                )
                if has_active_proxy:
                    continue
                subgroups[group.id][subgroup] += 1
                now = datetime.datetime.utcnow()
                proxy = Proxy()
                proxy.origin_id = origin.id
                proxy.provider = self.provider
                proxy.psg = subgroup
                # The random usage below is good enough for its purpose: to create a slug that
                # hasn't been used before.
                proxy.slug = tldextract.extract(origin.domain_name).domain[:5] + ''.join(
                    random.choices(string.ascii_lowercase, k=12))  # nosec
                # One timestamp for both fields so a new proxy has added == updated.
                proxy.added = now
                proxy.updated = now
                db.session.add(proxy)
        db.session.commit()

    def deprecate_orphaned_proxies(self) -> None:
        """
        Deprecate this provider's active proxies whose origin has been destroyed.
        """
        proxies = Proxy.query.filter(
            Proxy.deprecated.is_(None),
            Proxy.destroyed.is_(None),
            Proxy.provider == self.provider
        ).all()
        for proxy in proxies:
            if proxy.origin.destroyed is not None:
                proxy.deprecate(reason="origin_destroyed")
        db.session.commit()

    def destroy_expired_proxies(self) -> None:
        """
        Mark as destroyed this provider's proxies that were deprecated more than 3 days ago.
        """
        now = datetime.datetime.utcnow()
        cutoff = now - datetime.timedelta(days=3)
        proxies = Proxy.query.filter(
            Proxy.destroyed.is_(None),
            Proxy.provider == self.provider,
            Proxy.deprecated < cutoff
        ).all()
        for proxy in proxies:
            # Same timestamp for every proxy handled in this pass.
            proxy.destroyed = now
            proxy.updated = now
        db.session.commit()

    @abstractmethod
    def import_state(self, state: Any) -> None:
        """
        Import the state produced by Terraform; must be implemented by subclasses.

        :param state: the value returned by ``self.tf_show()``
        """
        raise NotImplementedError()

    def tf_prehook(self) -> Optional[Any]:  # pylint: disable=useless-return
        """
        Bring the proxy records up to date before the configuration is generated.

        :return: always ``None``
        """
        self.create_missing_proxies()
        self.deprecate_orphaned_proxies()
        self.destroy_expired_proxies()
        return None

    def tf_posthook(self, *, prehook_result: Any = None) -> None:
        """
        Feed the output of ``self.tf_show()`` to :meth:`import_state`.

        :param prehook_result: value returned by :meth:`tf_prehook`; unused here
        """
        self.import_state(self.tf_show())

    def tf_generate(self) -> None:
        """
        Render the Terraform configuration and, if supported, the smart proxy configs.

        The template receives all groups, this provider's non-destroyed
        proxies, the sub-group counts, a handful of application settings and
        every setting named in :attr:`template_parameters` (looked up in the
        application configuration under its upper-cased name).
        """
        groups = Group.query.all()
        active_proxies = Proxy.query.filter(
            Proxy.provider == self.provider,
            Proxy.destroyed.is_(None)
        ).all()
        # The modules directory sits next to the application root directory.
        modules_path = os.path.join(os.path.dirname(app.root_path), 'terraform-modules')
        self.tf_write(
            self.template,
            groups=groups,
            proxies=active_proxies,
            subgroups=self.get_subgroups(),
            global_namespace=app.config['GLOBAL_NAMESPACE'],
            bypass_token=app.config['BYPASS_TOKEN'],
            terraform_modules_path=modules_path,
            **{k: app.config[k.upper()] for k in self.template_parameters})
        if self.smart_proxies:
            for group in groups:
                self.sp_config(group)

    def sp_trusted_prefixes(self) -> str:
        """
        Build one ``geoip2_proxy <prefix>;`` line per prefix from ``all_cdn_prefixes()``.

        :return: the directives joined by newlines
        """
        return "\n".join([f"geoip2_proxy {p};" for p in all_cdn_prefixes()])

    def sp_config(self, group: Group) -> None:
        """
        Write the smart proxy configuration file for one group.

        The template is passed to ``self.tmpl_write`` under the name
        ``smart_proxy.<group id>.conf`` and produces one ``server`` block per
        live origin of the group that has ``smart`` set.

        :param group: the group to generate the configuration for
        """
        group_origins: List[Origin] = Origin.query.filter(
            Origin.group_id == group.id,
            Origin.destroyed.is_(None),
            Origin.smart.is_(True)
        ).all()
        # The trusted-prefix and per-country lines are spliced into the
        # template source itself; everything else is left to Jinja.
        country_rules = "\n".join([f" {cc} no;" for cc in app.config['CENSORED_COUNTRIES']])
        template = """
geoip2 /usr/share/GeoIP/GeoIP2-City.mmdb {
auto_reload 5m;
$geoip2_metadata_country_build metadata build_epoch;
$geoip2_data_country_code default=US country iso_code;
}
""" + self.sp_trusted_prefixes() + """
geoip2_proxy_recursive on;
map $geoip2_data_country_code $redirect_country {
default yes;
""" + country_rules + """
}
{% for origin in origins %}
server {
listen 443 ssl;
server_name origin-{{ origin.id }}.{{ provider }}.smart.{{ smart_zone[:-1] }};
if ($redirect_country = no) {
rewrite ^ https://{{ origin.domain_name }}$request_uri break;
}
location / {
proxy_set_header Accept-Encoding "";
proxy_ssl_server_name on;
proxy_pass https://{{ origin.domain_name }}/;
subs_filter_types text/html text/css text/xml;
subs_filter https://{{ origin.domain_name }}/ /;
subs_filter "([^:]|)\\\"https://{{ origin.domain_name }}\\\"" \\1\\\"/\\\";
{%- for asset_origin in origin.group.origins | selectattr("assets") -%}
{%- for asset_proxy in asset_origin.proxies | selectattr("provider", "equalto", provider) | selectattr("deprecated", "none") | selectattr("destroyed", "none") -%}
{%- if loop.first %}
subs_filter https://{{ asset_origin.domain_name }}/ {{ asset_proxy.url }}/;
{%- endif -%}
{%- endfor -%}
{%- endfor %}
}
ssl_certificate /etc/ssl/smart_proxy.crt;
ssl_certificate_key /etc/ssl/private/smart_proxy.key;
}
{% endfor %}
"""
        self.tmpl_write(
            f"smart_proxy.{group.id}.conf",
            template,
            provider=self.provider,
            origins=group_origins,
            smart_zone=app.config['SMART_ZONE'])