majuna/app/terraform/proxy/__init__.py

226 lines
8.6 KiB
Python
Raw Normal View History

2022-08-12 11:55:14 +01:00
import os.path
2022-05-16 11:44:03 +01:00
from abc import abstractmethod
2022-04-25 15:05:28 +01:00
from collections import defaultdict
import datetime
import math
import string
import random
2022-05-16 11:44:03 +01:00
from typing import Dict, Optional, Any, List
2022-03-10 14:26:22 +00:00
2022-04-25 15:06:24 +01:00
from sqlalchemy import text
from tldextract import tldextract
2022-04-25 15:06:24 +01:00
2022-03-10 14:26:22 +00:00
from app import app
from app.extensions import db
2022-04-22 14:01:16 +01:00
from app.models.base import Group
from app.models.mirrors import Proxy, Origin, SmartProxy
2022-08-12 11:55:14 +01:00
from app.terraform.proxy.lib import all_cdn_prefixes
from app.terraform.terraform import TerraformAutomation
2022-03-10 14:26:22 +00:00
def update_smart_proxy_instance(group_id: int,
                                provider: str,
                                region: str,
                                instance_id: str) -> None:
    """
    Record the instance ID of the smart proxy for a group/provider/region.

    The first non-destroyed ``SmartProxy`` row matching the three keys is
    reused; when there is none, a new row is created and added to the
    session. The session is not committed by this function.

    :param group_id: ID of the group the smart proxy serves.
    :param provider: Provider the instance is deployed with.
    :param region: Region the instance is deployed in.
    :param instance_id: Instance identifier to store on the row.
    """
    criteria = (
        SmartProxy.group_id == group_id,
        SmartProxy.region == region,
        SmartProxy.provider == provider,
        SmartProxy.destroyed.is_(None),
    )
    smart_proxy = SmartProxy.query.filter(*criteria).first()
    if smart_proxy is None:
        # Nothing live for this combination yet: start a fresh record.
        smart_proxy = SmartProxy()
        smart_proxy.added = datetime.datetime.utcnow()
        smart_proxy.group_id = group_id
        smart_proxy.provider = provider
        smart_proxy.region = region
        db.session.add(smart_proxy)
    # Both new and pre-existing records get their timestamp and ID refreshed.
    smart_proxy.updated = datetime.datetime.utcnow()
    smart_proxy.instance_id = instance_id
def sp_trusted_prefixes() -> str:
    """
    Render one ``geoip2_proxy <prefix>;`` directive per prefix returned by
    ``all_cdn_prefixes()``, joined with newlines (no trailing newline).
    """
    directives = []
    for prefix in all_cdn_prefixes():
        directives.append(f"geoip2_proxy {prefix};")
    return "\n".join(directives)
class ProxyAutomation(TerraformAutomation):
    """
    Terraform automation that manages the proxies of a single provider.

    Before Terraform runs (:meth:`tf_prehook`) missing proxies are created,
    proxies of destroyed origins are deprecated, and proxies deprecated for
    more than three days are marked destroyed. After Terraform runs
    (:meth:`tf_posthook`) the resulting state is passed to
    :meth:`import_state`, which concrete providers must implement.

    ``self.provider``, ``self.short_name``, ``self.tf_write``,
    ``self.tmpl_write`` and ``self.tf_show`` are not defined in this class;
    they are presumably supplied by ``TerraformAutomation`` or the subclass.
    """

    subgroup_max = math.inf
    """
    Maximum number of proxies to deploy per sub-group. This is required for some providers
    where the number of origins per group may exceed the number of proxies that can be configured
    in a single "configuration block", e.g. Azure CDN's profiles.
    """

    template: str
    """
    Terraform configuration template using Jinja 2.
    """

    template_parameters: List[str]
    """
    List of parameters to be read from the application configuration for use
    in the templating of the Terraform configuration.
    """

    smart_proxies = False
    """
    Whether this provider supports "smart" proxies.
    """

    def get_subgroups(self) -> Dict[int, Dict[int, int]]:
        """
        Count this provider's non-destroyed proxies per group and sub-group.

        :return: Nested mapping ``{group_id: {psg: proxy_count}}``. Both levels
            are ``defaultdict``s, so looking up an unseen group or sub-group
            yields ``0`` rather than raising ``KeyError``.
        """
        subgroups: Dict[int, Dict[int, int]] = defaultdict(lambda: defaultdict(int))
        query = text("""
            SELECT origin.group_id, proxy.psg, COUNT(proxy.id) FROM proxy, origin
            WHERE proxy.origin_id = origin.id
            AND proxy.destroyed IS NULL
            AND proxy.provider = :provider
            GROUP BY origin.group_id, proxy.psg;
        """)
        # The context manager returns the connection to the pool even if the
        # query fails. Bind parameters are passed as a dict because the
        # keyword-argument form is not accepted by SQLAlchemy 2.0.
        with db.engine.connect() as conn:
            for row in conn.execute(query, {"provider": self.provider}):
                subgroups[row[0]][row[1]] = row[2]
        return subgroups

    def create_missing_proxies(self) -> None:
        """
        Create a proxy for every live origin that lacks one with this provider.

        An origin is skipped when it is destroyed or already has a proxy of
        this provider that is neither deprecated nor destroyed. New proxies
        are placed in the lowest-numbered sub-group of their group that holds
        fewer than ``subgroup_max`` proxies.
        """
        groups = Group.query.all()
        subgroups = self.get_subgroups()
        for group in groups:
            subgroup = 0
            for origin in group.origins:
                if origin.destroyed is not None:
                    continue
                # Move on to the first sub-group that still has capacity.
                while subgroups[group.id][subgroup] >= self.subgroup_max:
                    subgroup += 1
                has_live_proxy = any(
                    x.provider == self.provider and x.deprecated is None and x.destroyed is None
                    for x in origin.proxies
                )
                if has_live_proxy:
                    continue
                subgroups[group.id][subgroup] += 1
                proxy = Proxy()
                proxy.origin_id = origin.id
                proxy.provider = self.provider
                proxy.psg = subgroup
                # The random usage below is good enough for its purpose: to create a slug that
                # hasn't been used before.
                proxy.slug = tldextract.extract(origin.domain_name).domain[:5] + ''.join(
                    random.choices(string.ascii_lowercase, k=12))  # nosec
                proxy.added = datetime.datetime.utcnow()
                proxy.updated = datetime.datetime.utcnow()
                db.session.add(proxy)
        db.session.commit()

    def deprecate_orphaned_proxies(self) -> None:
        """
        Deprecate this provider's active proxies whose origin has been destroyed.

        Only proxies that are neither deprecated nor destroyed are considered.
        """
        proxies = Proxy.query.filter(
            Proxy.deprecated.is_(None),
            Proxy.destroyed.is_(None),
            Proxy.provider == self.provider
        ).all()
        for proxy in proxies:
            if proxy.origin.destroyed is not None:
                proxy.deprecate(reason="origin_destroyed")
        db.session.commit()

    def destroy_expired_proxies(self) -> None:
        """
        Mark as destroyed this provider's proxies deprecated more than three days ago.

        Proxies already marked destroyed are left untouched.
        """
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=3)
        proxies = Proxy.query.filter(
            Proxy.destroyed.is_(None),
            Proxy.provider == self.provider,
            Proxy.deprecated < cutoff
        ).all()
        for proxy in proxies:
            proxy.destroyed = datetime.datetime.utcnow()
            proxy.updated = datetime.datetime.utcnow()
        db.session.commit()

    @abstractmethod
    def import_state(self, state: Any) -> None:
        """
        Import the Terraform state produced by a run. Must be overridden.

        :param state: The value returned by ``self.tf_show()``.
        """
        raise NotImplementedError()

    def tf_prehook(self) -> Optional[Any]:  # pylint: disable=useless-return
        """
        Bring the proxy records up to date before Terraform is run.

        :return: Always ``None``.
        """
        self.create_missing_proxies()
        self.deprecate_orphaned_proxies()
        self.destroy_expired_proxies()
        return None

    def tf_posthook(self, *, prehook_result: Any = None) -> None:
        """
        Feed the output of ``self.tf_show()`` to :meth:`import_state`.

        :param prehook_result: Accepted for interface compatibility; unused here.
        """
        self.import_state(self.tf_show())

    def tf_generate(self) -> None:
        """
        Write the Terraform configuration for this provider.

        The template receives all groups, this provider's non-destroyed
        proxies, the sub-group counts, selected application settings, and one
        variable per entry of ``template_parameters`` (read from the
        upper-cased configuration key). When ``smart_proxies`` is set, a smart
        proxy configuration is also written for every group.
        """
        groups = Group.query.all()
        # The "terraform-modules" directory sits next to the application root.
        modules_path = os.path.join(os.path.dirname(app.root_path), 'terraform-modules')
        self.tf_write(
            self.template,
            groups=groups,
            proxies=Proxy.query.filter(
                Proxy.provider == self.provider,
                Proxy.destroyed.is_(None)
            ).all(),
            subgroups=self.get_subgroups(),
            global_namespace=app.config['GLOBAL_NAMESPACE'],
            bypass_token=app.config['BYPASS_TOKEN'],
            terraform_modules_path=modules_path,
            backend_config=f"""backend "http" {{
lock_address = "{app.config['TFSTATE_BACKEND']}/{self.short_name}"
unlock_address = "{app.config['TFSTATE_BACKEND']}/{self.short_name}"
address = "{app.config['TFSTATE_BACKEND']}/{self.short_name}"
}}""",
            **{k: app.config[k.upper()] for k in self.template_parameters})
        if self.smart_proxies:
            for group in groups:
                self.sp_config(group)

    def sp_config(self, group: Group) -> None:
        """
        Write the smart proxy configuration ``smart_proxy.<group id>.conf``.

        The template contains one ``server`` block for each of the group's
        non-destroyed origins that has ``smart`` set. The trusted CDN prefixes
        and the ``CENSORED_COUNTRIES`` map entries are concatenated into the
        template text before it is handed to ``self.tmpl_write``.

        :param group: Group whose smart origins are to be configured.
        """
        group_origins: List[Origin] = Origin.query.filter(
            Origin.group_id == group.id,
            Origin.destroyed.is_(None),
            Origin.smart.is_(True)
        ).all()
        # NOTE(review): in the template below $redirect_test is only ever set
        # to 0 or 1, so the `$redirect_test = 2` condition never matches and
        # the rewrite appears to be disabled - confirm this is intentional.
        self.tmpl_write(f"smart_proxy.{group.id}.conf", """
geoip2 /usr/share/GeoIP/GeoIP2-City.mmdb {
auto_reload 5m;
$geoip2_metadata_country_build metadata build_epoch;
$geoip2_data_country_code default=US country iso_code;
}
""" + sp_trusted_prefixes() + """
geoip2_proxy_recursive on;
map $geoip2_data_country_code $redirect_country {
default yes;
""" + "\n".join([f" {cc} no;" for cc in app.config['CENSORED_COUNTRIES']]) + """
}
{% for origin in origins %}
server {
listen 443 ssl;
server_name origin-{{ origin.id }}.{{ provider }}.smart.{{ smart_zone[:-1] }};
if ($redirect_country = yes) {
set $redirect_test 1;
}
if ($arg_redirect = "false") {
set $redirect_test 0;
}
if ($redirect_test = 2) {
rewrite ^ https://{{ origin.domain_name }}$request_uri? break;
}
location / {
proxy_set_header Accept-Encoding "";
proxy_ssl_server_name on;
proxy_pass https://{{ origin.domain_name }}/;
subs_filter_types text/html text/css text/xml;
subs_filter https://{{ origin.domain_name }}/ /;
subs_filter "([^:]|)\\\"https://{{ origin.domain_name }}\\\"" \\1\\\"/\\\";
{%- for asset_origin in origin.group.origins | selectattr("assets") -%}
{%- for asset_proxy in asset_origin.proxies | selectattr("provider", "equalto", provider) | selectattr("deprecated", "none") | selectattr("destroyed", "none") -%}
{%- if loop.first %}
subs_filter https://{{ asset_origin.domain_name }}/ {{ asset_proxy.url }}/;
{%- endif -%}
{%- endfor -%}
{%- endfor %}
}
ssl_certificate /etc/ssl/smart_proxy.crt;
ssl_certificate_key /etc/ssl/private/smart_proxy.key;
}
{% endfor %}
""",
                        provider=self.provider,
                        origins=group_origins,
                        smart_zone=app.config['SMART_ZONE'])