312 lines
14 KiB
Python
312 lines
14 KiB
Python
import datetime
|
|
import logging
|
|
import random
|
|
import string
|
|
from collections import OrderedDict
|
|
from typing import Any, Dict, List, Optional, Tuple, Type
|
|
from typing import OrderedDict as OrderedDictT
|
|
|
|
from flask import current_app
|
|
from tldextract import tldextract
|
|
|
|
from app import db
|
|
from app.models.base import Pool
|
|
from app.models.mirrors import Proxy, Origin
|
|
from app.terraform import BaseAutomation
|
|
from app.terraform.proxy import ProxyAutomation
|
|
from app.terraform.proxy.azure_cdn import ProxyAzureCdnAutomation
|
|
from app.terraform.proxy.cloudfront import ProxyCloudfrontAutomation
|
|
from app.terraform.proxy.fastly import ProxyFastlyAutomation
|
|
|
|
# Registry of the enabled proxy providers, keyed by each automation class's
# ``provider`` attribute. Classes whose ``enabled`` attribute is falsy are
# left out. Dicts preserve insertion order, so iterating this mapping (as
# ``ProxyMetaAutomation.create_proxy`` does) visits the providers in the
# order of preference listed below.
PROXY_PROVIDERS: Dict[str, Type[ProxyAutomation]] = {p.provider: p for p in [  # type: ignore[attr-defined]
    # In order of preference
    ProxyCloudfrontAutomation,
    ProxyFastlyAutomation,
    ProxyAzureCdnAutomation
] if p.enabled}  # type: ignore[attr-defined]
|
|
|
|
# Nested tally of proxies: provider name -> origin group ID -> proxy subgroup
# number (``Proxy.psg``) -> number of proxies counted in that subgroup.
SubgroupCount = OrderedDictT[str, OrderedDictT[int, OrderedDictT[int, int]]]
|
|
|
|
|
|
def all_active_proxies() -> List[Proxy]:
    """
    Fetch every proxy that is still in service.

    A proxy counts as active while both its ``deprecated`` and ``destroyed`` columns are NULL.

    :return: All Proxy instances matching that condition.
    """
    still_in_service = (
        Proxy.deprecated.is_(None),
        Proxy.destroyed.is_(None),
    )
    active: List[Proxy] = Proxy.query.filter(*still_in_service).all()
    return active
|
|
|
|
|
|
def random_slug(origin_domain_name: str) -> str:
    """
    Build a slug from a short domain-derived prefix followed by random lower case letters.

    The prefix is the registered domain part of `origin_domain_name` (as reported by ``tldextract``),
    cut to at most 5 characters. It is followed by 12 randomly chosen ASCII lower case letters.

    :param origin_domain_name: The domain name to derive the prefix from.
    :return: The generated slug.

    :Example:

    >>> random_slug("example.com")  # the 12 trailing letters differ on every call
    "exampqwertyuiopas"
    """
    prefix = tldextract.extract(origin_domain_name).domain[:5]
    # The random slug doesn't need to be cryptographically secure, hence the use of `# nosec`
    suffix = ''.join(random.choices(string.ascii_lowercase, k=12))  # nosec
    return prefix + suffix
|
|
|
|
|
|
def calculate_subgroup_count(proxies: Optional[List[Proxy]] = None) -> SubgroupCount:
    """
    Tally how many proxies sit in each subgroup, per origin group, per provider.

    The result is a three-level ordered mapping: provider -> group ID -> subgroup -> count.
    Keys appear in the order in which they are first encountered while walking `proxies`.

    :param proxies: The Proxy objects to tally. When None, all active proxies are loaded and tallied instead.
    :return: The nested provider / group / subgroup count mapping.
    """
    to_tally = all_active_proxies() if proxies is None else proxies
    tally: SubgroupCount = OrderedDict()
    for proxy in to_tally:
        by_group = tally.setdefault(proxy.provider, OrderedDict())
        by_subgroup = by_group.setdefault(proxy.origin.group_id, OrderedDict())
        by_subgroup[proxy.psg] = by_subgroup.get(proxy.psg, 0) + 1
    return tally
|
|
|
|
|
|
def next_subgroup(subgroup_count: SubgroupCount, provider: str, group_id: int, max_subgroup_count: int,
                  max_subgroup_members: int) -> Optional[int]:
    """
    Find the lowest-numbered subgroup with spare capacity for the given provider and group.

    Subgroups are numbered from 1 to `max_subgroup_count` inclusive and are examined in ascending order.
    A subgroup has spare capacity when its recorded count is below `max_subgroup_members`; a subgroup
    (or an entire provider or group) that is absent from `subgroup_count` is treated as having a count of 0.

    :param subgroup_count: A nested dictionary representing the count of each subgroup within each group for each
        provider.
    :param provider: The provider to find the next subgroup in.
    :param group_id: The group to find the next subgroup in.
    :param max_subgroup_count: The highest subgroup number that may be used.
    :param max_subgroup_members: The number of members at which a subgroup is considered full.
    :return: The number of the first subgroup with spare capacity, or None when every subgroup from 1 to
        `max_subgroup_count` is full (which includes the case where either limit is less than 1).
    """
    # An untracked provider/group is the same as an empty tally, so both cases share one code path and
    # both respect the limits.
    subgroups: Dict[int, int] = {}
    if provider in subgroup_count and group_id in subgroup_count[provider]:
        subgroups = subgroup_count[provider][group_id]
    for subgroup in range(1, max_subgroup_count + 1):
        if subgroups.get(subgroup, 0) < max_subgroup_members:
            return subgroup
    return None
|
|
|
|
|
|
def auto_deprecate_proxies() -> None:
    """
    Automatically deprecate active proxies that should no longer be served.

    Every active proxy is checked against two rules, and at most one of them is applied:

    1. The origin of the proxy has been destroyed (reason ``"origin_destroyed"``).
    2. The origin's ``assets`` attribute is truthy and the proxy was added before a randomised
       cutoff (reason ``"max_age_reached"``).

    .. note::
       - The max-age cutoff is drawn separately for each proxy and lies between 24 and 48 hours before now.
       - A proxy deprecated under rule 1 is not evaluated against rule 2, so it is never deprecated a
         second time with a different reason.
    """
    for proxy in all_active_proxies():
        if proxy.origin.destroyed is not None:
            proxy.deprecate(reason="origin_destroyed")
            # Already deprecated; skip the max-age rule for this proxy
            continue
        if proxy.origin.assets:
            # Randomising the cutoff spreads replacements out instead of rotating every proxy at once
            max_age_cutoff = datetime.datetime.utcnow() - datetime.timedelta(
                days=1, seconds=86400 * random.random())  # nosec: B311
            if proxy.added < max_age_cutoff:
                proxy.deprecate(reason="max_age_reached")
|
|
|
|
|
|
def destroy_expired_proxies() -> None:
    """
    Destroy proxies whose deprecation happened long enough ago.

    Selects every proxy that has not yet been destroyed and whose ``deprecated`` timestamp is more than
    4 days in the past, then calls ``destroy()`` on each of them.
    """
    grace_period = datetime.timedelta(days=4)
    deprecated_before = datetime.datetime.utcnow() - grace_period
    expired = Proxy.query.filter(
        Proxy.destroyed.is_(None),
        Proxy.deprecated < deprecated_before
    ).all()
    for expired_proxy in expired:
        logging.debug("Destroying expired proxy")
        expired_proxy.destroy()
|
|
|
|
|
|
def promote_hot_spare_proxy(pool_id: int, origin: Origin) -> bool:
    """
    Promote a 'hot spare' proxy to a specified pool from the reserve pool.

    This function searches for an active 'hot spare' proxy (a proxy in the reserve pool, pool_id == -1, that is
    neither deprecated nor destroyed) for the given origin. If one is found, it is promoted to the specified
    pool by changing its pool ID. The added timestamp is also reset to the time at which the hot spare was
    promoted.

    The change is made on the ORM object only; committing it is left to the caller.

    :param pool_id: The pool to which the 'hot spare' proxy is to be promoted.
    :param origin: The origin of the 'hot spare' proxy to be promoted.
    :return: True if a 'hot spare' proxy was found and promoted, False otherwise.

    .. note:: In the database, the pool ID -1 signifies a reserve pool of 'hot spare' proxies. This pool is created by
       default in the schema migrations.
    """
    proxy = Proxy.query.filter(
        Proxy.pool_id == -1,
        Proxy.origin_id == origin.id,
        # Only a live spare is worth promoting: a deprecated or destroyed proxy left in the reserve pool
        # would not count as an active proxy once moved into the target pool.
        Proxy.deprecated.is_(None),
        Proxy.destroyed.is_(None),
    ).first()
    if not proxy:
        return False
    proxy.pool_id = pool_id
    proxy.added = datetime.datetime.utcnow()
    return True
|
|
|
|
|
|
class ProxyMetaAutomation(BaseAutomation):
    # Housekeeping automation for proxies. Each run deprecates proxies that should no longer be served,
    # destroys proxies that have been deprecated for long enough, creates proxies for origins that lack
    # one in a pool, and tops up the reserve pool of 'hot spare' proxies.
    short_name = "proxy_meta"
    description = "Housekeeping for proxies"
    frequency = 1

    # Tally of proxies per provider / group / subgroup, used to choose the subgroup of each new proxy
    subgroup_count: SubgroupCount

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Initialise the automation and take a tally of all currently active proxies.

        All arguments are passed through unchanged to the base class.
        """
        super().__init__(*args, **kwargs)
        self.subgroup_count = calculate_subgroup_count()

    def automate(self, full: bool = False) -> Tuple[bool, str]:
        """
        Run one housekeeping pass and commit the resulting changes to the database session.

        :param full: Not used by this automation.
        :return: Always ``(True, "")``.
        """
        # Deprecate orphaned proxies, old proxies and mismatched proxies
        auto_deprecate_proxies()
        destroy_expired_proxies()
        self.handle_missing_proxies()
        self.create_hot_spare_proxies()
        db.session.commit()
        return True, ""

    def handle_missing_proxies(self) -> None:
        """
        Create new proxies for origins that lack active proxies in a pool.

        This function iterates over all pools, groups in each pool, and origins in each group.
        If an origin is not destroyed and lacks active (not deprecated and not destroyed) proxies in a pool,
        a hot spare is promoted into the pool if one is available; otherwise a new proxy is created.
        """
        for pool in Pool.query.all():
            for group in pool.groups:
                for origin in group.origins:
                    if origin.destroyed is not None:
                        continue
                    has_active_proxy = any(
                        x.pool_id == pool.id and x.deprecated is None and x.destroyed is None
                        for x in origin.proxies
                    )
                    if has_active_proxy:
                        continue
                    logging.debug("Creating new proxy for %s in pool %s", origin, pool)
                    if not promote_hot_spare_proxy(pool.id, origin):
                        # No "hot spare" available
                        self.create_proxy(pool.id, origin)

    def create_proxy(self, pool_id: int, origin: Origin) -> bool:
        """
        Creates a web proxy resource for the given origin and pool combination.

        The enabled providers are tried in their order of preference. The first provider that still has a
        subgroup with spare capacity for the origin's group receives the new proxy; the proxy is added to
        the database session but not committed here.

        A boolean is returned to indicate whether a proxy resource was created.

        :param pool_id: pool to create the resource for
        :param origin: origin to create the resource for
        :return: whether a proxy resource was created
        """
        for provider in PROXY_PROVIDERS.values():
            logging.debug("Looking at provider %s", provider.provider)
            # Pass the limits by keyword: the two are easy to transpose positionally, which would apply
            # the per-subgroup member limit as the subgroup limit and vice versa.
            subgroup = next_subgroup(
                self.subgroup_count, provider.provider, origin.group_id,
                max_subgroup_count=provider.subgroup_count_max,
                max_subgroup_members=provider.subgroup_members_max,
            )
            if subgroup is None:
                continue  # Exceeded maximum number of subgroups and last subgroup is full
            # Record the new member immediately so later calls in the same run see the updated tally
            self.increment_subgroup(provider.provider, origin.group_id, subgroup)
            now = datetime.datetime.utcnow()
            proxy = Proxy()
            proxy.pool_id = pool_id
            proxy.origin_id = origin.id
            proxy.provider = provider.provider
            proxy.psg = subgroup
            # The random usage below is good enough for its purpose: to create a slug that
            # hasn't been used recently.
            proxy.slug = random_slug(origin.domain_name)
            proxy.added = now
            proxy.updated = now
            logging.debug("Creating proxy %s", proxy)
            db.session.add(proxy)
            return True
        return False

    def increment_subgroup(self, provider: str, group_id: int, psg: int) -> None:
        """
        Increment the count of a specific subgroup within a group for a specific provider.

        This function mutates the `subgroup_count` dictionary by incrementing the count of the specified subgroup.
        If the provider, group, or subgroup does not exist in `subgroup_count`, they are created.

        :param provider: The provider to increment the subgroup count for.
        :param group_id: The group to increment the subgroup count for.
        :param psg: The subgroup to increment the count of.
        """
        by_group = self.subgroup_count.setdefault(provider, OrderedDict())
        by_subgroup = by_group.setdefault(group_id, OrderedDict())
        by_subgroup[psg] = by_subgroup.get(psg, 0) + 1

    def create_hot_spare_proxies(self) -> None:
        """
        Create 'hot spare' proxies for origins that lack an active one in the reserve pool.

        This function iterates over all origins that are not destroyed.
        If an origin has no active proxy (not deprecated and not destroyed) in the reserve pool
        (pool_id = -1), a new 'hot spare' proxy for this origin is created there.
        """
        origins = Origin.query.filter(
            Origin.destroyed.is_(None)
        ).all()
        for origin in origins:
            # Redundant with the query filter above; kept as a cheap guard
            if origin.destroyed is not None:
                continue
            proxies = Proxy.query.filter(
                Proxy.pool_id == -1,
                Proxy.origin_id == origin.id,
                Proxy.deprecated.is_(None),
                Proxy.destroyed.is_(None),
            ).all()
            if not proxies:
                logging.debug("Creating new hot spare proxy for origin %s", origin)
                self.create_proxy(-1, origin)  # Creating proxy in reserve pool
|