2024-11-09 11:08:48 +00:00
|
|
|
import logging
|
2024-12-06 16:08:48 +00:00
|
|
|
from datetime import datetime, timedelta, timezone
|
2024-11-09 11:08:48 +00:00
|
|
|
from typing import Dict, List, Optional, TypedDict
|
2024-12-06 16:08:48 +00:00
|
|
|
|
2022-05-17 08:28:37 +01:00
|
|
|
from flask import current_app
|
2023-10-29 15:45:10 +00:00
|
|
|
from sqlalchemy import or_
|
2024-11-09 11:08:48 +00:00
|
|
|
from sqlalchemy.orm import selectinload
|
2022-04-27 13:30:49 +01:00
|
|
|
from tldextract import extract
|
|
|
|
|
2023-10-29 15:45:10 +00:00
|
|
|
from app.extensions import db
|
2022-09-26 14:51:11 +01:00
|
|
|
from app.models.base import Group, Pool
|
2024-12-06 16:08:48 +00:00
|
|
|
from app.models.mirrors import Origin, Proxy
|
2022-04-27 13:30:49 +01:00
|
|
|
|
|
|
|
|
2024-11-09 11:08:48 +00:00
|
|
|
class MirrorMappingMirror(TypedDict):
    """One entry of the mirror mapping, describing a single proxy and its origin."""

    # Domain name of the proxy's origin, as stored on the origin record.
    origin_domain: str
    # The origin domain name with every "www." occurrence removed.
    origin_domain_normalized: str
    # Registered domain of the origin, as computed by tldextract.
    origin_domain_root: str
    # ISO 8601 timestamp of when the proxy was added.
    valid_from: str
    # ISO 8601 timestamp of when the proxy was destroyed; None while it is live.
    valid_to: Optional[str]
    # Per-country risk levels of the origin, keyed by country code.
    countries: Dict[str, int]
    # Country code with the highest risk level ("ZZ" when no countries are set).
    country: Optional[str]
    # Highest risk level among ``countries`` (0 when no countries are set).
    risk: int
|
2022-04-27 13:30:49 +01:00
|
|
|
|
|
|
|
|
2024-11-09 11:08:48 +00:00
|
|
|
class MirrorMapping(TypedDict):
    """Top-level mirror-mapping document returned by ``mirror_mapping``."""

    # Format version string of the document.
    version: str
    # Mirror entries keyed by proxy URL with its scheme prefix removed.
    mappings: Dict[str, MirrorMappingMirror]
    # Names of the S3 buckets holding CloudFront logs, one per active group.
    s3_buckets: List[str]
|
2022-04-27 13:30:49 +01:00
|
|
|
|
|
|
|
|
2024-11-09 11:08:48 +00:00
|
|
|
def mirror_mapping(_: Optional[Pool]) -> MirrorMapping:
    """Build the mirror-mapping document for current and recently destroyed proxies.

    Proxies are included when they have a URL and are either not destroyed or
    were destroyed within the last two days. Each proxy is keyed by its URL
    with a leading ``https://`` removed.

    Args:
        _: Unused; accepted so the function matches the signature callers expect.

    Returns:
        A dict with the format ``version``, the per-proxy ``mappings`` and the
        ``s3_buckets`` list (one CloudFront log bucket name per group that has
        not been destroyed).
    """
    scheme_prefix = "https://"
    two_days_ago = datetime.now(tz=timezone.utc) - timedelta(days=2)

    proxies = (
        db.session.query(Proxy)
        .options(selectinload(Proxy.origin).selectinload(Origin.countries))
        .filter(or_(Proxy.destroyed.is_(None), Proxy.destroyed > two_days_ago))
        .filter(Proxy.url.is_not(None))
        .all()
    )

    result: Dict[str, MirrorMappingMirror] = {}
    for proxy in proxies:
        # The query already excludes NULL URLs; this guard narrows the type
        # and protects against an unexpected row slipping through.
        if proxy.url is None:
            logging.error("No URL for proxy %s", proxy)
            continue

        countries = proxy.origin.risk_level
        if countries:
            highest_risk_country_code, highest_risk_level = max(
                countries.items(), key=lambda x: x[1]
            )
        else:
            # Placeholder values used when the origin has no country risk data.
            highest_risk_country_code = "ZZ"
            highest_risk_level = 0

        # Remove the scheme as a *prefix*. str.lstrip("https://") strips any
        # leading run of the characters h, t, p, s, ":" and "/", which mangles
        # hosts such as "static.example.com" into "atic.example.com".
        if proxy.url.startswith(scheme_prefix):
            mapping_key = proxy.url[len(scheme_prefix):]
        else:
            mapping_key = proxy.url

        origin_domain = proxy.origin.domain_name
        result[mapping_key] = {
            "origin_domain": origin_domain,
            # NOTE(review): this removes every "www." occurrence, not only a
            # leading one - confirm that is intended.
            "origin_domain_normalized": origin_domain.replace("www.", ""),
            "origin_domain_root": extract(origin_domain).registered_domain,
            "valid_from": proxy.added.isoformat(),
            "valid_to": proxy.destroyed.isoformat() if proxy.destroyed else None,
            "countries": countries,
            "country": highest_risk_country_code,
            "risk": highest_risk_level,
        }

    groups = db.session.query(Group).options(selectinload(Group.pools))
    s3_buckets = [
        f"{current_app.config['GLOBAL_NAMESPACE']}-{g.group_name.lower()}-logs-cloudfront"
        for g in groups.filter(Group.destroyed.is_(None)).all()
    ]

    return {
        "version": "1.2",
        "mappings": result,
        "s3_buckets": s3_buckets,
    }
|