import logging
from datetime import datetime, timedelta
from typing import Dict, List, Optional, TypedDict

from flask import current_app
from sqlalchemy import or_
from sqlalchemy.orm import selectinload
from tldextract import extract

from app.extensions import db
from app.models.base import Group, Pool
from app.models.mirrors import Proxy, Origin

_HTTPS_PREFIX = "https://"


class MirrorMappingMirror(TypedDict):
    """One entry of the mirror mapping, describing the origin behind a proxy."""

    # Domain name of the proxied origin, exactly as stored on the Origin.
    origin_domain: str
    # Origin domain name with every "www." occurrence removed.
    origin_domain_normalized: str
    # Registered domain of the origin, as computed by tldextract.
    origin_domain_root: str
    # ISO 8601 timestamp of when the proxy was added.
    valid_from: str
    # ISO 8601 timestamp of when the proxy was destroyed; None if it was not.
    valid_to: Optional[str]
    # The origin's ``risk_level`` mapping (country code -> risk level).
    countries: Dict[str, int]
    # Country code with the highest risk level, or "ZZ" when none are known.
    country: Optional[str]
    # The highest risk level among ``countries``, or 0 when none are known.
    risk: int


class MirrorMapping(TypedDict):
    """Top-level document returned by :func:`mirror_mapping`."""

    # Format version string of the document.
    version: str
    # Proxy host (URL without the "https://" scheme) -> mirror description.
    mappings: Dict[str, MirrorMappingMirror]
    # Names of the CloudFront log buckets, one per non-destroyed group.
    s3_buckets: List[str]


def _strip_https_prefix(url: str) -> str:
    """Return *url* without a leading ``https://``; other URLs are unchanged.

    ``str.lstrip("https://")`` must not be used for this: it strips any
    leading run of the characters ``h t p s : /`` and therefore also eats the
    start of hostnames such as ``static.example.com``.
    """
    if url.startswith(_HTTPS_PREFIX):
        return url[len(_HTTPS_PREFIX):]
    return url


def mirror_mapping(_: Optional[Pool]) -> MirrorMapping:
    """Build the mapping from proxy hosts to the origins they mirror.

    Proxies are included when they have a URL and are either not destroyed or
    were destroyed within the last two days. The pool argument is accepted
    but not used.

    Returns:
        A document with the format version, the per-proxy mappings keyed by
        the proxy URL without its ``https://`` scheme, and the CloudFront log
        bucket names for all groups that have not been destroyed.
    """
    # NOTE(review): naive UTC timestamp; presumably matches how
    # ``Proxy.destroyed`` is stored -- confirm before making this tz-aware.
    two_days_ago = datetime.utcnow() - timedelta(days=2)
    proxies = (
        db.session.query(Proxy)
        # Load origins and their countries up front instead of per proxy.
        .options(selectinload(Proxy.origin).selectinload(Origin.countries))
        .filter(or_(Proxy.destroyed.is_(None), Proxy.destroyed > two_days_ago))
        .filter(Proxy.url.is_not(None))
        .all()
    )

    result: Dict[str, MirrorMappingMirror] = {}
    for proxy in proxies:
        # The query already excludes NULL URLs; this guard is kept as a
        # defensive check (and narrows the Optional for type checkers).
        if proxy.url is None:
            logging.error("No URL for proxy %s", proxy)
            continue

        countries = proxy.origin.risk_level
        if countries:
            highest_risk_country_code, highest_risk_level = max(
                countries.items(), key=lambda x: x[1]
            )
        else:
            # No per-country risk data available: use the placeholder code.
            highest_risk_country_code = "ZZ"
            highest_risk_level = 0

        domain_name = proxy.origin.domain_name
        result[_strip_https_prefix(proxy.url)] = {
            "origin_domain": domain_name,
            # NOTE(review): removes "www." anywhere in the name, not only as
            # a leading label -- confirm whether that is intended.
            "origin_domain_normalized": domain_name.replace("www.", ""),
            "origin_domain_root": extract(domain_name).registered_domain,
            "valid_from": proxy.added.isoformat(),
            "valid_to": proxy.destroyed.isoformat() if proxy.destroyed else None,
            "countries": countries,
            "country": highest_risk_country_code,
            "risk": highest_risk_level,
        }

    # NOTE(review): Group.pools is eagerly loaded but not read in this
    # function -- confirm whether the option is still needed.
    groups = db.session.query(Group).options(selectinload(Group.pools))
    s3_buckets = [
        f"{current_app.config['GLOBAL_NAMESPACE']}-{g.group_name.lower()}-logs-cloudfront"
        for g in groups.filter(Group.destroyed.is_(None)).all()
    ]

    return {
        "version": "1.2",
        "mappings": result,
        "s3_buckets": s3_buckets,
    }