2022-06-17 14:02:10 +01:00
|
|
|
# pylint: disable=too-few-public-methods
|
|
|
|
|
2022-04-27 14:50:41 +01:00
|
|
|
import builtins
|
2023-10-29 15:45:10 +00:00
|
|
|
from datetime import datetime, timedelta
|
2023-08-21 14:41:21 +01:00
|
|
|
from typing import Dict, List, Union, Optional
|
2022-04-27 13:30:49 +01:00
|
|
|
|
2022-05-17 08:28:37 +01:00
|
|
|
from flask import current_app
|
2022-04-27 13:30:49 +01:00
|
|
|
from pydantic import BaseModel, Field
|
2023-10-29 15:45:10 +00:00
|
|
|
from sqlalchemy import or_
|
2022-04-27 13:30:49 +01:00
|
|
|
from tldextract import extract
|
|
|
|
|
2023-10-29 15:45:10 +00:00
|
|
|
from app.extensions import db
|
2022-09-26 14:51:11 +01:00
|
|
|
from app.models.base import Group, Pool
|
2022-05-11 10:38:27 +01:00
|
|
|
from app.models.mirrors import Proxy
|
2022-04-27 13:30:49 +01:00
|
|
|
|
|
|
|
|
|
|
|
class MMMirror(BaseModel):
    # One entry of the mirror mapping: the origin a mirror serves, the period
    # for which the mirror is valid, and a country/risk pair for the origin.
    #
    # NOTE: documented with comments rather than a docstring on purpose;
    # pydantic copies a model docstring into the generated JSON schema as its
    # "description", which would alter the published schema.

    # --- origin identification ---
    origin_domain: str = Field(
        description="The full origin domain name",
    )
    origin_domain_normalized: str = Field(
        description='The origin_domain with "www." removed, if present',
    )
    origin_domain_root: str = Field(
        description="The registered domain name of the origin, excluding subdomains",
    )

    # --- validity window (dates carried as strings) ---
    valid_from: str = Field(
        description="The date on which the mirror was added to the system",
    )
    valid_to: Optional[str] = Field(
        description="The date on which the mirror was decommissioned",
    )

    # --- targeting / risk ---
    # Not exposed yet; kept as a reminder of the planned per-country list:
    # countries: List[Tuple[str, int]] = Field(description="A list mapping of risk levels to country")
    country: Optional[str] = Field(
        description="The country code of the country in which the origin is targeted",
    )
    risk: int = Field(
        description="A risk score for the origin in the target country",
    )
|
2022-04-27 13:30:49 +01:00
|
|
|
|
|
|
|
|
|
|
|
class MirrorMapping(BaseModel):
    # Top-level mirror mapping document: a schema version string, the
    # per-mirror entries keyed by string, and the list of S3 bucket names.
    #
    # NOTE: documented with comments rather than a docstring on purpose;
    # pydantic copies a model docstring into the generated JSON schema as its
    # "description", which would alter the published schema.

    version: str = Field(
        description="Version number of the mirror mapping schema in use",
    )
    mappings: Dict[str, MMMirror] = Field(
        description="The domain name for the mirror",
    )
    s3_buckets: List[str] = Field(
        description="The names of all S3 buckets used for CloudFront logs",
    )

    class Config:
        # Title of the generated JSON schema. Kept in step with the "1.2"
        # version string emitted by mirror_mapping(); it previously still
        # read "1.1" after the version had been bumped.
        title = "Mirror Mapping Version 1.2"
|
2022-04-27 13:30:49 +01:00
|
|
|
|
|
|
|
|
2023-01-21 15:18:13 +00:00
|
|
|
def _strip_url_scheme(url: str) -> str:
    """Return *url* without a leading ``https://`` or ``http://`` prefix."""
    # str.lstrip() takes a *set of characters*, not a prefix, so it would also
    # eat leading host characters such as "s", "t" or "p". Slice off the exact
    # prefix instead.
    for scheme in ("https://", "http://"):
        if url.startswith(scheme):
            return url[len(scheme):]
    return url


def _strip_www_prefix(domain_name: str) -> str:
    """Return *domain_name* without a leading ``www.`` label, if present."""
    prefix = "www."
    if domain_name.startswith(prefix):
        return domain_name[len(prefix):]
    return domain_name


def mirror_mapping(_: Optional[Pool]) -> Dict[str, Union[str, Dict[str, str]]]:
    """
    Build the mirror mapping document as a plain dictionary.

    The mapping covers every proxy that has a URL and is either not destroyed
    or was destroyed within the last seven days. Entries are keyed by the
    proxy URL with its scheme removed. For each entry the country with the
    highest value in the origin's ``risk_level`` mapping is reported; origins
    without associated countries are reported as country ``"ZZ"`` with risk 0.

    :param _: unused; the pool argument is ignored by this function
    :return: the ``MirrorMapping`` model converted with ``.dict()``
    """
    # NOTE(review): naive UTC timestamp - presumably matches how
    # Proxy.destroyed is stored; confirm before switching to an aware datetime.
    one_week_ago = datetime.utcnow() - timedelta(days=7)

    proxies = (
        db.session.query(Proxy)  # type: ignore[no-untyped-call]
        .filter(or_(Proxy.destroyed.is_(None), Proxy.destroyed > one_week_ago))
        .filter(Proxy.url.is_not(None))
        .all()
    )

    result: Dict[str, MMMirror] = {}
    for proxy in proxies:
        origin = proxy.origin

        # Only consult the risk levels when the origin has associated
        # countries. The default also covers an empty risk mapping, which
        # would otherwise make max() raise ValueError and abort the whole
        # mapping.
        risk_items = origin.risk_level.items() if origin.countries else ()
        country_code, risk_level = max(
            risk_items,
            key=lambda item: item[1],
            default=("ZZ", 0),
        )

        result[_strip_url_scheme(proxy.url)] = MMMirror(
            origin_domain=origin.domain_name,
            # Only a leading "www." is dropped; replacing every occurrence
            # would mangle domains that contain "www." elsewhere.
            origin_domain_normalized=_strip_www_prefix(origin.domain_name),
            origin_domain_root=extract(origin.domain_name).registered_domain,
            valid_from=proxy.added.isoformat(),
            valid_to=proxy.destroyed.isoformat() if proxy.destroyed is not None else None,
            # countries=[],  # TODO: countries,
            country=country_code,
            risk=risk_level,
        )

    return MirrorMapping(
        version="1.2",
        mappings=result,
        # One CloudFront log bucket name per group that has not been destroyed.
        s3_buckets=[
            f"{current_app.config['GLOBAL_NAMESPACE']}-{g.group_name.lower()}-logs-cloudfront"
            for g in Group.query.filter(Group.destroyed.is_(None)).all()
        ],
    ).dict()
|
2022-04-27 14:50:41 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Export the JSON schema of the mapping only when the ``__sphinx_build__``
# flag is present (and truthy) in the builtins namespace - presumably set by
# the Sphinx documentation build; confirm against the docs configuration.
if vars(builtins).get("__sphinx_build__", False):
    schema = MirrorMapping.schema_json()
|