From d08388c339f21de82df0a95c166d7aeed1ca3f70 Mon Sep 17 00:00:00 2001
From: Iain Learmonth
Date: Sat, 9 Nov 2024 11:08:48 +0000
Subject: [PATCH] feat: remove pydantic from list generation

---
 app/lists/bc2.py               |  77 ++++++++++++-----------
 app/lists/bridgelines.py       |  56 ++++++-----------
 app/lists/mirror_mapping.py    | 111 +++++++++++++++------------------
 app/lists/redirector.py        |  64 +++++++++----------
 app/models/__init__.py         |  44 ++++++-------
 app/terraform/list/__init__.py |   5 +-
 requirements-types.txt         |   3 -
 requirements.txt               |   1 -
 8 files changed, 164 insertions(+), 197 deletions(-)

diff --git a/app/lists/bc2.py b/app/lists/bc2.py
index b570ead..c82580e 100644
--- a/app/lists/bc2.py
+++ b/app/lists/bc2.py
@@ -1,38 +1,28 @@
-# pylint: disable=too-few-public-methods
-
-import builtins
-from datetime import datetime
-from typing import List, Dict, Union, Any, Optional
-
-from pydantic import BaseModel, Field
+from typing import List, Optional, TypedDict
 
 from app.models.base import Pool
 from app.models.mirrors import Origin, Proxy
 
 
-class BC2Alternative(BaseModel):
+class BC2Alternative(TypedDict):
     proto: str
     type: str
-    created_at: datetime
-    updated_at: datetime
+    created_at: str
+    updated_at: str
     url: str
 
 
-class BC2Site(BaseModel):
-    main_domain: str = Field(description="The main domain name of the website, excluding \"www.\" if present.",
-                             examples=["bbc.co.uk", "bbc.com", "guardianproject.info"])
+class BC2Site(TypedDict):
+    main_domain: str
     available_alternatives: List[BC2Alternative]
 
 
-class BypassCensorship2(BaseModel):
-    version: str = Field(description="Version number of the Bypass Censorship Extension schema in use", )
+class BypassCensorship2(TypedDict):
+    version: str
     sites: List[BC2Site]
 
-    class Config:
-        title = "Bypass Censorship Version 2"
-
-def onion_alternative(origin: Origin) -> List[Dict[str, Any]]:
+
+def onion_alternative(origin: Origin) -> List[BC2Alternative]:
     url: Optional[str] = origin.onion()
     if url is None:
         return []
@@ -41,22 +31,23 @@ def onion_alternative(origin: Origin) -> List[Dict[str, Any]]:
         "type": "eotk",
         "created_at": str(origin.added),
         "updated_at": str(origin.updated),
-        "url": url}
-    ]
+        "url": url
+    }]
 
 
-def proxy_alternative(proxy: Proxy) -> Dict[str, Any]:
+def proxy_alternative(proxy: Proxy) -> Optional[BC2Alternative]:
+    if proxy.url is None:
+        return None
     return {
         "proto": "https",
         "type": "mirror",
-        "created_at": str(proxy.added),
-        "updated_at": str(proxy.updated),
+        "created_at": proxy.added.isoformat(),
+        "updated_at": proxy.updated.isoformat(),
         "url": proxy.url
     }
 
 
 def main_domain(origin: Origin) -> str:
-    # Both description and domain_name are required to be not null in the database schema
     description: str = origin.description
     if description.startswith("proxy:"):
         return description[len("proxy:"):].replace("www.", "")
@@ -65,20 +56,30 @@ def main_domain(origin: Origin) -> str:
 
 
 def active_proxies(origin: Origin, pool: Pool) -> List[Proxy]:
-    def _filter_fn(proxy: Proxy) -> bool:
-        return proxy.url is not None and not proxy.deprecated and not proxy.destroyed and proxy.pool_id == pool.id
-    return list(filter(_filter_fn, origin.proxies))
+    return [
+        proxy for proxy in origin.proxies
+        if proxy.url is not None and not proxy.deprecated and not proxy.destroyed and proxy.pool_id == pool.id
+    ]
 
 
-def mirror_sites(pool: Pool) -> Dict[
-    str, Union[str, List[Dict[str, Union[str, List[Dict[str, str]]]]]]]:
-    return {"version": "2.0", "sites": [{"main_domain": main_domain(origin),
-                                         "available_alternatives": onion_alternative(origin) + [
-                                             proxy_alternative(proxy) for proxy in
-                                             active_proxies(origin, pool)]} for origin in
-                                         Origin.query.order_by(Origin.domain_name).all() if
-                                         origin.destroyed is None]}
+def mirror_sites(pool: Pool) -> BypassCensorship2:
+    origins = Origin.query.filter(Origin.destroyed.is_(None)).order_by(Origin.domain_name).all()
+    sites: List[BC2Site] = []
+    for origin in origins:
+        # Gather alternatives, filtering out None values from proxy_alternative
+        alternatives = onion_alternative(origin) + [
+            alt for proxy in active_proxies(origin, pool)
+            if (alt := proxy_alternative(proxy)) is not None
+        ]
 
-if getattr(builtins, "__sphinx_build__", False):
-    schema = BypassCensorship2.schema_json()
+        # Add the site dictionary to the list
+        sites.append({
+            "main_domain": main_domain(origin),
+            "available_alternatives": list(alternatives)
+        })
+
+    return {
+        "version": "2.0",
+        "sites": sites
+    }
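
The move from pydantic models to TypedDict in bc2.py trades runtime validation for purely static checking: the dictionaries are only checked by mypy, never at runtime. A minimal sketch of that difference, using an illustrative stand-in rather than the real BC2Site:

    from typing import List, TypedDict


    class Site(TypedDict):
        main_domain: str
        available_alternatives: List[str]


    def build_site(domain: str) -> Site:
        # mypy checks the keys and value types of this literal statically;
        # nothing is validated when the code actually runs.
        return {"main_domain": domain, "available_alternatives": []}


    site = build_site("example.com")
    # site["unexpected_key"] = "x"   # mypy would reject this line, but at runtime it would silently succeed
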
diff --git a/app/lists/bridgelines.py b/app/lists/bridgelines.py
index e37a626..556bc4a 100644
--- a/app/lists/bridgelines.py
+++ b/app/lists/bridgelines.py
@@ -1,51 +1,31 @@
-# pylint: disable=too-few-public-methods
-
-import builtins
-from typing import List, Iterable, Dict, Any, Optional
-
-from pydantic import BaseModel, Field
+from typing import List, Optional, TypedDict
 
+from sqlalchemy.orm import selectinload
 from app.models.base import Pool
 from app.models.bridges import Bridge
 
 
-class Bridgelines(BaseModel):
-    version: str = Field(
-        description="Version number of the bridgelines schema in use",
-        examples=[
-            "1.0"
-        ]
-    )
-    bridgelines: List[str] = Field(
-        description="List of bridgelines, ready for use in a torrc file",
-        examples=[
-            "Bridge obfs4 71.73.124.31:8887 E81B1237F6D13497B166060F55861565593CFF8E "
-            "cert=b54NsV6tK1g+LHaThPOTCibdpx3wHm9NFe0PzGF1nwz+4M/tq6SkfOaShzPnZsIRCFRIHg iat-mode=0",
-            "Bridge obfs4 172.105.176.101:80 D18BC7E082D7EBF8E851029AC89A12A3F44A50BF "
-            "cert=KHfAAUptXWRmLy3ehS9ETMO5luY06d0w7tEBDiAI0z62nC5Qo/APrzZxodkYWX2bNko/Mw iat-mode=0",
-            "Bridge obfs4 141.101.36.55:9023 045EF272F08BC11CDB985889E4E9FE35DC6F9C67 "
-            "cert=6KEdf/5aDSyuYEqvo14JE8Cks3i7PQtj9EFX2wTCiEaUPsp/I7eaOm4uSWdqwvV4vTVlFw iat-mode=0 "
-        ]
-    )
-
-    class Config:
-        title = "Bridgelines Version 1"
+class BridgelinesDict(TypedDict):
+    version: str
+    bridgelines: List[str]
 
 
-def bridgelines(pool: Pool, *, distribution_method: Optional[str] = None) -> Dict[str, Any]:
-    bridges: Iterable[Bridge] = Bridge.query.filter(
+def bridgelines(pool: Pool, *, distribution_method: Optional[str] = None) -> BridgelinesDict:
+    # Fetch bridges with selectinload for related data
+    query = Bridge.query.options(selectinload(Bridge.conf)).filter(
         Bridge.destroyed.is_(None),
         Bridge.deprecated.is_(None),
         Bridge.bridgeline.is_not(None)
-    ).all()
+    )
+
     if distribution_method is not None:
-        bridges = [b for b in bridges
-                   if b.conf.distribution_method == distribution_method]
-    return Bridgelines(
-        version="1.0",
-        bridgelines=[b.bridgeline for b in bridges if b.conf.pool_id == pool.id]
-    ).dict()
+        query = query.filter(Bridge.conf.has(distribution_method=distribution_method))
 
+    # Collect bridgelines specific to the pool
+    bridgelines = [b.bridgeline for b in query.all() if b.conf.pool_id == pool.id]
 
-if getattr(builtins, "__sphinx_build__", False):
-    schema = Bridgelines.schema_json()
+    # Return dictionary directly, inlining the previous `to_dict` functionality
+    return {
+        "version": "1.0",
+        "bridgelines": bridgelines
+    }
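
For context, the dictionary returned by bridgelines() is meant to serialise directly to the published JSON document. A rough sketch of the expected shape, assuming the result is dumped with the standard json module (the bridge line below is a placeholder, not a real bridge):

    import json
    from typing import List, TypedDict


    class BridgelinesDict(TypedDict):
        version: str
        bridgelines: List[str]


    example: BridgelinesDict = {
        "version": "1.0",
        "bridgelines": [
            "Bridge obfs4 192.0.2.10:443 0000000000000000000000000000000000000000 cert=placeholder iat-mode=0"
        ],
    }

    print(json.dumps(example, indent=2))
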
diff --git a/app/lists/mirror_mapping.py b/app/lists/mirror_mapping.py
index 83094c5..c66c25d 100644
--- a/app/lists/mirror_mapping.py
+++ b/app/lists/mirror_mapping.py
@@ -1,87 +1,76 @@
-# pylint: disable=too-few-public-methods
-
-import builtins
+import logging
 from datetime import datetime, timedelta
-from typing import Dict, List, Union, Optional
-
+from typing import Dict, List, Optional, TypedDict
 from flask import current_app
-from pydantic import BaseModel, Field
 from sqlalchemy import or_
+from sqlalchemy.orm import selectinload
 from tldextract import extract
 
 from app.extensions import db
 from app.models.base import Group, Pool
-from app.models.mirrors import Proxy
+from app.models.mirrors import Proxy, Origin
 
 
-class MMMirror(BaseModel):
-    origin_domain: str = Field(description="The full origin domain name")
-    origin_domain_normalized: str = Field(description="The origin_domain with \"www.\" removed, if present")
-    origin_domain_root: str = Field(description="The registered domain name of the origin, excluding subdomains")
-    valid_from: str = Field(description="The date on which the mirror was added to the system")
-    valid_to: Optional[str] = Field(description="The date on which the mirror was decommissioned")
-    countries: Dict[str, int] = Field(description="A list mapping of risk levels to country")
-    country: Optional[str] = Field(
-        description="The country code of the country with the highest risk level where the origin is targeted")
-    risk: int = Field(description="The risk score for the highest risk country")
+class MirrorMappingMirror(TypedDict):
+    origin_domain: str
+    origin_domain_normalized: str
+    origin_domain_root: str
+    valid_from: str
+    valid_to: Optional[str]
+    countries: Dict[str, int]
+    country: Optional[str]
+    risk: int
 
 
-class MirrorMapping(BaseModel):
-    version: str = Field(
-        description="Version number of the mirror mapping schema in use"
-    )
-    mappings: Dict[str, MMMirror] = Field(
-        description="The domain name for the mirror"
-    )
-    s3_buckets: List[str] = Field(
-        description="The names of all S3 buckets used for CloudFront logs"
-    )
-
-    class Config:
-        title = "Mirror Mapping Version 1.2"
+class MirrorMapping(TypedDict):
+    version: str
+    mappings: Dict[str, MirrorMappingMirror]
+    s3_buckets: List[str]
 
 
-def mirror_mapping(_: Optional[Pool]) -> Dict[str, Union[str, Dict[str, str]]]:
-    one_week_ago = datetime.utcnow() - timedelta(days=7)
+def mirror_mapping(_: Optional[Pool]) -> MirrorMapping:
+    two_days_ago = datetime.utcnow() - timedelta(days=2)
     proxies = (
-        db.session.query(Proxy)  # type: ignore[no-untyped-call]
-        .filter(or_(Proxy.destroyed.is_(None), Proxy.destroyed > one_week_ago))
+        db.session.query(Proxy)
+        .options(selectinload(Proxy.origin).selectinload(Origin.countries))
+        .filter(or_(Proxy.destroyed.is_(None), Proxy.destroyed > two_days_ago))
        .filter(Proxy.url.is_not(None))
         .all()
     )
 
-    result = {}
+    result: Dict[str, MirrorMappingMirror] = {}
 
     for proxy in proxies:
-        if proxy.origin.countries:  # Check if there are any associated countries
-            risk_levels = proxy.origin.risk_level.items()
-            highest_risk_country = max(risk_levels, key=lambda x: x[1])
-            highest_risk_country_code = highest_risk_country[0]
-            highest_risk_level = highest_risk_country[1]
+        if proxy.url is None:
+            logging.error("No URL for proxy %s", proxy)
+            continue
+
+        countries = proxy.origin.risk_level
+        if countries:
+            highest_risk_country_code, highest_risk_level = max(countries.items(), key=lambda x: x[1])
         else:
             highest_risk_country_code = "ZZ"
             highest_risk_level = 0
 
-        result[proxy.url.lstrip("https://")] = MMMirror(
-            origin_domain=proxy.origin.domain_name,
-            origin_domain_normalized=proxy.origin.domain_name.replace("www.", ""),
-            origin_domain_root=extract(proxy.origin.domain_name).registered_domain,
-            valid_from=proxy.added.isoformat(),
-            valid_to=proxy.destroyed.isoformat() if proxy.destroyed is not None else None,
-            countries=proxy.origin.risk_level,
-            country=highest_risk_country_code,
-            risk=highest_risk_level
-        )
+        result[proxy.url.lstrip("https://")] = {
+            "origin_domain": proxy.origin.domain_name,
+            "origin_domain_normalized": proxy.origin.domain_name.replace("www.", ""),
+            "origin_domain_root": extract(proxy.origin.domain_name).registered_domain,
+            "valid_from": proxy.added.isoformat(),
+            "valid_to": proxy.destroyed.isoformat() if proxy.destroyed else None,
+            "countries": countries,
+            "country": highest_risk_country_code,
+            "risk": highest_risk_level
+        }
 
-    return MirrorMapping(
-        version="1.2",
-        mappings=result,
-        s3_buckets=[
-            f"{current_app.config['GLOBAL_NAMESPACE']}-{g.group_name.lower()}-logs-cloudfront"
-            for g in Group.query.filter(Group.destroyed.is_(None)).all()
-        ]
-    ).dict()
+    groups = db.session.query(Group).options(selectinload(Group.pools))
+    s3_buckets = [
+        f"{current_app.config['GLOBAL_NAMESPACE']}-{g.group_name.lower()}-logs-cloudfront"
+        for g in groups.filter(Group.destroyed.is_(None)).all()
+    ]
 
-
-if getattr(builtins, "__sphinx_build__", False):
-    schema = MirrorMapping.schema_json()
+    return {
+        "version": "1.2",
+        "mappings": result,
+        "s3_buckets": s3_buckets
+    }
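
The highest-risk country selection in mirror_mapping() reduces to a max() over the origin's risk_level mapping, with a "ZZ"/0 fallback when no country data is present. A self-contained illustration with made-up numbers:

    from typing import Dict, Tuple


    def pick_highest_risk(countries: Dict[str, int]) -> Tuple[str, int]:
        # Mirrors the logic above: choose the country with the largest risk
        # score, falling back to "ZZ"/0 when no country data is associated.
        if countries:
            return max(countries.items(), key=lambda item: item[1])
        return "ZZ", 0


    print(pick_highest_risk({"TM": 4, "RU": 3}))   # ('TM', 4)
    print(pick_highest_risk({}))                   # ('ZZ', 0)
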
diff --git a/app/lists/redirector.py b/app/lists/redirector.py
index 22fec06..ae7cacd 100644
--- a/app/lists/redirector.py
+++ b/app/lists/redirector.py
@@ -1,12 +1,11 @@
-from typing import List, Dict, Union, Optional
-
-from pydantic import BaseModel
+from typing import List, Dict, Optional, TypedDict
 
+from sqlalchemy.orm import selectinload
 from app.models.base import Pool
 from app.models.mirrors import Proxy
 
 
-class RedirectorPool(BaseModel):
+class RedirectorPool(TypedDict):
     short_name: str
     description: str
     api_key: str
@@ -14,41 +13,40 @@ class RedirectorPool(BaseModel):
     origins: Dict[str, str]
 
 
-class RedirectorData(BaseModel):
+class RedirectorData(TypedDict):
     version: str
     pools: List[RedirectorPool]
 
 
 def redirector_pool_origins(pool: Pool) -> Dict[str, str]:
-    origins: Dict[str, str] = dict()
-    active_proxies = Proxy.query.filter(
-        Proxy.deprecated.is_(None),
-        Proxy.destroyed.is_(None),
-        Proxy.url.is_not(None),
-        Proxy.pool_id == pool.id
-    )
-    for proxy in active_proxies:
-        origins[proxy.origin.domain_name] = proxy.url
-    return origins
+    return {
+        proxy.origin.domain_name: proxy.url
+        for proxy in Proxy.query.filter(
+            Proxy.deprecated.is_(None),
+            Proxy.destroyed.is_(None),
+            Proxy.url.is_not(None),
+            Proxy.pool_id == pool.id
+        )
+    }
 
 
-def redirector_pool(pool: Pool) -> RedirectorPool:
-    return RedirectorPool(
-        short_name=pool.pool_name,
-        description=pool.description,
-        api_key=pool.api_key,
-        redirector_domain=pool.redirector_domain,
-        origins=redirector_pool_origins(pool)
-    )
+def redirector_data(_: Optional[Pool]) -> RedirectorData:
+    active_pools = Pool.query.options(
+        selectinload(Pool.proxies)
+    ).filter(Pool.destroyed.is_(None)).all()
 
+    pools: List[RedirectorPool] = [
+        {
+            "short_name": pool.pool_name,
+            "description": pool.description,
+            "api_key": pool.api_key,
+            "redirector_domain": pool.redirector_domain,
+            "origins": redirector_pool_origins(pool)
+        }
+        for pool in active_pools
+    ]
 
-def redirector_data(_: Optional[Pool]) -> Dict[str, Union[str, Dict[str, Union[Dict[str, str]]]]]:
-    active_pools = Pool.query.filter(
-        Pool.destroyed.is_(None)
-    ).all()
-    return RedirectorData(
-        version="1.0",
-        pools=[
-            redirector_pool(pool) for pool in active_pools
-        ]
-    ).dict()
+    return {
+        "version": "1.0",
+        "pools": pools
+    }
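
A sketch of the document shape redirector_data() now returns, with placeholder values throughout; the Optional redirector_domain is an assumption for illustration, since that field's declaration is outside the hunks shown:

    import json
    from typing import Dict, List, Optional, TypedDict


    class RedirectorPool(TypedDict):
        short_name: str
        description: str
        api_key: str
        redirector_domain: Optional[str]
        origins: Dict[str, str]


    class RedirectorData(TypedDict):
        version: str
        pools: List[RedirectorPool]


    example: RedirectorData = {
        "version": "1.0",
        "pools": [{
            "short_name": "pool1",
            "description": "Example pool",
            "api_key": "not-a-real-key",
            "redirector_domain": None,
            "origins": {"example.com": "https://mirror.example.net"},
        }],
    }

    print(json.dumps(example, indent=2))
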
diff --git a/app/models/__init__.py b/app/models/__init__.py
index b7b4b9e..7b4c6c4 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -3,6 +3,8 @@ from abc import abstractmethod
 from datetime import datetime
 from typing import Union, List, Optional, Any, Dict
 
+from sqlalchemy.orm import Mapped, mapped_column
+
 from app.brm.brn import BRN
 from app.extensions import db
 
@@ -10,11 +12,11 @@ from app.extensions import db
 class AbstractConfiguration(db.Model):  # type: ignore
     __abstract__ = True
 
-    id = db.Column(db.Integer, primary_key=True)
-    description = db.Column(db.String(255), nullable=False)
-    added = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
-    updated = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
-    destroyed = db.Column(db.DateTime(), nullable=True)
+    id: Mapped[int] = mapped_column(db.Integer, primary_key=True)
+    description: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    added: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
+    updated: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
+    destroyed: Mapped[datetime] = mapped_column(db.DateTime())
 
     @property
     @abstractmethod
@@ -38,12 +40,12 @@ class AbstractConfiguration(db.Model):  # type: ignore
 
 
 class Deprecation(db.Model):  # type: ignore[name-defined,misc]
-    id = db.Column(db.Integer, primary_key=True)
-    resource_type = db.Column(db.String(50))
-    resource_id = db.Column(db.Integer)
-    deprecated_at = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
-    meta = db.Column(db.JSON())
-    reason = db.Column(db.String(), nullable=False)
+    id: Mapped[int] = mapped_column(db.Integer, primary_key=True)
+    resource_type: Mapped[str] = mapped_column(db.String(50))
+    resource_id: Mapped[int] = mapped_column(db.Integer)
+    deprecated_at: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
+    meta: Mapped[Optional[Dict[str, Any]]] = mapped_column(db.JSON())
+    reason: Mapped[str] = mapped_column(db.String(), nullable=False)
 
     @property
     def resource(self) -> "AbstractResource":
@@ -55,12 +57,12 @@ class Deprecation(db.Model):  # type: ignore[name-defined,misc]
 class AbstractResource(db.Model):  # type: ignore
     __abstract__ = True
 
-    id = db.Column(db.Integer, primary_key=True)
-    added = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
-    updated = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
-    deprecated = db.Column(db.DateTime(), nullable=True)
-    deprecation_reason = db.Column(db.String(), nullable=True)
-    destroyed = db.Column(db.DateTime(), nullable=True)
+    id: Mapped[int] = mapped_column(db.Integer, primary_key=True)
+    added: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
+    updated: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
+    deprecated: Mapped[Optional[datetime]] = mapped_column(db.DateTime())
+    deprecation_reason: Mapped[Optional[str]] = mapped_column(db.String())
+    destroyed: Mapped[Optional[datetime]] = mapped_column(db.DateTime())
 
     def __init__(self, *,
                  id: Optional[int] = None,
@@ -70,6 +72,10 @@ class AbstractResource(db.Model):  # type: ignore
                  deprecation_reason: Optional[str] = None,
                  destroyed: Optional[datetime] = None,
                  **kwargs: Any) -> None:
+        if added is None:
+            added = datetime.utcnow()
+        if updated is None:
+            updated = datetime.utcnow()
         super().__init__(id=id,
                          added=added,
                          updated=updated,
@@ -77,10 +83,6 @@ class AbstractResource(db.Model):  # type: ignore
                          deprecation_reason=deprecation_reason,
                          destroyed=destroyed,
                          **kwargs)
-        if self.added is None:
-            self.added = datetime.utcnow()
-        if self.updated is None:
-            self.updated = datetime.utcnow()
 
     @property
     @abstractmethod
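
The Mapped[...] annotations above give mypy concrete attribute types without changing the underlying columns. A minimal, standalone approximation of the same pattern in plain SQLAlchemy 2.0-style declarative (the table and field names here are invented for illustration, not taken from the codebase):

    from datetime import datetime
    from typing import Optional

    from sqlalchemy import DateTime, Integer, String
    from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


    class Base(DeclarativeBase):
        pass


    class Widget(Base):
        __tablename__ = "widget"

        id: Mapped[int] = mapped_column(Integer, primary_key=True)
        name: Mapped[str] = mapped_column(String(255), nullable=False)
        added: Mapped[datetime] = mapped_column(DateTime(), default=datetime.utcnow, nullable=False)
        destroyed: Mapped[Optional[datetime]] = mapped_column(DateTime())


    # mypy now knows Widget().destroyed is Optional[datetime], so calling
    # widget.destroyed.isoformat() requires a None check first.
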
diff --git a/app/terraform/list/__init__.py b/app/terraform/list/__init__.py
index 8362ee0..e4699b7 100644
--- a/app/terraform/list/__init__.py
+++ b/app/terraform/list/__init__.py
@@ -69,11 +69,12 @@ class ListAutomation(TerraformAutomation):
             )
         for pool in Pool.query.filter(Pool.destroyed.is_(None)).all():
             for key, formatter in lists.items():
+                formatted_pool = formatter(pool)
                 for obfuscate in [True, False]:
                     with open(os.path.join(
                             self.working_dir,
                             f"{key}.{pool.pool_name}{'.jsno' if obfuscate else '.json'}"),
                             'w', encoding="utf-8") as out:
-                        out.write(json_encode(formatter(pool), obfuscate))
+                        out.write(json_encode(formatted_pool, obfuscate))
                     with open(os.path.join(self.working_dir,
                                            f"{key}.{pool.pool_name}{'.jso' if obfuscate else '.js'}"),
                               'w', encoding="utf-8") as out:
-                        out.write(javascript_encode(formatter(pool), obfuscate))
+                        out.write(javascript_encode(formatted_pool, obfuscate))
diff --git a/requirements-types.txt b/requirements-types.txt
index 58d2f36..ac76b0f 100644
--- a/requirements-types.txt
+++ b/requirements-types.txt
@@ -1,6 +1,3 @@
 mypy
-types-flask-sqlalchemy
-types-requests
 types-PyYAML
 types-python-dateutil
-types-sqlalchemy
diff --git a/requirements.txt b/requirements.txt
index dc77bdd..e3e7394 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,7 +16,6 @@ markupsafe
 nose
 openpyxl
 prometheus_client
-pydantic
 pytest
 python-dateutil
 python-gitlab
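
For reference, the file-naming convention in the ListAutomation hunk pairs an obfuscated and a plain variant for each output format. A small sketch of the name generation; the helper function and the example key and pool name are illustrative only, not part of the codebase:

    from typing import List


    def output_names(key: str, pool_name: str) -> List[str]:
        # Mirrors the f-strings above: obfuscated JSON -> .jsno, plain JSON -> .json,
        # obfuscated JS -> .jso, plain JS -> .js
        names = []
        for obfuscate in [True, False]:
            names.append(f"{key}.{pool_name}{'.jsno' if obfuscate else '.json'}")
            names.append(f"{key}.{pool_name}{'.jso' if obfuscate else '.js'}")
        return names


    print(output_names("bc2", "main"))
    # ['bc2.main.jsno', 'bc2.main.jso', 'bc2.main.json', 'bc2.main.js']
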