feat: remove pydantic from list generation

This commit is contained in:
Iain Learmonth 2024-11-09 11:08:48 +00:00
parent 1e70ec8fa6
commit d08388c339
8 changed files with 164 additions and 197 deletions

View file

@ -1,38 +1,28 @@
# pylint: disable=too-few-public-methods from typing import List, Optional, TypedDict
import builtins
from datetime import datetime
from typing import List, Dict, Union, Any, Optional
from pydantic import BaseModel, Field
from app.models.base import Pool from app.models.base import Pool
from app.models.mirrors import Origin, Proxy from app.models.mirrors import Origin, Proxy
class BC2Alternative(TypedDict):
    """One alternative way of reaching a site in the Bypass Censorship list.

    All values are plain strings so the structure can be serialised
    without any further conversion.
    """

    # Protocol of the alternative, e.g. "https" for proxy mirrors.
    proto: str
    # Kind of alternative, e.g. "mirror" for proxies or "eotk" for onions.
    type: str
    # Creation timestamp of the underlying resource, rendered as a string.
    created_at: str
    # Last-update timestamp of the underlying resource, rendered as a string.
    updated_at: str
    # Address at which the alternative can be reached.
    url: str
class BC2Site(TypedDict):
    """A single website entry in the Bypass Censorship list."""

    # The main domain name of the website, excluding "www." if present,
    # e.g. "bbc.co.uk", "bbc.com" or "guardianproject.info".
    main_domain: str
    # Every alternative currently offered for reaching this website.
    available_alternatives: List[BC2Alternative]
class BypassCensorship2(TypedDict):
    """Top-level document of the "Bypass Censorship Version 2" list format."""

    # Version number of the Bypass Censorship Extension schema in use.
    version: str
    # One entry per website included in the list.
    sites: List[BC2Site]
def onion_alternative(origin: Origin) -> List[BC2Alternative]:
def onion_alternative(origin: Origin) -> List[Dict[str, Any]]:
url: Optional[str] = origin.onion() url: Optional[str] = origin.onion()
if url is None: if url is None:
return [] return []
@ -41,22 +31,23 @@ def onion_alternative(origin: Origin) -> List[Dict[str, Any]]:
"type": "eotk", "type": "eotk",
"created_at": str(origin.added), "created_at": str(origin.added),
"updated_at": str(origin.updated), "updated_at": str(origin.updated),
"url": url} "url": url
] }]
def proxy_alternative(proxy: Proxy) -> Optional[BC2Alternative]:
    """Describe a proxy as an HTTPS mirror alternative.

    :param proxy: the proxy to describe; its ``url``, ``added`` and
        ``updated`` attributes are read.
    :return: the alternative entry, or ``None`` when the proxy has no URL.
    """
    mirror_url = proxy.url
    if mirror_url is None:
        return None

    alternative: BC2Alternative = {
        "proto": "https",
        "type": "mirror",
        # Timestamps are emitted in ISO 8601 form.
        "created_at": proxy.added.isoformat(),
        "updated_at": proxy.updated.isoformat(),
        "url": mirror_url,
    }
    return alternative
def main_domain(origin: Origin) -> str: def main_domain(origin: Origin) -> str:
# Both description and domain_name are required to be not null in the database schema
description: str = origin.description description: str = origin.description
if description.startswith("proxy:"): if description.startswith("proxy:"):
return description[len("proxy:"):].replace("www.", "") return description[len("proxy:"):].replace("www.", "")
@ -65,20 +56,30 @@ def main_domain(origin: Origin) -> str:
def active_proxies(origin: Origin, pool: Pool) -> List[Proxy]:
    """Return the origin's proxies that are usable within the given pool.

    A proxy qualifies when it has a URL, is neither deprecated nor
    destroyed, and belongs to ``pool``. The order of ``origin.proxies``
    is preserved.
    """
    selected: List[Proxy] = []
    for candidate in origin.proxies:
        # Skip anything without a URL or already on its way out.
        if candidate.url is None or candidate.deprecated or candidate.destroyed:
            continue
        if candidate.pool_id == pool.id:
            selected.append(candidate)
    return selected
def mirror_sites(pool: Pool) -> BypassCensorship2:
    """Build the Bypass Censorship (version "2.0") list for a pool.

    Every origin that has not been destroyed is included, ordered by
    domain name. Each site lists its onion alternative (if any) followed
    by one mirror alternative per active proxy in ``pool``.
    """
    live_origins = (
        Origin.query
        .filter(Origin.destroyed.is_(None))
        .order_by(Origin.domain_name)
        .all()
    )

    sites: List[BC2Site] = []
    for origin in live_origins:
        # Start from a fresh list holding the onion alternative, if any.
        alternatives: List[BC2Alternative] = list(onion_alternative(origin))
        for proxy in active_proxies(origin, pool):
            candidate = proxy_alternative(proxy)
            # proxy_alternative() yields None for proxies without a URL.
            if candidate is not None:
                alternatives.append(candidate)
        sites.append({
            "main_domain": main_domain(origin),
            "available_alternatives": alternatives,
        })

    return {"version": "2.0", "sites": sites}

View file

@ -1,51 +1,31 @@
# pylint: disable=too-few-public-methods from typing import List, Optional, TypedDict
from sqlalchemy.orm import selectinload
import builtins
from typing import List, Iterable, Dict, Any, Optional
from pydantic import BaseModel, Field
from app.models.base import Pool from app.models.base import Pool
from app.models.bridges import Bridge from app.models.bridges import Bridge
class BridgelinesDict(TypedDict):
    """Document in the "Bridgelines Version 1" format."""

    # Version number of the bridgelines schema in use, e.g. "1.0".
    version: str
    # List of bridgelines, ready for use in a torrc file, e.g.
    # "Bridge obfs4 71.73.124.31:8887 E81B1237F6D13497B166060F55861565593CFF8E
    #  cert=b54NsV6tK1g+LHaThPOTCibdpx3wHm9NFe0PzGF1nwz+4M/tq6SkfOaShzPnZsIRCFRIHg
    #  iat-mode=0" (a single line in practice).
    bridgelines: List[str]
def bridgelines(pool: Pool, *, distribution_method: Optional[str] = None) -> BridgelinesDict:
    """Build the bridgelines document (schema version "1.0") for a pool.

    Only bridges that are neither destroyed nor deprecated and that have a
    bridgeline are included.

    :param pool: pool whose bridge configurations are selected.
    :param distribution_method: when given, additionally restrict the result
        to bridge configurations using this distribution method.
    :return: a dictionary with the schema version and the bridgelines.
    """
    # All criteria on the related configuration go into a single EXISTS
    # clause, so the database does the pool filtering instead of Python
    # iterating over every bridge in every pool.
    conf_criteria = {"pool_id": pool.id}
    if distribution_method is not None:
        conf_criteria["distribution_method"] = distribution_method

    bridges = Bridge.query.filter(
        Bridge.destroyed.is_(None),
        Bridge.deprecated.is_(None),
        Bridge.bridgeline.is_not(None),
        Bridge.conf.has(**conf_criteria),
    ).all()

    return {
        "version": "1.0",
        "bridgelines": [bridge.bridgeline for bridge in bridges],
    }

View file

@ -1,87 +1,76 @@
# pylint: disable=too-few-public-methods import logging
import builtins
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Dict, List, Union, Optional from typing import Dict, List, Optional, TypedDict
from flask import current_app from flask import current_app
from pydantic import BaseModel, Field
from sqlalchemy import or_ from sqlalchemy import or_
from sqlalchemy.orm import selectinload
from tldextract import extract from tldextract import extract
from app.extensions import db from app.extensions import db
from app.models.base import Group, Pool from app.models.base import Group, Pool
from app.models.mirrors import Proxy from app.models.mirrors import Proxy, Origin
class MirrorMappingMirror(TypedDict):
    """Details of a single mirror within the mirror mapping."""

    # The full origin domain name.
    origin_domain: str
    # The origin_domain with "www." removed, if present.
    origin_domain_normalized: str
    # The registered domain name of the origin, excluding subdomains.
    origin_domain_root: str
    # The date on which the mirror was added to the system.
    valid_from: str
    # The date on which the mirror was decommissioned, if it has been.
    valid_to: Optional[str]
    # Mapping of country code to the risk level for that country.
    countries: Dict[str, int]
    # The country code of the country with the highest risk level where the
    # origin is targeted.
    country: Optional[str]
    # The risk score for the highest risk country.
    risk: int
class MirrorMapping(TypedDict):
    """Top-level document of the "Mirror Mapping Version 1.2" format."""

    # Version number of the mirror mapping schema in use.
    version: str
    # Mirror details, keyed by the domain name for the mirror.
    mappings: Dict[str, MirrorMappingMirror]
    # The names of all S3 buckets used for CloudFront logs.
    s3_buckets: List[str]
def mirror_mapping(_: Optional[Pool]) -> MirrorMapping:
    """Build the mirror mapping document (schema version "1.2").

    The pool argument is accepted so the function can be called like the
    other list formatters, but it is not used: the mapping covers every
    proxy that has a URL and is either still alive or was destroyed within
    the last two days.

    :return: a dictionary with the schema version, the per-mirror mappings
        keyed by mirror host name, and the CloudFront log bucket names.
    """
    two_days_ago = datetime.utcnow() - timedelta(days=2)

    proxies = (
        db.session.query(Proxy)
        # Load origins and their countries up front to avoid per-proxy queries.
        .options(selectinload(Proxy.origin).selectinload(Origin.countries))
        .filter(or_(Proxy.destroyed.is_(None), Proxy.destroyed > two_days_ago))
        .filter(Proxy.url.is_not(None))
        .all()
    )

    result: Dict[str, MirrorMappingMirror] = {}
    for proxy in proxies:
        if proxy.url is None:
            logging.error("No URL for proxy %s", proxy)
            continue

        countries = proxy.origin.risk_level
        if countries:
            highest_risk_country_code, highest_risk_level = max(countries.items(), key=lambda x: x[1])
        else:
            # No country is associated with the origin.
            highest_risk_country_code = "ZZ"
            highest_risk_level = 0

        # Remove the URL scheme as a *prefix*. str.lstrip() treats its
        # argument as a set of characters, so "https://static.example.com"
        # used to be mangled into "atic.example.com".
        mirror_host = proxy.url
        for scheme in ("https://", "http://"):
            if mirror_host.startswith(scheme):
                mirror_host = mirror_host[len(scheme):]
                break

        result[mirror_host] = {
            "origin_domain": proxy.origin.domain_name,
            "origin_domain_normalized": proxy.origin.domain_name.replace("www.", ""),
            "origin_domain_root": extract(proxy.origin.domain_name).registered_domain,
            "valid_from": proxy.added.isoformat(),
            "valid_to": proxy.destroyed.isoformat() if proxy.destroyed else None,
            "countries": countries,
            "country": highest_risk_country_code,
            "risk": highest_risk_level,
        }

    # Only the group names are needed here, so no relationships are loaded.
    active_groups = db.session.query(Group).filter(Group.destroyed.is_(None)).all()
    s3_buckets = [
        f"{current_app.config['GLOBAL_NAMESPACE']}-{g.group_name.lower()}-logs-cloudfront"
        for g in active_groups
    ]

    return {
        "version": "1.2",
        "mappings": result,
        "s3_buckets": s3_buckets,
    }

View file

@ -1,12 +1,11 @@
from typing import List, Dict, Union, Optional from typing import List, Dict, Optional, TypedDict
from sqlalchemy.orm import selectinload
from pydantic import BaseModel
from app.models.base import Pool from app.models.base import Pool
from app.models.mirrors import Proxy from app.models.mirrors import Proxy
class RedirectorPool(BaseModel): class RedirectorPool(TypedDict):
short_name: str short_name: str
description: str description: str
api_key: str api_key: str
@ -14,41 +13,40 @@ class RedirectorPool(BaseModel):
origins: Dict[str, str] origins: Dict[str, str]
class RedirectorData(TypedDict):
    """Top-level document handed to the redirector."""

    # Version of the redirector data format.
    version: str
    # One entry per pool included in the document.
    pools: List[RedirectorPool]
def redirector_pool_origins(pool: Pool) -> Dict[str, str]:
    """Map origin domain names to the URL of an active proxy in ``pool``.

    Only proxies that are neither deprecated nor destroyed and that have a
    URL are considered. When several proxies serve the same origin, the
    one returned last by the query wins.
    """
    active_proxies = Proxy.query.options(
        # Fetch the origins in one extra query instead of one per proxy.
        selectinload(Proxy.origin)
    ).filter(
        Proxy.deprecated.is_(None),
        Proxy.destroyed.is_(None),
        Proxy.url.is_not(None),
        Proxy.pool_id == pool.id,
    )
    return {
        proxy.origin.domain_name: proxy.url
        for proxy in active_proxies
    }
def redirector_data(_: Optional[Pool]) -> RedirectorData:
    """Build the redirector data document (version "1.0").

    The pool argument is ignored: the document always covers every pool
    that has not been destroyed.
    """
    # Pool.proxies is deliberately not eager-loaded: the origins of each
    # pool come from redirector_pool_origins(), which runs its own filtered
    # Proxy query, so a preloaded collection would never be read.
    active_pools = Pool.query.filter(Pool.destroyed.is_(None)).all()

    pools: List[RedirectorPool] = [
        {
            "short_name": pool.pool_name,
            "description": pool.description,
            "api_key": pool.api_key,
            "redirector_domain": pool.redirector_domain,
            "origins": redirector_pool_origins(pool),
        }
        for pool in active_pools
    ]

    return {
        "version": "1.0",
        "pools": pools,
    }

View file

@ -3,6 +3,8 @@ from abc import abstractmethod
from datetime import datetime from datetime import datetime
from typing import Union, List, Optional, Any, Dict from typing import Union, List, Optional, Any, Dict
from sqlalchemy.orm import Mapped, mapped_column
from app.brm.brn import BRN from app.brm.brn import BRN
from app.extensions import db from app.extensions import db
@ -10,11 +12,11 @@ from app.extensions import db
class AbstractConfiguration(db.Model): # type: ignore class AbstractConfiguration(db.Model): # type: ignore
__abstract__ = True __abstract__ = True
id = db.Column(db.Integer, primary_key=True) id: Mapped[int] = mapped_column(db.Integer, primary_key=True)
description = db.Column(db.String(255), nullable=False) description: Mapped[str] = mapped_column(db.String(255), nullable=False)
added = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False) added: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
updated = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False) updated: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
destroyed = db.Column(db.DateTime(), nullable=True) destroyed: Mapped[datetime] = mapped_column(db.DateTime())
@property @property
@abstractmethod @abstractmethod
@ -38,12 +40,12 @@ class AbstractConfiguration(db.Model): # type: ignore
class Deprecation(db.Model): # type: ignore[name-defined,misc] class Deprecation(db.Model): # type: ignore[name-defined,misc]
id = db.Column(db.Integer, primary_key=True) id: Mapped[int] = mapped_column(db.Integer, primary_key=True)
resource_type = db.Column(db.String(50)) resource_type: Mapped[str] = mapped_column(db.String(50))
resource_id = db.Column(db.Integer) resource_id: Mapped[int] = mapped_column(db.Integer)
deprecated_at = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False) deprecated_at: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
meta = db.Column(db.JSON()) meta: Mapped[Optional[Dict[str, Any]]] = mapped_column(db.JSON())
reason = db.Column(db.String(), nullable=False) reason: Mapped[str] = mapped_column(db.String(), nullable=False)
@property @property
def resource(self) -> "AbstractResource": def resource(self) -> "AbstractResource":
@ -55,12 +57,12 @@ class Deprecation(db.Model): # type: ignore[name-defined,misc]
class AbstractResource(db.Model): # type: ignore class AbstractResource(db.Model): # type: ignore
__abstract__ = True __abstract__ = True
id = db.Column(db.Integer, primary_key=True) id: Mapped[int] = mapped_column(db.Integer, primary_key=True)
added = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False) added: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
updated = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False) updated: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
deprecated = db.Column(db.DateTime(), nullable=True) deprecated: Mapped[Optional[datetime]] = mapped_column(db.DateTime())
deprecation_reason = db.Column(db.String(), nullable=True) deprecation_reason: Mapped[Optional[str]] = mapped_column(db.String())
destroyed = db.Column(db.DateTime(), nullable=True) destroyed: Mapped[Optional[datetime]] = mapped_column(db.DateTime())
def __init__(self, *, def __init__(self, *,
id: Optional[int] = None, id: Optional[int] = None,
@ -70,6 +72,10 @@ class AbstractResource(db.Model): # type: ignore
deprecation_reason: Optional[str] = None, deprecation_reason: Optional[str] = None,
destroyed: Optional[datetime] = None, destroyed: Optional[datetime] = None,
**kwargs: Any) -> None: **kwargs: Any) -> None:
if added is None:
added = datetime.utcnow()
if updated is None:
updated = datetime.utcnow()
super().__init__(id=id, super().__init__(id=id,
added=added, added=added,
updated=updated, updated=updated,
@ -77,10 +83,6 @@ class AbstractResource(db.Model): # type: ignore
deprecation_reason=deprecation_reason, deprecation_reason=deprecation_reason,
destroyed=destroyed, destroyed=destroyed,
**kwargs) **kwargs)
if self.added is None:
self.added = datetime.utcnow()
if self.updated is None:
self.updated = datetime.utcnow()
@property @property
@abstractmethod @abstractmethod

View file

@ -69,11 +69,12 @@ class ListAutomation(TerraformAutomation):
) )
for pool in Pool.query.filter(Pool.destroyed.is_(None)).all(): for pool in Pool.query.filter(Pool.destroyed.is_(None)).all():
for key, formatter in lists.items(): for key, formatter in lists.items():
formatted_pool = formatter(pool)
for obfuscate in [True, False]: for obfuscate in [True, False]:
with open(os.path.join( with open(os.path.join(
self.working_dir, f"{key}.{pool.pool_name}{'.jsno' if obfuscate else '.json'}"), self.working_dir, f"{key}.{pool.pool_name}{'.jsno' if obfuscate else '.json'}"),
'w', encoding="utf-8") as out: 'w', encoding="utf-8") as out:
out.write(json_encode(formatter(pool), obfuscate)) out.write(json_encode(formatted_pool, obfuscate))
with open(os.path.join(self.working_dir, f"{key}.{pool.pool_name}{'.jso' if obfuscate else '.js'}"), with open(os.path.join(self.working_dir, f"{key}.{pool.pool_name}{'.jso' if obfuscate else '.js'}"),
'w', encoding="utf-8") as out: 'w', encoding="utf-8") as out:
out.write(javascript_encode(formatter(pool), obfuscate)) out.write(javascript_encode(formatted_pool, obfuscate))

View file

@ -1,6 +1,3 @@
mypy mypy
types-flask-sqlalchemy
types-requests
types-PyYAML types-PyYAML
types-python-dateutil types-python-dateutil
types-sqlalchemy

View file

@ -16,7 +16,6 @@ markupsafe
nose nose
openpyxl openpyxl
prometheus_client prometheus_client
pydantic
pytest pytest
python-dateutil python-dateutil
python-gitlab python-gitlab