feat: remove pydantic from list generation

This commit is contained in:
Iain Learmonth 2024-11-09 11:08:48 +00:00
parent 1e70ec8fa6
commit d08388c339
8 changed files with 164 additions and 197 deletions

View file

@@ -1,38 +1,28 @@
# pylint: disable=too-few-public-methods
import builtins
from datetime import datetime
from typing import List, Dict, Union, Any, Optional
from pydantic import BaseModel, Field
from typing import List, Optional, TypedDict
from app.models.base import Pool
from app.models.mirrors import Origin, Proxy
class BC2Alternative(BaseModel):
class BC2Alternative(TypedDict):
proto: str
type: str
created_at: datetime
updated_at: datetime
created_at: str
updated_at: str
url: str
class BC2Site(BaseModel):
main_domain: str = Field(description="The main domain name of the website, excluding \"www.\" if present.",
examples=["bbc.co.uk", "bbc.com", "guardianproject.info"])
class BC2Site(TypedDict):
main_domain: str
available_alternatives: List[BC2Alternative]
class BypassCensorship2(BaseModel):
version: str = Field(description="Version number of the Bypass Censorship Extension schema in use", )
class BypassCensorship2(TypedDict):
version: str
sites: List[BC2Site]
class Config:
title = "Bypass Censorship Version 2"
def onion_alternative(origin: Origin) -> List[Dict[str, Any]]:
def onion_alternative(origin: Origin) -> List[BC2Alternative]:
url: Optional[str] = origin.onion()
if url is None:
return []
@@ -41,22 +31,23 @@ def onion_alternative(origin: Origin) -> List[Dict[str, Any]]:
"type": "eotk",
"created_at": str(origin.added),
"updated_at": str(origin.updated),
"url": url}
]
"url": url
}]
def proxy_alternative(proxy: Proxy) -> Dict[str, Any]:
def proxy_alternative(proxy: Proxy) -> Optional[BC2Alternative]:
if proxy.url is None:
return None
return {
"proto": "https",
"type": "mirror",
"created_at": str(proxy.added),
"updated_at": str(proxy.updated),
"created_at": proxy.added.isoformat(),
"updated_at": proxy.updated.isoformat(),
"url": proxy.url
}
def main_domain(origin: Origin) -> str:
# Both description and domain_name are required to be not null in the database schema
description: str = origin.description
if description.startswith("proxy:"):
return description[len("proxy:"):].replace("www.", "")
@@ -65,20 +56,30 @@ def main_domain(origin: Origin) -> str:
def active_proxies(origin: Origin, pool: Pool) -> List[Proxy]:
def _filter_fn(proxy: Proxy) -> bool:
return proxy.url is not None and not proxy.deprecated and not proxy.destroyed and proxy.pool_id == pool.id
return list(filter(_filter_fn, origin.proxies))
return [
proxy for proxy in origin.proxies
if proxy.url is not None and not proxy.deprecated and not proxy.destroyed and proxy.pool_id == pool.id
]
def mirror_sites(pool: Pool) -> Dict[
str, Union[str, List[Dict[str, Union[str, List[Dict[str, str]]]]]]]:
return {"version": "2.0", "sites": [{"main_domain": main_domain(origin),
"available_alternatives": onion_alternative(origin) + [
proxy_alternative(proxy) for proxy in
active_proxies(origin, pool)]} for origin in
Origin.query.order_by(Origin.domain_name).all() if
origin.destroyed is None]}
def mirror_sites(pool: Pool) -> BypassCensorship2:
origins = Origin.query.filter(Origin.destroyed.is_(None)).order_by(Origin.domain_name).all()
sites: List[BC2Site] = []
for origin in origins:
# Gather alternatives, filtering out None values from proxy_alternative
alternatives = onion_alternative(origin) + [
alt for proxy in active_proxies(origin, pool)
if (alt := proxy_alternative(proxy)) is not None
]
if getattr(builtins, "__sphinx_build__", False):
schema = BypassCensorship2.schema_json()
# Add the site dictionary to the list
sites.append({
"main_domain": main_domain(origin),
"available_alternatives": list(alternatives)
})
return {
"version": "2.0",
"sites": sites
}

View file

@@ -1,51 +1,31 @@
# pylint: disable=too-few-public-methods
import builtins
from typing import List, Iterable, Dict, Any, Optional
from pydantic import BaseModel, Field
from typing import List, Optional, TypedDict
from sqlalchemy.orm import selectinload
from app.models.base import Pool
from app.models.bridges import Bridge
class Bridgelines(BaseModel):
version: str = Field(
description="Version number of the bridgelines schema in use",
examples=[
"1.0"
]
)
bridgelines: List[str] = Field(
description="List of bridgelines, ready for use in a torrc file",
examples=[
"Bridge obfs4 71.73.124.31:8887 E81B1237F6D13497B166060F55861565593CFF8E "
"cert=b54NsV6tK1g+LHaThPOTCibdpx3wHm9NFe0PzGF1nwz+4M/tq6SkfOaShzPnZsIRCFRIHg iat-mode=0",
"Bridge obfs4 172.105.176.101:80 D18BC7E082D7EBF8E851029AC89A12A3F44A50BF "
"cert=KHfAAUptXWRmLy3ehS9ETMO5luY06d0w7tEBDiAI0z62nC5Qo/APrzZxodkYWX2bNko/Mw iat-mode=0",
"Bridge obfs4 141.101.36.55:9023 045EF272F08BC11CDB985889E4E9FE35DC6F9C67 "
"cert=6KEdf/5aDSyuYEqvo14JE8Cks3i7PQtj9EFX2wTCiEaUPsp/I7eaOm4uSWdqwvV4vTVlFw iat-mode=0 "
]
)
class Config:
title = "Bridgelines Version 1"
class BridgelinesDict(TypedDict):
version: str
bridgelines: List[str]
def bridgelines(pool: Pool, *, distribution_method: Optional[str] = None) -> Dict[str, Any]:
bridges: Iterable[Bridge] = Bridge.query.filter(
def bridgelines(pool: Pool, *, distribution_method: Optional[str] = None) -> BridgelinesDict:
# Fetch bridges with selectinload for related data
query = Bridge.query.options(selectinload(Bridge.conf)).filter(
Bridge.destroyed.is_(None),
Bridge.deprecated.is_(None),
Bridge.bridgeline.is_not(None)
).all()
)
if distribution_method is not None:
bridges = [b for b in bridges
if b.conf.distribution_method == distribution_method]
return Bridgelines(
version="1.0",
bridgelines=[b.bridgeline for b in bridges if b.conf.pool_id == pool.id]
).dict()
query = query.filter(Bridge.conf.has(distribution_method=distribution_method))
# Collect bridgelines specific to the pool
bridgelines = [b.bridgeline for b in query.all() if b.conf.pool_id == pool.id]
if getattr(builtins, "__sphinx_build__", False):
schema = Bridgelines.schema_json()
# Return dictionary directly, inlining the previous `to_dict` functionality
return {
"version": "1.0",
"bridgelines": bridgelines
}

View file

@@ -1,87 +1,76 @@
# pylint: disable=too-few-public-methods
import builtins
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Union, Optional
from typing import Dict, List, Optional, TypedDict
from flask import current_app
from pydantic import BaseModel, Field
from sqlalchemy import or_
from sqlalchemy.orm import selectinload
from tldextract import extract
from app.extensions import db
from app.models.base import Group, Pool
from app.models.mirrors import Proxy
from app.models.mirrors import Proxy, Origin
class MMMirror(BaseModel):
origin_domain: str = Field(description="The full origin domain name")
origin_domain_normalized: str = Field(description="The origin_domain with \"www.\" removed, if present")
origin_domain_root: str = Field(description="The registered domain name of the origin, excluding subdomains")
valid_from: str = Field(description="The date on which the mirror was added to the system")
valid_to: Optional[str] = Field(description="The date on which the mirror was decommissioned")
countries: Dict[str, int] = Field(description="A list mapping of risk levels to country")
country: Optional[str] = Field(
description="The country code of the country with the highest risk level where the origin is targeted")
risk: int = Field(description="The risk score for the highest risk country")
class MirrorMappingMirror(TypedDict):
origin_domain: str
origin_domain_normalized: str
origin_domain_root: str
valid_from: str
valid_to: Optional[str]
countries: Dict[str, int]
country: Optional[str]
risk: int
class MirrorMapping(BaseModel):
version: str = Field(
description="Version number of the mirror mapping schema in use"
)
mappings: Dict[str, MMMirror] = Field(
description="The domain name for the mirror"
)
s3_buckets: List[str] = Field(
description="The names of all S3 buckets used for CloudFront logs"
)
class Config:
title = "Mirror Mapping Version 1.2"
class MirrorMapping(TypedDict):
version: str
mappings: Dict[str, MirrorMappingMirror]
s3_buckets: List[str]
def mirror_mapping(_: Optional[Pool]) -> Dict[str, Union[str, Dict[str, str]]]:
one_week_ago = datetime.utcnow() - timedelta(days=7)
def mirror_mapping(_: Optional[Pool]) -> MirrorMapping:
two_days_ago = datetime.utcnow() - timedelta(days=2)
proxies = (
db.session.query(Proxy) # type: ignore[no-untyped-call]
.filter(or_(Proxy.destroyed.is_(None), Proxy.destroyed > one_week_ago))
db.session.query(Proxy)
.options(selectinload(Proxy.origin).selectinload(Origin.countries))
.filter(or_(Proxy.destroyed.is_(None), Proxy.destroyed > two_days_ago))
.filter(Proxy.url.is_not(None))
.all()
)
result = {}
result: Dict[str, MirrorMappingMirror] = {}
for proxy in proxies:
if proxy.origin.countries: # Check if there are any associated countries
risk_levels = proxy.origin.risk_level.items()
highest_risk_country = max(risk_levels, key=lambda x: x[1])
highest_risk_country_code = highest_risk_country[0]
highest_risk_level = highest_risk_country[1]
if proxy.url is None:
logging.error("No URL for proxy %s", proxy)
continue
countries = proxy.origin.risk_level
if countries:
highest_risk_country_code, highest_risk_level = max(countries.items(), key=lambda x: x[1])
else:
highest_risk_country_code = "ZZ"
highest_risk_level = 0
result[proxy.url.lstrip("https://")] = MMMirror(
origin_domain=proxy.origin.domain_name,
origin_domain_normalized=proxy.origin.domain_name.replace("www.", ""),
origin_domain_root=extract(proxy.origin.domain_name).registered_domain,
valid_from=proxy.added.isoformat(),
valid_to=proxy.destroyed.isoformat() if proxy.destroyed is not None else None,
countries=proxy.origin.risk_level,
country=highest_risk_country_code,
risk=highest_risk_level
)
result[proxy.url.lstrip("https://")] = {
"origin_domain": proxy.origin.domain_name,
"origin_domain_normalized": proxy.origin.domain_name.replace("www.", ""),
"origin_domain_root": extract(proxy.origin.domain_name).registered_domain,
"valid_from": proxy.added.isoformat(),
"valid_to": proxy.destroyed.isoformat() if proxy.destroyed else None,
"countries": countries,
"country": highest_risk_country_code,
"risk": highest_risk_level
}
return MirrorMapping(
version="1.2",
mappings=result,
groups = db.session.query(Group).options(selectinload(Group.pools))
s3_buckets = [
f"{current_app.config['GLOBAL_NAMESPACE']}-{g.group_name.lower()}-logs-cloudfront"
for g in Group.query.filter(Group.destroyed.is_(None)).all()
for g in groups.filter(Group.destroyed.is_(None)).all()
]
).dict()
if getattr(builtins, "__sphinx_build__", False):
schema = MirrorMapping.schema_json()
return {
"version": "1.2",
"mappings": result,
"s3_buckets": s3_buckets
}

View file

@@ -1,12 +1,11 @@
from typing import List, Dict, Union, Optional
from pydantic import BaseModel
from typing import List, Dict, Optional, TypedDict
from sqlalchemy.orm import selectinload
from app.models.base import Pool
from app.models.mirrors import Proxy
class RedirectorPool(BaseModel):
class RedirectorPool(TypedDict):
short_name: str
description: str
api_key: str
@@ -14,41 +13,40 @@ class RedirectorPool(BaseModel):
origins: Dict[str, str]
class RedirectorData(BaseModel):
class RedirectorData(TypedDict):
version: str
pools: List[RedirectorPool]
def redirector_pool_origins(pool: Pool) -> Dict[str, str]:
origins: Dict[str, str] = dict()
active_proxies = Proxy.query.filter(
return {
proxy.origin.domain_name: proxy.url
for proxy in Proxy.query.filter(
Proxy.deprecated.is_(None),
Proxy.destroyed.is_(None),
Proxy.url.is_not(None),
Proxy.pool_id == pool.id
)
for proxy in active_proxies:
origins[proxy.origin.domain_name] = proxy.url
return origins
}
def redirector_pool(pool: Pool) -> RedirectorPool:
return RedirectorPool(
short_name=pool.pool_name,
description=pool.description,
api_key=pool.api_key,
redirector_domain=pool.redirector_domain,
origins=redirector_pool_origins(pool)
)
def redirector_data(_: Optional[Pool]) -> RedirectorData:
active_pools = Pool.query.options(
selectinload(Pool.proxies)
).filter(Pool.destroyed.is_(None)).all()
def redirector_data(_: Optional[Pool]) -> Dict[str, Union[str, Dict[str, Union[Dict[str, str]]]]]:
active_pools = Pool.query.filter(
Pool.destroyed.is_(None)
).all()
return RedirectorData(
version="1.0",
pools=[
redirector_pool(pool) for pool in active_pools
pools: List[RedirectorPool] = [
{
"short_name": pool.pool_name,
"description": pool.description,
"api_key": pool.api_key,
"redirector_domain": pool.redirector_domain,
"origins": redirector_pool_origins(pool)
}
for pool in active_pools
]
).dict()
return {
"version": "1.0",
"pools": pools
}

View file

@@ -3,6 +3,8 @@ from abc import abstractmethod
from datetime import datetime
from typing import Union, List, Optional, Any, Dict
from sqlalchemy.orm import Mapped, mapped_column
from app.brm.brn import BRN
from app.extensions import db
@@ -10,11 +12,11 @@ from app.extensions import db
class AbstractConfiguration(db.Model): # type: ignore
__abstract__ = True
id = db.Column(db.Integer, primary_key=True)
description = db.Column(db.String(255), nullable=False)
added = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
updated = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
destroyed = db.Column(db.DateTime(), nullable=True)
id: Mapped[int] = mapped_column(db.Integer, primary_key=True)
description: Mapped[str] = mapped_column(db.String(255), nullable=False)
added: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
updated: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
destroyed: Mapped[datetime] = mapped_column(db.DateTime())
@property
@abstractmethod
@@ -38,12 +40,12 @@ class AbstractConfiguration(db.Model): # type: ignore
class Deprecation(db.Model): # type: ignore[name-defined,misc]
id = db.Column(db.Integer, primary_key=True)
resource_type = db.Column(db.String(50))
resource_id = db.Column(db.Integer)
deprecated_at = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
meta = db.Column(db.JSON())
reason = db.Column(db.String(), nullable=False)
id: Mapped[int] = mapped_column(db.Integer, primary_key=True)
resource_type: Mapped[str] = mapped_column(db.String(50))
resource_id: Mapped[int] = mapped_column(db.Integer)
deprecated_at: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
meta: Mapped[Optional[Dict[str, Any]]] = mapped_column(db.JSON())
reason: Mapped[str] = mapped_column(db.String(), nullable=False)
@property
def resource(self) -> "AbstractResource":
@@ -55,12 +57,12 @@ class Deprecation(db.Model): # type: ignore[name-defined,misc]
class AbstractResource(db.Model): # type: ignore
__abstract__ = True
id = db.Column(db.Integer, primary_key=True)
added = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
updated = db.Column(db.DateTime(), default=datetime.utcnow, nullable=False)
deprecated = db.Column(db.DateTime(), nullable=True)
deprecation_reason = db.Column(db.String(), nullable=True)
destroyed = db.Column(db.DateTime(), nullable=True)
id: Mapped[int] = mapped_column(db.Integer, primary_key=True)
added: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
updated: Mapped[datetime] = mapped_column(db.DateTime(), default=datetime.utcnow, nullable=False)
deprecated: Mapped[Optional[datetime]] = mapped_column(db.DateTime())
deprecation_reason: Mapped[Optional[str]] = mapped_column(db.String())
destroyed: Mapped[Optional[datetime]] = mapped_column(db.DateTime())
def __init__(self, *,
id: Optional[int] = None,
@@ -70,6 +72,10 @@ class AbstractResource(db.Model): # type: ignore
deprecation_reason: Optional[str] = None,
destroyed: Optional[datetime] = None,
**kwargs: Any) -> None:
if added is None:
added = datetime.utcnow()
if updated is None:
updated = datetime.utcnow()
super().__init__(id=id,
added=added,
updated=updated,
@@ -77,10 +83,6 @@ class AbstractResource(db.Model): # type: ignore
deprecation_reason=deprecation_reason,
destroyed=destroyed,
**kwargs)
if self.added is None:
self.added = datetime.utcnow()
if self.updated is None:
self.updated = datetime.utcnow()
@property
@abstractmethod

View file

@@ -69,11 +69,12 @@ class ListAutomation(TerraformAutomation):
)
for pool in Pool.query.filter(Pool.destroyed.is_(None)).all():
for key, formatter in lists.items():
formatted_pool = formatter(pool)
for obfuscate in [True, False]:
with open(os.path.join(
self.working_dir, f"{key}.{pool.pool_name}{'.jsno' if obfuscate else '.json'}"),
'w', encoding="utf-8") as out:
out.write(json_encode(formatter(pool), obfuscate))
out.write(json_encode(formatted_pool, obfuscate))
with open(os.path.join(self.working_dir, f"{key}.{pool.pool_name}{'.jso' if obfuscate else '.js'}"),
'w', encoding="utf-8") as out:
out.write(javascript_encode(formatter(pool), obfuscate))
out.write(javascript_encode(formatted_pool, obfuscate))

View file

@@ -1,6 +1,3 @@
mypy
types-flask-sqlalchemy
types-requests
types-PyYAML
types-python-dateutil
types-sqlalchemy

View file

@@ -16,7 +16,6 @@ markupsafe
nose
openpyxl
prometheus_client
pydantic
pytest
python-dateutil
python-gitlab