diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 462aa15..b12e832 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,6 +7,7 @@ test: - apt update && apt install build-essential - pip install -r requirements.txt - pip install -U sphinx sphinx-press-theme sphinx-jsonschema + - pushd scripts && python update_schemas.py && popd - sphinx-build -b html docs public rules: - if: $CI_COMMIT_REF_NAME != $CI_DEFAULT_BRANCH @@ -18,6 +19,7 @@ pages: - apt update && apt install build-essential - pip install -r requirements.txt - pip install -U sphinx sphinx-press-theme sphinx-jsonschema + - pushd scripts && python update_schemas.py && popd - sphinx-build -b html docs public artifacts: paths: diff --git a/app/__init__.py b/app/__init__.py index 638f147..d009e4b 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -5,7 +5,7 @@ import yaml from app.extensions import db from app.extensions import migrate from app.extensions import bootstrap -from app.mirror_sites import mirror_sites +from app.lists.bc2 import mirror_sites from app.models.mirrors import Origin, Proxy, Mirror from app.models.base import Group from app.portal import portal diff --git a/app/lists/__init__.py b/app/lists/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/mirror_sites.py b/app/lists/bc2.py similarity index 67% rename from app/mirror_sites.py rename to app/lists/bc2.py index 9a6faf9..e8125f3 100644 --- a/app/mirror_sites.py +++ b/app/lists/bc2.py @@ -1,8 +1,35 @@ -from tldextract import extract +from datetime import datetime +from typing import List -from app.models.bridges import Bridge -from app.models.mirrors import Origin, Proxy +from pydantic import BaseModel, Field +from app.models.mirrors import Origin + + +class BC2Alternative(BaseModel): + proto: str + type: str + created_at: datetime + updated_at: datetime + url: str + + +class BC2Site(BaseModel): + main_domain: str = Field( + description="The main domain name of the website, excluding \"www.\" if present.", + 
examples=["bbc.co.uk", "bbc.com", "guardianproject.info"] + ) + available_alternatives: List[BC2Alternative] + + +class BypassCensorship2(BaseModel): + version: str = Field( + description="Version number of the Bypass Censorship Extension schema in use", + ) + sites: List[BC2Site] + + class Config: + title = "Bypass Censorship Version 2" def mirror_sites(): return { @@ -29,25 +56,3 @@ def mirror_sites(): ]} for x in Origin.query.order_by(Origin.domain_name).all() if x.destroyed is None ] } - - -def bridgelines(): - return { - "version": "1.0", - "bridgelines": [ - b.bridgeline for b in Bridge.query.filter( - Bridge.destroyed == None, - Bridge.bridgeline != None - ) - ] - } - - -def mirror_mapping(): - return { - d.url.lstrip("https://"): { - "origin_domain": d.origin.domain_name, - "origin_domain_normalized": d.origin.domain_name.lstrip("www."), - "origin_domain_root": extract(d.origin.domain_name).registered_domain - } for d in Proxy.query.all() if d.url is not None - } diff --git a/app/lists/bridgelines.py b/app/lists/bridgelines.py new file mode 100644 index 0000000..6e83d76 --- /dev/null +++ b/app/lists/bridgelines.py @@ -0,0 +1,34 @@ +from typing import List + +from pydantic import BaseModel, Field + +from app.models.bridges import Bridge + + +class Bridgelines(BaseModel): + version: str = Field( + description="Version number of the bridgelines schema in use" + ) + bridgelines: List[str] = Field( + description="List of bridgelines, ready for use in a torrc file", + examples = [ + "obfs4 71.73.124.31:8887 E81B1237F6D13497B166060F55861565593CFF8E cert=b54NsV6tK1g+LHaThPOTCibdpx3wHm9NFe0PzGF1nwz+4M/tq6SkfOaShzPnZsIRCFRIHg iat-mode=0", + "obfs4 172.105.176.101:80 D18BC7E082D7EBF8E851029AC89A12A3F44A50BF cert=KHfAAUptXWRmLy3ehS9ETMO5luY06d0w7tEBDiAI0z62nC5Qo/APrzZxodkYWX2bNko/Mw iat-mode=0", + "obfs4 141.101.36.55:9023 045EF272F08BC11CDB985889E4E9FE35DC6F9C67 cert=6KEdf/5aDSyuYEqvo14JE8Cks3i7PQtj9EFX2wTCiEaUPsp/I7eaOm4uSWdqwvV4vTVlFw iat-mode=0" + ] + ) + + class 
Config: + title = "Bridgelines Version 2" + + +def bridgelines(): + return Bridgelines( + version="1.0", + bridgelines=[ + b.bridgeline for b in Bridge.query.filter( + Bridge.destroyed == None, + Bridge.bridgeline != None + ) + ] + ).dict() diff --git a/app/lists/mirror_mapping.py b/app/lists/mirror_mapping.py new file mode 100644 index 0000000..dd7cbd9 --- /dev/null +++ b/app/lists/mirror_mapping.py @@ -0,0 +1,39 @@ +from typing import Dict + +from pydantic import BaseModel, Field +from tldextract import extract + +from app.models.mirrors import Proxy + + +class MMMirror(BaseModel): + origin_domain: str = Field(description="The full origin domain name") + origin_domain_normalized: str = Field(description="The origin_domain with \"www.\" removed, if present") + origin_domain_root: str = Field(description="The registered domain name of the origin, excluding subdomains") + + +class MirrorMapping(BaseModel): + __root__: Dict[str, MMMirror] = Field( + description="The domain name for the mirror" + ) + + class Config: + title = "Mirror Mapping Version 1" + + +def _strip_prefix(value, prefix): + """Return value with one exact leading prefix removed, or unchanged if it does not start with prefix.""" + return value[len(prefix):] if value.startswith(prefix) else value + + +def mirror_mapping(): + """Return a plain dict keyed by each proxy URL (without "https://") describing the proxy's origin domain.""" + # str.lstrip() removes a set of characters rather than a prefix, so it would mangle names such as "web.example.com". + # A custom root model takes its payload as __root__, and .dict() wraps the result under that same key again. + return MirrorMapping(__root__={ + _strip_prefix(d.url, "https://"): MMMirror( + origin_domain=d.origin.domain_name, + origin_domain_normalized=_strip_prefix(d.origin.domain_name, "www."), + origin_domain_root=extract(d.origin.domain_name).registered_domain + ) for d in Proxy.query.all() if d.url is not None + }).dict()["__root__"] diff --git a/app/terraform/list/__init__.py b/app/terraform/list/__init__.py index f6950a8..95e2348 100644 --- a/app/terraform/list/__init__.py +++ b/app/terraform/list/__init__.py @@ -1,7 +1,9 @@ import json from app import app -from app.mirror_sites import bridgelines, mirror_sites, mirror_mapping +from app.lists.mirror_mapping import mirror_mapping +from app.lists.bc2 import mirror_sites +from app.lists.bridgelines import bridgelines from app.models.base import MirrorList from app.terraform import BaseAutomation diff --git a/docs/tech/schemas.rst b/docs/tech/schemas.rst index 12e20d2..38824e6 100644 --- 
a/docs/tech/schemas.rst +++ b/docs/tech/schemas.rst @@ -1,8 +1,17 @@ Mirror List Formats =================== -Bypass Censorship Version 2 +Bypass Censorship Extension --------------------------- .. jsonschema:: ../../schemas/bc2.json +Mirror Analytics +---------------- + +.. jsonschema:: ../../schemas/mirror-mapping.json + +Tor Bridges +----------- + +.. jsonschema:: ../../schemas/bridgelines.json \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2f3e956..b6269c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,17 @@ -flask~=2.0.2 -wtforms~=3.0.1 -boto3~=1.21.15 +PyGithub alembic~=1.7.6 -sqlalchemy~=1.4.32 -pyyaml~=6.0 -jinja2~=3.0.2 -tldextract~=3.2.0 -requests~=2.27.1 azure-identity azure-mgmt-alertsmanagement +bootstrap-flask +boto3~=1.21.15 flask-migrate flask-sqlalchemy -bootstrap-flask flask-wtf -PyGithub \ No newline at end of file +flask~=2.0.2 +jinja2~=3.0.2 +pydantic +pyyaml~=6.0 +requests~=2.27.1 +sqlalchemy~=1.4.32 +tldextract~=3.2.0 +wtforms~=3.0.1 diff --git a/schemas/bc2.json b/schemas/bc2.json index d630f07..fda2c3c 100644 --- a/schemas/bc2.json +++ b/schemas/bc2.json @@ -1,87 +1,86 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://bypass.censorship.guide/schema/bc2.json", - "type": "object", - "title": "Bypass Censorship Version 2", - "required": [ - "sites", - "version" - ], - "properties": { - "sites": { - "$id": "#/properties/sites", - "type": "array", - "title": "The sites schema", - "additionalItems": true, - "items": { - "$id": "#/properties/sites/items", - "type": "object", - "title": "The items schema", - "required": [ - "available_alternatives", - "main_domain" - ], - "properties": { - "available_alternatives": { - "$id": "#/properties/sites/items/properties/available_alternatives", - "type": "array", - "title": "The available_alternatives schema", - "additionalItems": true, - "items": { - "$id": "#/properties/sites/items/properties/available_alternatives/items", 
- "type": "object", - "title": "The items schema", - "required": [ - "created_at", - "proto", - "type", - "updated_at", - "url" - ], - "properties": { - "created_at": { - "$id": "#/properties/sites/items/properties/available_alternatives/items/properties/created_at", - "type": "string", - "title": "The created_at schema" - }, - "proto": { - "$id": "#/properties/sites/items/properties/available_alternatives/items/properties/proto", - "type": "string", - "title": "The proto schema" - }, - "type": { - "$id": "#/properties/sites/items/properties/available_alternatives/items/properties/type", - "type": "string", - "title": "The type schema" - }, - "updated_at": { - "$id": "#/properties/sites/items/properties/available_alternatives/items/properties/updated_at", - "type": "string", - "title": "The updated_at schema" - }, - "url": { - "$id": "#/properties/sites/items/properties/available_alternatives/items/properties/url", - "type": "string", - "title": "The url schema" - } - }, - "additionalProperties": true - } - }, - "main_domain": { - "$id": "#/properties/sites/items/properties/main_domain", - "type": "string", - "title": "The main_domain schema" - } - }, - "additionalProperties": true - } - }, - "version": { - "$id": "#/properties/version", - "type": "string", - "title": "The version schema" - } + "title": "Bypass Censorship Version 2", + "type": "object", + "properties": { + "version": { + "title": "Version", + "description": "Version number of the Bypass Censorship Extension schema in use", + "type": "string" }, - "additionalProperties": true + "sites": { + "title": "Sites", + "type": "array", + "items": { + "$ref": "#/definitions/BC2Site" + } + } + }, + "required": [ + "version", + "sites" + ], + "definitions": { + "BC2Alternative": { + "title": "BC2Alternative", + "type": "object", + "properties": { + "proto": { + "title": "Proto", + "type": "string" + }, + "type": { + "title": "Type", + "type": "string" + }, + "created_at": { + "title": "Created At", + "type": 
"string", + "format": "date-time" + }, + "updated_at": { + "title": "Updated At", + "type": "string", + "format": "date-time" + }, + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "proto", + "type", + "created_at", + "updated_at", + "url" + ] + }, + "BC2Site": { + "title": "BC2Site", + "type": "object", + "properties": { + "main_domain": { + "title": "Main Domain", + "description": "The main domain name of the website, excluding \"www.\" if present.", + "examples": [ + "bbc.co.uk", + "bbc.com", + "guardianproject.info" + ], + "type": "string" + }, + "available_alternatives": { + "title": "Available Alternatives", + "type": "array", + "items": { + "$ref": "#/definitions/BC2Alternative" + } + } + }, + "required": [ + "main_domain", + "available_alternatives" + ] + } + } } \ No newline at end of file