schemas: generate some good schemas

This commit is contained in:
Iain Learmonth 2022-04-27 13:30:49 +01:00
parent ed56ed5368
commit b7a2201ad6
10 changed files with 205 additions and 122 deletions

View file

@ -7,6 +7,7 @@ test:
- apt update && apt install build-essential
- pip install -r requirements.txt
- pip install -U sphinx sphinx-press-theme sphinx-jsonschema
- pushd scripts && python update_schemas.py && popd
- sphinx-build -b html docs public
rules:
- if: $CI_COMMIT_REF_NAME != $CI_DEFAULT_BRANCH
@ -18,6 +19,7 @@ pages:
- apt update && apt install build-essential
- pip install -r requirements.txt
- pip install -U sphinx sphinx-press-theme sphinx-jsonschema
- pushd scripts && python update_schemas.py && popd
- sphinx-build -b html docs public
artifacts:
paths:

View file

@ -5,7 +5,7 @@ import yaml
from app.extensions import db
from app.extensions import migrate
from app.extensions import bootstrap
from app.mirror_sites import mirror_sites
from app.lists.bc2 import mirror_sites
from app.models.mirrors import Origin, Proxy, Mirror
from app.models.base import Group
from app.portal import portal

0
app/lists/__init__.py Normal file
View file

View file

@ -1,8 +1,35 @@
from tldextract import extract
from datetime import datetime
from typing import List
from app.models.bridges import Bridge
from app.models.mirrors import Origin, Proxy
from pydantic import BaseModel, Field
from app.models.mirrors import Origin
class BC2Alternative(BaseModel):
    # Entry type for BC2Site.available_alternatives.
    #
    # Documented with comments rather than a docstring on purpose: pydantic
    # copies a model's docstring into the generated JSON schema as its
    # "description", which would alter the published schema.

    # All fields are required (none has a default).
    proto: str
    type: str

    # The generated schema publishes both timestamps as strings with
    # format "date-time".
    created_at: datetime
    updated_at: datetime

    url: str
class BC2Site(BaseModel):
    # One site in the Bypass Censorship list: its main domain together with
    # the alternatives available for it.
    #
    # Comments are used instead of a docstring because pydantic would emit a
    # docstring as the schema "description" for this definition.

    main_domain: str = Field(
        description="The main domain name of the website, excluding \"www.\" if present.",
        examples=["bbc.co.uk", "bbc.com", "guardianproject.info"],
    )

    # Required list; each element is validated as a BC2Alternative.
    available_alternatives: List[BC2Alternative]
class BypassCensorship2(BaseModel):
    # Root model of the Bypass Censorship list: a schema version string plus
    # the list of sites.
    #
    # Comments are used instead of a docstring because pydantic would emit a
    # docstring as the "description" of the generated schema.

    version: str = Field(
        description="Version number of the Bypass Censorship Extension schema in use",
    )

    # Required list; each element is validated as a BC2Site.
    sites: List[BC2Site]

    class Config:
        # Used as the "title" at the root of the generated JSON schema.
        title = "Bypass Censorship Version 2"
def mirror_sites():
return {
@ -29,25 +56,3 @@ def mirror_sites():
]} for x in Origin.query.order_by(Origin.domain_name).all() if x.destroyed is None
]
}
def bridgelines():
    """Build the bridgelines list as a plain dictionary.

    Returns:
        A dict with a fixed ``"version"`` of ``"1.0"`` and a
        ``"bridgelines"`` list holding the ``bridgeline`` value of every
        Bridge row whose ``destroyed`` is NULL and whose ``bridgeline`` is
        not NULL.
    """
    # These are SQL filter expressions, not Python truth tests: with
    # SQLAlchemy columns `== None` / `!= None` build IS NULL / IS NOT NULL
    # clauses, so they must not be rewritten as `is None` / `is not None`.
    usable_bridges = Bridge.query.filter(
        Bridge.destroyed == None,  # noqa: E711
        Bridge.bridgeline != None,  # noqa: E711
    )
    lines = [bridge.bridgeline for bridge in usable_bridges]
    return {"version": "1.0", "bridgelines": lines}
def mirror_mapping():
    """Map each proxy host to details about the origin it mirrors.

    Returns:
        A dict keyed by the proxy URL with a leading ``"https://"`` removed.
        Each value is a dict with:

        * ``origin_domain``: the origin's domain name as stored;
        * ``origin_domain_normalized``: the same name with a leading
          ``"www."`` removed, if present;
        * ``origin_domain_root``: the registered domain reported by
          ``tldextract.extract``.

        Proxies whose ``url`` is ``None`` are skipped.
    """

    def strip_prefix(text, prefix):
        # str.lstrip(prefix) treats its argument as a *set of characters*,
        # not a prefix: "www.w3.org".lstrip("www.") gives "3.org" and
        # "https://static.example.com".lstrip("https://") gives
        # "atic.example.com". Remove the exact prefix instead.
        if text.startswith(prefix):
            return text[len(prefix):]
        return text

    mapping = {}
    for proxy in Proxy.query.all():
        if proxy.url is None:
            continue
        origin_domain = proxy.origin.domain_name
        mapping[strip_prefix(proxy.url, "https://")] = {
            "origin_domain": origin_domain,
            "origin_domain_normalized": strip_prefix(origin_domain, "www."),
            "origin_domain_root": extract(origin_domain).registered_domain,
        }
    return mapping

34
app/lists/bridgelines.py Normal file
View file

@ -0,0 +1,34 @@
from typing import List
from pydantic import BaseModel, Field
from app.models.bridges import Bridge
class Bridgelines(BaseModel):
    # Model of the bridgelines list: a schema version string plus a list of
    # bridgeline strings.
    #
    # Comments are used instead of a docstring because pydantic would emit a
    # docstring as the "description" of the generated schema.

    version: str = Field(
        description="Version number of the bridgelines schema in use",
    )

    bridgelines: List[str] = Field(
        description="List of bridgelines, ready for use in a torrc file",
        examples=[
            "obfs4 71.73.124.31:8887 E81B1237F6D13497B166060F55861565593CFF8E cert=b54NsV6tK1g+LHaThPOTCibdpx3wHm9NFe0PzGF1nwz+4M/tq6SkfOaShzPnZsIRCFRIHg iat-mode=0",
            "obfs4 172.105.176.101:80 D18BC7E082D7EBF8E851029AC89A12A3F44A50BF cert=KHfAAUptXWRmLy3ehS9ETMO5luY06d0w7tEBDiAI0z62nC5Qo/APrzZxodkYWX2bNko/Mw iat-mode=0",
            "obfs4 141.101.36.55:9023 045EF272F08BC11CDB985889E4E9FE35DC6F9C67 cert=6KEdf/5aDSyuYEqvo14JE8Cks3i7PQtj9EFX2wTCiEaUPsp/I7eaOm4uSWdqwvV4vTVlFw iat-mode=0",
        ],
    )

    class Config:
        # Used as the "title" at the root of the generated JSON schema.
        # NOTE(review): the title says "Version 2" while bridgelines() emits
        # version "1.0" -- confirm which one is intended.
        title = "Bridgelines Version 2"
def bridgelines():
    """Build the bridgelines list, validated through the Bridgelines model.

    Returns:
        The ``dict()`` form of a ``Bridgelines`` instance with ``version``
        set to ``"1.0"`` and ``bridgelines`` holding the ``bridgeline`` value
        of every Bridge row whose ``destroyed`` is NULL and whose
        ``bridgeline`` is not NULL.
    """
    # These are SQL filter expressions, not Python truth tests: with
    # SQLAlchemy columns `== None` / `!= None` build IS NULL / IS NOT NULL
    # clauses, so they must not be rewritten as `is None` / `is not None`.
    usable_bridges = Bridge.query.filter(
        Bridge.destroyed == None,  # noqa: E711
        Bridge.bridgeline != None,  # noqa: E711
    )
    lines = [bridge.bridgeline for bridge in usable_bridges]
    document = Bridgelines(version="1.0", bridgelines=lines)
    return document.dict()

View file

@ -0,0 +1,31 @@
from typing import Dict
from pydantic import BaseModel, Field
from tldextract import extract
from app import Proxy
class MMMirror(BaseModel):
    # Value type of the mirror mapping: three views of one origin's domain.
    #
    # Comments are used instead of a docstring because pydantic would emit a
    # docstring as the schema "description" for this definition.

    origin_domain: str = Field(
        description="The full origin domain name",
    )
    origin_domain_normalized: str = Field(
        description="The origin_domain with \"www.\" removed, if present",
    )
    origin_domain_root: str = Field(
        description="The registered domain name of the origin, excluding subdomains",
    )
class MirrorMapping(BaseModel):
    # Custom-root model: the whole document is a single mapping from a
    # string key to an MMMirror, held in the special __root__ field.
    #
    # Comments are used instead of a docstring because pydantic would emit a
    # docstring as the "description" of the generated schema.

    __root__: Dict[str, MMMirror] = Field(
        description="The domain name for the mirror",
    )

    class Config:
        # Used as the "title" at the root of the generated JSON schema.
        title = "Mirror Mapping Version 1"
def mirror_mapping():
    """Map each proxy host to details about the origin it mirrors.

    The mapping is validated through the ``MirrorMapping`` model before
    being returned.

    Returns:
        A plain dict keyed by the proxy URL with a leading ``"https://"``
        removed. Each value is a dict with the ``MMMirror`` fields
        ``origin_domain``, ``origin_domain_normalized`` (leading ``"www."``
        removed, if present) and ``origin_domain_root`` (the registered
        domain reported by ``tldextract.extract``).

        Proxies whose ``url`` is ``None`` are skipped.
    """

    def strip_prefix(text, prefix):
        # str.lstrip(prefix) treats its argument as a *set of characters*,
        # not a prefix: "www.w3.org".lstrip("www.") gives "3.org" and
        # "https://static.example.com".lstrip("https://") gives
        # "atic.example.com". Remove the exact prefix instead.
        if text.startswith(prefix):
            return text[len(prefix):]
        return text

    mirrors = {}
    for proxy in Proxy.query.all():
        if proxy.url is None:
            continue
        origin_domain = proxy.origin.domain_name
        mirrors[strip_prefix(proxy.url, "https://")] = MMMirror(
            origin_domain=origin_domain,
            origin_domain_normalized=strip_prefix(origin_domain, "www."),
            origin_domain_root=extract(origin_domain).registered_domain,
        )

    # MirrorMapping is a custom-root model, so the mapping has to be passed
    # as the __root__ field; spreading it as **kwargs leaves __root__ unset
    # and fails validation. dict() wraps the result under "__root__", so
    # unwrap it to return the mapping itself.
    return MirrorMapping(__root__=mirrors).dict()["__root__"]

View file

@ -1,7 +1,9 @@
import json
from app import app
from app.mirror_sites import bridgelines, mirror_sites, mirror_mapping
from app.lists.mirror_mapping import mirror_mapping
from app.lists.bc2 import mirror_sites
from app.lists.bridgelines import bridgelines
from app.models.base import MirrorList
from app.terraform import BaseAutomation

View file

@ -1,8 +1,17 @@
Mirror List Formats
===================
Bypass Censorship Version 2
Bypass Censorship Extension
---------------------------
.. jsonschema:: ../../schemas/bc2.json
Mirror Analytics
----------------
.. jsonschema:: ../../schemas/mirror-mapping.json
Tor Bridges
-----------
.. jsonschema:: ../../schemas/bridgelines.json

View file

@ -1,16 +1,17 @@
flask~=2.0.2
wtforms~=3.0.1
boto3~=1.21.15
PyGithub
alembic~=1.7.6
sqlalchemy~=1.4.32
pyyaml~=6.0
jinja2~=3.0.2
tldextract~=3.2.0
requests~=2.27.1
azure-identity
azure-mgmt-alertsmanagement
bootstrap-flask
boto3~=1.21.15
flask-migrate
flask-sqlalchemy
bootstrap-flask
flask-wtf
PyGithub
flask~=2.0.2
jinja2~=3.0.2
pydantic
pyyaml~=6.0
requests~=2.27.1
sqlalchemy~=1.4.32
tldextract~=3.2.0
wtforms~=3.0.1

View file

@ -1,87 +1,86 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://bypass.censorship.guide/schema/bc2.json",
"type": "object",
"title": "Bypass Censorship Version 2",
"required": [
"sites",
"version"
],
"properties": {
"sites": {
"$id": "#/properties/sites",
"type": "array",
"title": "The sites schema",
"additionalItems": true,
"items": {
"$id": "#/properties/sites/items",
"type": "object",
"title": "The items schema",
"required": [
"available_alternatives",
"main_domain"
],
"properties": {
"available_alternatives": {
"$id": "#/properties/sites/items/properties/available_alternatives",
"type": "array",
"title": "The available_alternatives schema",
"additionalItems": true,
"items": {
"$id": "#/properties/sites/items/properties/available_alternatives/items",
"type": "object",
"title": "The items schema",
"required": [
"created_at",
"proto",
"type",
"updated_at",
"url"
],
"properties": {
"created_at": {
"$id": "#/properties/sites/items/properties/available_alternatives/items/properties/created_at",
"type": "string",
"title": "The created_at schema"
"version": {
"title": "Version",
"description": "Version number of the Bypass Censorship Extension schema in use",
"type": "string"
},
"sites": {
"title": "Sites",
"type": "array",
"items": {
"$ref": "#/definitions/BC2Site"
}
}
},
"required": [
"version",
"sites"
],
"definitions": {
"BC2Alternative": {
"title": "BC2Alternative",
"type": "object",
"properties": {
"proto": {
"$id": "#/properties/sites/items/properties/available_alternatives/items/properties/proto",
"type": "string",
"title": "The proto schema"
"title": "Proto",
"type": "string"
},
"type": {
"$id": "#/properties/sites/items/properties/available_alternatives/items/properties/type",
"title": "Type",
"type": "string"
},
"created_at": {
"title": "Created At",
"type": "string",
"title": "The type schema"
"format": "date-time"
},
"updated_at": {
"$id": "#/properties/sites/items/properties/available_alternatives/items/properties/updated_at",
"title": "Updated At",
"type": "string",
"title": "The updated_at schema"
"format": "date-time"
},
"url": {
"$id": "#/properties/sites/items/properties/available_alternatives/items/properties/url",
"type": "string",
"title": "The url schema"
"title": "Url",
"type": "string"
}
},
"additionalProperties": true
}
"required": [
"proto",
"type",
"created_at",
"updated_at",
"url"
]
},
"BC2Site": {
"title": "BC2Site",
"type": "object",
"properties": {
"main_domain": {
"$id": "#/properties/sites/items/properties/main_domain",
"type": "string",
"title": "The main_domain schema"
"title": "Main Domain",
"description": "The main domain name of the website, excluding \"www.\" if present.",
"examples": [
"bbc.co.uk",
"bbc.com",
"guardianproject.info"
],
"type": "string"
},
"available_alternatives": {
"title": "Available Alternatives",
"type": "array",
"items": {
"$ref": "#/definitions/BC2Alternative"
}
}
},
"additionalProperties": true
"required": [
"main_domain",
"available_alternatives"
]
}
},
"version": {
"$id": "#/properties/version",
"type": "string",
"title": "The version schema"
}
},
"additionalProperties": true
}