lint: tidying up code in block tasks

This commit is contained in:
Iain Learmonth 2022-06-17 12:42:42 +01:00
parent a0da4d4641
commit ac5a604587
8 changed files with 86 additions and 49 deletions

View file

@ -1,5 +1,7 @@
[MASTER] [MASTER]
disable=missing-module-docstring
ignored-classes=Column ignored-classes=Column
load-plugins=pylint_flask,pylint_flask_sqlalchemy load-plugins=pylint_flask,pylint_flask_sqlalchemy
max-line-length=120
py-version=3.8 py-version=3.8
suggestion-mode=yes suggestion-mode=yes

View file

@ -1,3 +1,4 @@
import logging
from abc import abstractmethod from abc import abstractmethod
from datetime import datetime from datetime import datetime
from typing import Union, List, Optional, Any from typing import Union, List, Optional, Any
@ -78,9 +79,13 @@ class AbstractResource(db.Model): # type: ignore
raise NotImplementedError() raise NotImplementedError()
def deprecate(self, *, reason: str) -> None:
    """
    Mark this resource as deprecated, unless it already is.

    The first call records the deprecation time and reason and bumps the
    ``updated`` timestamp. Any later call is a logged no-op, so the original
    deprecation time and reason are preserved.

    :param reason: short identifier for why the resource is being deprecated
    """
    if self.deprecated is not None:
        # Already deprecated: keep the first timestamp/reason intact.
        logging.info("Not deprecating %s (reason=%s) because it's already deprecated", self.brn, reason)
        return
    # Logging arguments are passed individually so that each %s placeholder
    # receives its own value when the record is formatted.
    logging.info("Deprecating %s (reason=%s)", self.brn, reason)
    now = datetime.utcnow()
    self.deprecated = now
    self.deprecation_reason = reason
    self.updated = now
def destroy(self) -> None: def destroy(self) -> None:
if self.deprecated is None: if self.deprecated is None:

View file

@ -1,5 +1,5 @@
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Optional, Union from typing import Optional
from flask import Blueprint, render_template, request from flask import Blueprint, render_template, request
from flask.typing import ResponseReturnValue from flask.typing import ResponseReturnValue
@ -55,7 +55,7 @@ def format_datetime(s: Optional[datetime]) -> str:
@portal.app_template_filter("describe_brn") @portal.app_template_filter("describe_brn")
def describe_brn(s: str) -> Union[str, Markup]: def describe_brn(s: str) -> ResponseReturnValue:
parts = s.split(":") parts = s.split(":")
if parts[3] == "mirror": if parts[3] == "mirror":
if parts[5].startswith("origin/"): if parts[5].startswith("origin/"):
@ -71,7 +71,8 @@ def describe_brn(s: str) -> Union[str, Markup]:
).first() ).first()
if not proxy: if not proxy:
return s return s
return Markup(f"Proxy: {proxy.url}<br>({proxy.origin.group.group_name}: {proxy.origin.domain_name})") # type: ignore return Markup( # type: ignore[no-untyped-call]
f"Proxy: {proxy.url}<br>({proxy.origin.group.group_name}: {proxy.origin.domain_name})")
if parts[5].startswith("quota/"): if parts[5].startswith("quota/"):
if parts[4] == "cloudfront": if parts[4] == "cloudfront":
return f"Quota: CloudFront {parts[5][len('quota/'):]}" return f"Quota: CloudFront {parts[5][len('quota/'):]}"

View file

@ -11,21 +11,28 @@ from app.terraform import BaseAutomation
class BlockBridgeGitHubAutomation(BaseAutomation): class BlockBridgeGitHubAutomation(BaseAutomation):
"""
Automation task to import bridge reachability results from GitHub.
"""
short_name = "block_bridge_github" short_name = "block_bridge_github"
description = "Import bridge reachability results from GitHub" description = "Import bridge reachability results from GitHub"
frequency = 30 frequency = 30
def automate(self, full: bool = False) -> Tuple[bool, str]: def automate(self, full: bool = False) -> Tuple[bool, str]:
g = Github(app.config['GITHUB_API_KEY']) github = Github(app.config['GITHUB_API_KEY'])
repo = g.get_repo(app.config['GITHUB_BRIDGE_REPO']) repo = github.get_repo(app.config['GITHUB_BRIDGE_REPO'])
for vp in app.config['GITHUB_BRIDGE_VANTAGE_POINTS']: for vantage_point in app.config['GITHUB_BRIDGE_VANTAGE_POINTS']:
contents = repo.get_contents(f"recentResult_{vp}") contents = repo.get_contents(f"recentResult_{vantage_point}")
if isinstance(contents, list): if isinstance(contents, list):
return False, f"Expected a file at recentResult_{vp} but got a directory." return (False,
f"Expected a file at recentResult_{vantage_point}"
" but got a directory.")
results = contents.decoded_content.decode('utf-8').splitlines() results = contents.decoded_content.decode('utf-8').splitlines()
for result in results: for result in results:
parts = result.split("\t") parts = result.split("\t")
if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=3)): if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc)
- datetime.timedelta(days=3)):
continue continue
if int(parts[1]) < 40: if int(parts[1]) < 40:
bridge: Bridge = Bridge.query.filter( bridge: Bridge = Bridge.query.filter(

View file

@ -1,5 +1,5 @@
import datetime import datetime
from typing import Tuple from typing import Tuple, List, Dict
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import requests import requests
@ -12,37 +12,51 @@ from app.terraform import BaseAutomation
class BlockExternalAutomation(BaseAutomation): class BlockExternalAutomation(BaseAutomation):
"""
Automation task to import proxy reachability results from external source.
"""
short_name = "block_external" short_name = "block_external"
description = "Import proxy reachability results from external source" description = "Import proxy reachability results from external source"
def automate(self, full: bool = False) -> Tuple[bool, str]: content: bytes
results: Dict[str, List[str]]
def _fetch(self) -> None:
    """
    Download the external reachability report and keep the raw response body.

    The body is stored on ``self.content``; nothing is parsed here.
    """
    user_agent = {'User-agent': 'BypassCensorship/1.0'}
    # requests has no default timeout; without one a stalled server would
    # block this automation task indefinitely.
    page = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent, timeout=30)
    self.content = page.content
h2 = soup.find_all('h2')
def _parse(self) -> None:
    """
    Turn the fetched HTML in ``self.content`` into a mapping of results.

    Each ``h2`` element is paired by position with the ``div`` of class
    "overflow-auto mb-5" at the same index. The heading text becomes a key
    in ``self.results``; its value is the text of every anchor in that div,
    or an empty list when the div contains a nested div.
    """
    document = BeautifulSoup(self.content, 'html.parser')
    headings = document.find_all('h2')
    sections = document.find_all('div', class_="overflow-auto mb-5")
    parsed = {}
    for index, heading in enumerate(headings):
        # Index lookup (not zip) so a missing section still raises IndexError.
        section = sections[index]
        if section.div:
            links = []
        else:
            links = [anchor.text for anchor in section.find_all('a')]
        parsed[heading.text] = links
    self.results = parsed
def automate(self, full: bool = False) -> Tuple[bool, str]:
# TODO: handle errors in fetching remote content
# TODO: handle errors in parsing the remote content
self._fetch()
self._parse()
activities = [] activities = []
blocked_proxies = [] blocked_proxies = []
for vp in results: for vantage_point, urls in self.results.items():
if vp not in app.config['EXTERNAL_VANTAGE_POINTS']: if vantage_point not in app.config['EXTERNAL_VANTAGE_POINTS']:
continue continue
for url in results[vp]: for url in urls:
print(f"Found {url} blocked") print(f"Found {url} blocked")
proxy = Proxy.query.filter( proxy = Proxy.query.filter(
Proxy.provider == "cloudfront", Proxy.provider == "cloudfront",
@ -54,9 +68,6 @@ class BlockExternalAutomation(BaseAutomation):
if not proxy.origin.auto_rotation: if not proxy.origin.auto_rotation:
print("Proxy auto-rotation forbidden for origin") print("Proxy auto-rotation forbidden for origin")
continue continue
if proxy.deprecated:
print("Proxy already marked blocked")
continue
if proxy.added > datetime.datetime.utcnow() - datetime.timedelta(hours=3): if proxy.added > datetime.datetime.utcnow() - datetime.timedelta(hours=3):
activities.append(Activity( activities.append(Activity(
activity_type="block_warning", activity_type="block_warning",
@ -78,8 +89,8 @@ class BlockExternalAutomation(BaseAutomation):
activity_type="block_warning", activity_type="block_warning",
text=( text=(
"More than 15 proxies were marked blocked according to external source. REFUSING to rotate."))) "More than 15 proxies were marked blocked according to external source. REFUSING to rotate.")))
for a in activities: for activity in activities:
a.notify() activity.notify()
db.session.add(a) db.session.add(activity)
db.session.commit() db.session.commit()
return True, "" return True, ""

View file

@ -27,9 +27,9 @@ def _check_origin(api_url: str, result: Dict[str, Any]) -> Dict[str, Any]:
return result return result
for r in req['results']: for r in req['results']:
not_ok = False not_ok = False
for s in ["anomaly", "confirmed", "failure"]: for status in ["anomaly", "confirmed", "failure"]:
if s in r and r[s]: if status in r and r[status]:
result[r["probe_cc"]][s] += 1 result[r["probe_cc"]][status] += 1
not_ok = True not_ok = True
break break
if not not_ok: if not not_ok:
@ -60,6 +60,10 @@ def threshold_origin(domain_name: str) -> Dict[str, Any]:
class BlockOONIAutomation(BaseAutomation): class BlockOONIAutomation(BaseAutomation):
"""
Automation task to import origin and/or proxy reachability results from OONI.
"""
short_name = "block_ooni" short_name = "block_ooni"
description = "Import origin and/or proxy reachability results from OONI" description = "Import origin and/or proxy reachability results from OONI"
frequency = 240 frequency = 240

View file

@ -10,6 +10,16 @@ from app.terraform import BaseAutomation
class BlockRoskomsvobodaAutomation(BaseAutomation): class BlockRoskomsvobodaAutomation(BaseAutomation):
"""
Automation task to import Russian blocklist from RosKomSvoboda.
This task will import the Russian state register of prohibited sites,
which is part of the enforcement of federal laws of the Russian Federation
No. 139-FZ, No. 187-FZ, No. 398-FZ and a number of others that regulate
the dissemination of information on the Internet.
Where proxies are found to be blocked they will be rotated.
"""
short_name = "block_roskomsvoboda" short_name = "block_roskomsvoboda"
description = "Import Russian blocklist from RosKomSvoboda" description = "Import Russian blocklist from RosKomSvoboda"
frequency = 90 frequency = 90
@ -22,27 +32,24 @@ class BlockRoskomsvobodaAutomation(BaseAutomation):
).all() ).all()
patterns = requests.get("https://reestr.rublacklist.net/api/v2/domains/json").json() patterns = requests.get("https://reestr.rublacklist.net/api/v2/domains/json").json()
for pattern in patterns: for pattern in patterns:
for p in proxies: for proxy in proxies:
if p.url is None: if proxy.url is None:
# Not ready yet # Not ready yet
continue continue
if fnmatch(p.url[len("https://"):], pattern): if fnmatch(proxy.url[len("https://"):], pattern):
print(f"Found {p.url} blocked") print(f"Found {proxy.url} blocked")
if not p.origin.auto_rotation: if not proxy.origin.auto_rotation:
print("Proxy auto-rotation forbidden for origin") print("Proxy auto-rotation forbidden for origin")
continue continue
if p.deprecated: proxy.deprecate(reason="roskomsvoboda")
print("Proxy already marked blocked")
continue
p.deprecate(reason="roskomsvoboda")
activities.append(Activity( activities.append(Activity(
activity_type="block", activity_type="block",
text=(f"Proxy {p.url} for {p.origin.domain_name} detected blocked according to RosKomSvoboda. " text=(f"Proxy {proxy.url} for {proxy.origin.domain_name} detected blocked "
"Rotation scheduled.") "according to RosKomSvoboda. Rotation scheduled.")
)) ))
for a in activities: for activity in activities:
db.session.add(a) db.session.add(activity)
db.session.commit() db.session.commit()
for a in activities: for activity in activities:
a.notify() activity.notify()
return True, "" return True, ""

View file

@ -1,2 +1,2 @@
[flake8] [flake8]
ignore = E501 ignore = E501,W503