lint: tidying up code in block tasks

This commit is contained in:
Iain Learmonth 2022-06-17 12:42:42 +01:00
parent a0da4d4641
commit ac5a604587
8 changed files with 86 additions and 49 deletions

View file

@ -1,5 +1,7 @@
[MASTER]
disable=missing-module-docstring
ignored-classes=Column
load-plugins=pylint_flask,pylint_flask_sqlalchemy
max-line-length=120
py-version=3.8
suggestion-mode=yes

View file

@ -1,3 +1,4 @@
import logging
from abc import abstractmethod
from datetime import datetime
from typing import Union, List, Optional, Any
@ -78,9 +79,13 @@ class AbstractResource(db.Model): # type: ignore
raise NotImplementedError()
def deprecate(self, *, reason: str) -> None:
    """
    Mark this resource as deprecated, recording when and why.

    The call is a no-op (apart from an informational log line) when the
    resource has already been deprecated, so the original deprecation
    timestamp and reason are preserved.

    :param reason: short machine-readable reason for the deprecation
    """
    if self.deprecated is not None:
        # Already deprecated: keep the first timestamp/reason untouched.
        logging.info("Not deprecating %s (reason=%s) because it's already deprecated", self.brn, reason)
        return
    # Pass the values as separate lazy %-args; wrapping them in a single tuple
    # would leave the second placeholder without an argument.
    logging.info("Deprecating %s (reason=%s)", self.brn, reason)
    now = datetime.utcnow()
    self.deprecated = now
    self.deprecation_reason = reason
    self.updated = now
def destroy(self) -> None:
if self.deprecated is None:

View file

@ -1,5 +1,5 @@
from datetime import datetime, timedelta, timezone
from typing import Optional, Union
from typing import Optional
from flask import Blueprint, render_template, request
from flask.typing import ResponseReturnValue
@ -55,7 +55,7 @@ def format_datetime(s: Optional[datetime]) -> str:
@portal.app_template_filter("describe_brn")
def describe_brn(s: str) -> Union[str, Markup]:
def describe_brn(s: str) -> ResponseReturnValue:
parts = s.split(":")
if parts[3] == "mirror":
if parts[5].startswith("origin/"):
@ -71,7 +71,8 @@ def describe_brn(s: str) -> Union[str, Markup]:
).first()
if not proxy:
return s
return Markup(f"Proxy: {proxy.url}<br>({proxy.origin.group.group_name}: {proxy.origin.domain_name})") # type: ignore
return Markup( # type: ignore[no-untyped-call]
f"Proxy: {proxy.url}<br>({proxy.origin.group.group_name}: {proxy.origin.domain_name})")
if parts[5].startswith("quota/"):
if parts[4] == "cloudfront":
return f"Quota: CloudFront {parts[5][len('quota/'):]}"

View file

@ -11,21 +11,28 @@ from app.terraform import BaseAutomation
class BlockBridgeGitHubAutomation(BaseAutomation):
"""
Automation task to import bridge reachability results from GitHub.
"""
short_name = "block_bridge_github"
description = "Import bridge reachability results from GitHub"
frequency = 30
def automate(self, full: bool = False) -> Tuple[bool, str]:
g = Github(app.config['GITHUB_API_KEY'])
repo = g.get_repo(app.config['GITHUB_BRIDGE_REPO'])
for vp in app.config['GITHUB_BRIDGE_VANTAGE_POINTS']:
contents = repo.get_contents(f"recentResult_{vp}")
github = Github(app.config['GITHUB_API_KEY'])
repo = github.get_repo(app.config['GITHUB_BRIDGE_REPO'])
for vantage_point in app.config['GITHUB_BRIDGE_VANTAGE_POINTS']:
contents = repo.get_contents(f"recentResult_{vantage_point}")
if isinstance(contents, list):
return False, f"Expected a file at recentResult_{vp} but got a directory."
return (False,
f"Expected a file at recentResult_{vantage_point}"
" but got a directory.")
results = contents.decoded_content.decode('utf-8').splitlines()
for result in results:
parts = result.split("\t")
if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=3)):
if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc)
- datetime.timedelta(days=3)):
continue
if int(parts[1]) < 40:
bridge: Bridge = Bridge.query.filter(

View file

@ -1,5 +1,5 @@
import datetime
from typing import Tuple
from typing import Tuple, List, Dict
from bs4 import BeautifulSoup
import requests
@ -12,37 +12,51 @@ from app.terraform import BaseAutomation
class BlockExternalAutomation(BaseAutomation):
"""
Automation task to import proxy reachability results from external source.
"""
short_name = "block_external"
description = "Import proxy reachability results from external source"
def automate(self, full: bool = False) -> Tuple[bool, str]:
content: bytes
results: Dict[str, List[str]]
def _fetch(self) -> None:
user_agent = {'User-agent': 'BypassCensorship/1.0'}
page = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent)
soup = BeautifulSoup(page.content, 'html.parser')
h2 = soup.find_all('h2')
self.content = page.content
def _parse(self) -> None:
soup = BeautifulSoup(self.content, 'html.parser')
h2 = soup.find_all('h2') # pylint: disable=invalid-name
div = soup.find_all('div', class_="overflow-auto mb-5")
results = {}
i = 0
while i < len(h2):
if not div[i].div:
urls = []
a = div[i].find_all('a')
anchors = div[i].find_all('a')
j = 0
while j < len(a):
urls.append(a[j].text)
while j < len(anchors):
urls.append(anchors[j].text)
j += 1
results[h2[i].text] = urls
else:
results[h2[i].text] = []
i += 1
self.results = results
def automate(self, full: bool = False) -> Tuple[bool, str]:
# TODO: handle errors in fetching remote content
# TODO: handle errors in parsing the remote content
self._fetch()
self._parse()
activities = []
blocked_proxies = []
for vp in results:
if vp not in app.config['EXTERNAL_VANTAGE_POINTS']:
for vantage_point, urls in self.results.items():
if vantage_point not in app.config['EXTERNAL_VANTAGE_POINTS']:
continue
for url in results[vp]:
for url in urls:
print(f"Found {url} blocked")
proxy = Proxy.query.filter(
Proxy.provider == "cloudfront",
@ -54,9 +68,6 @@ class BlockExternalAutomation(BaseAutomation):
if not proxy.origin.auto_rotation:
print("Proxy auto-rotation forbidden for origin")
continue
if proxy.deprecated:
print("Proxy already marked blocked")
continue
if proxy.added > datetime.datetime.utcnow() - datetime.timedelta(hours=3):
activities.append(Activity(
activity_type="block_warning",
@ -78,8 +89,8 @@ class BlockExternalAutomation(BaseAutomation):
activity_type="block_warning",
text=(
"More than 15 proxies were marked blocked according to external source. REFUSING to rotate.")))
for a in activities:
a.notify()
db.session.add(a)
for activity in activities:
activity.notify()
db.session.add(activity)
db.session.commit()
return True, ""

View file

@ -27,9 +27,9 @@ def _check_origin(api_url: str, result: Dict[str, Any]) -> Dict[str, Any]:
return result
for r in req['results']:
not_ok = False
for s in ["anomaly", "confirmed", "failure"]:
if s in r and r[s]:
result[r["probe_cc"]][s] += 1
for status in ["anomaly", "confirmed", "failure"]:
if status in r and r[status]:
result[r["probe_cc"]][status] += 1
not_ok = True
break
if not not_ok:
@ -60,6 +60,10 @@ def threshold_origin(domain_name: str) -> Dict[str, Any]:
class BlockOONIAutomation(BaseAutomation):
"""
Automation task to import origin and/or proxy reachability results from OONI.
"""
short_name = "block_ooni"
description = "Import origin and/or proxy reachability results from OONI"
frequency = 240

View file

@ -10,6 +10,16 @@ from app.terraform import BaseAutomation
class BlockRoskomsvobodaAutomation(BaseAutomation):
"""
Automation task to import Russian blocklist from RosKomSvoboda.
This task will import the Russian state register of prohibited sites,
which is part of the enforcement of federal laws of the Russian Federation
No. 139-FZ, No. 187-FZ, No. 398-FZ and a number of others that regulate
the dissemination of information on the Internet.
Where proxies are found to be blocked they will be rotated.
"""
short_name = "block_roskomsvoboda"
description = "Import Russian blocklist from RosKomSvoboda"
frequency = 90
@ -22,27 +32,24 @@ class BlockRoskomsvobodaAutomation(BaseAutomation):
).all()
patterns = requests.get("https://reestr.rublacklist.net/api/v2/domains/json").json()
for pattern in patterns:
for p in proxies:
if p.url is None:
for proxy in proxies:
if proxy.url is None:
# Not ready yet
continue
if fnmatch(p.url[len("https://"):], pattern):
print(f"Found {p.url} blocked")
if not p.origin.auto_rotation:
if fnmatch(proxy.url[len("https://"):], pattern):
print(f"Found {proxy.url} blocked")
if not proxy.origin.auto_rotation:
print("Proxy auto-rotation forbidden for origin")
continue
if p.deprecated:
print("Proxy already marked blocked")
continue
p.deprecate(reason="roskomsvoboda")
proxy.deprecate(reason="roskomsvoboda")
activities.append(Activity(
activity_type="block",
text=(f"Proxy {p.url} for {p.origin.domain_name} detected blocked according to RosKomSvoboda. "
"Rotation scheduled.")
text=(f"Proxy {proxy.url} for {proxy.origin.domain_name} detected blocked "
"according to RosKomSvoboda. Rotation scheduled.")
))
for a in activities:
db.session.add(a)
for activity in activities:
db.session.add(activity)
db.session.commit()
for a in activities:
a.notify()
for activity in activities:
activity.notify()
return True, ""

View file

@ -1,2 +1,2 @@
[flake8]
ignore = E501
ignore = E501,W503