diff --git a/.pylintrc b/.pylintrc
index 3816d2e..970b010 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,5 +1,7 @@
[MASTER]
+disable=missing-module-docstring
ignored-classes=Column
load-plugins=pylint_flask,pylint_flask_sqlalchemy
+max-line-length=120
py-version=3.8
suggestion-mode=yes
diff --git a/app/models/__init__.py b/app/models/__init__.py
index 03fcec3..cab8b89 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -1,3 +1,4 @@
+import logging
from abc import abstractmethod
from datetime import datetime
from typing import Union, List, Optional, Any
@@ -78,9 +79,13 @@ class AbstractResource(db.Model): # type: ignore
raise NotImplementedError()
def deprecate(self, *, reason: str) -> None:
- self.deprecated = datetime.utcnow()
- self.deprecation_reason = reason
- self.updated = datetime.utcnow()
+ if self.deprecated is None:  # first deprecation wins; repeat calls keep the original timestamp and reason
+ logging.info("Deprecating %s (reason=%s)", self.brn, reason)  # args passed separately, not as a tuple
+ self.deprecated = datetime.utcnow()
+ self.deprecation_reason = reason
+ self.updated = datetime.utcnow()
+ else:
+ logging.info("Not deprecating %s (reason=%s) because it's already deprecated", self.brn, reason)
def destroy(self) -> None:
if self.deprecated is None:
diff --git a/app/portal/__init__.py b/app/portal/__init__.py
index 6257042..6d75eab 100644
--- a/app/portal/__init__.py
+++ b/app/portal/__init__.py
@@ -1,5 +1,5 @@
from datetime import datetime, timedelta, timezone
-from typing import Optional, Union
+from typing import Optional
from flask import Blueprint, render_template, request
from flask.typing import ResponseReturnValue
@@ -55,7 +55,7 @@ def format_datetime(s: Optional[datetime]) -> str:
@portal.app_template_filter("describe_brn")
-def describe_brn(s: str) -> Union[str, Markup]:
+def describe_brn(s: str) -> ResponseReturnValue:
parts = s.split(":")
if parts[3] == "mirror":
if parts[5].startswith("origin/"):
@@ -71,7 +71,8 @@ def describe_brn(s: str) -> Union[str, Markup]:
).first()
if not proxy:
return s
- return Markup(f"Proxy: {proxy.url}
({proxy.origin.group.group_name}: {proxy.origin.domain_name})") # type: ignore
+ return Markup( # type: ignore[no-untyped-call]
+ f"Proxy: {proxy.url}
({proxy.origin.group.group_name}: {proxy.origin.domain_name})")
if parts[5].startswith("quota/"):
if parts[4] == "cloudfront":
return f"Quota: CloudFront {parts[5][len('quota/'):]}"
diff --git a/app/terraform/block_bridge_github.py b/app/terraform/block_bridge_github.py
index f1866d3..2131514 100644
--- a/app/terraform/block_bridge_github.py
+++ b/app/terraform/block_bridge_github.py
@@ -11,21 +11,28 @@ from app.terraform import BaseAutomation
class BlockBridgeGitHubAutomation(BaseAutomation):
+ """
+ Automation task to import bridge reachability results from GitHub.
+ """
+
short_name = "block_bridge_github"
description = "Import bridge reachability results from GitHub"
frequency = 30
def automate(self, full: bool = False) -> Tuple[bool, str]:
- g = Github(app.config['GITHUB_API_KEY'])
- repo = g.get_repo(app.config['GITHUB_BRIDGE_REPO'])
- for vp in app.config['GITHUB_BRIDGE_VANTAGE_POINTS']:
- contents = repo.get_contents(f"recentResult_{vp}")
+ github = Github(app.config['GITHUB_API_KEY'])
+ repo = github.get_repo(app.config['GITHUB_BRIDGE_REPO'])
+ for vantage_point in app.config['GITHUB_BRIDGE_VANTAGE_POINTS']:
+ contents = repo.get_contents(f"recentResult_{vantage_point}")
if isinstance(contents, list):
- return False, f"Expected a file at recentResult_{vp} but got a directory."
+ return (False,
+ f"Expected a file at recentResult_{vantage_point}"
+ " but got a directory.")
results = contents.decoded_content.decode('utf-8').splitlines()
for result in results:
parts = result.split("\t")
- if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=3)):
+ if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc)
+ - datetime.timedelta(days=3)):
continue
if int(parts[1]) < 40:
bridge: Bridge = Bridge.query.filter(
diff --git a/app/terraform/block_external.py b/app/terraform/block_external.py
index fc173c9..2c52976 100644
--- a/app/terraform/block_external.py
+++ b/app/terraform/block_external.py
@@ -1,5 +1,5 @@
import datetime
-from typing import Tuple
+from typing import Tuple, List, Dict
from bs4 import BeautifulSoup
import requests
@@ -12,37 +12,51 @@ from app.terraform import BaseAutomation
class BlockExternalAutomation(BaseAutomation):
+ """
+ Automation task to import proxy reachability results from external source.
+ """
short_name = "block_external"
description = "Import proxy reachability results from external source"
- def automate(self, full: bool = False) -> Tuple[bool, str]:
+ content: bytes
+ results: Dict[str, List[str]]
+
+ def _fetch(self) -> None:
user_agent = {'User-agent': 'BypassCensorship/1.0'}
- page = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent)
- soup = BeautifulSoup(page.content, 'html.parser')
- h2 = soup.find_all('h2')
+ page = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent, timeout=30)  # no default timeout
+ self.content = page.content
+
+ def _parse(self) -> None:  # map each <h2> text to the link texts of the same-index div
+ soup = BeautifulSoup(self.content, 'html.parser')  # self.content is assigned by _fetch()
+ h2 = soup.find_all('h2') # pylint: disable=invalid-name
div = soup.find_all('div', class_="overflow-auto mb-5")
-
results = {}
-
i = 0
while i < len(h2):
if not div[i].div:
urls = []
- a = div[i].find_all('a')
+ anchors = div[i].find_all('a')  # every <a> found under this div
j = 0
- while j < len(a):
- urls.append(a[j].text)
+ while j < len(anchors):
+ urls.append(anchors[j].text)
j += 1
results[h2[i].text] = urls
else:
results[h2[i].text] = []
i += 1
+ self.results = results  # heading text -> list of link texts; iterated by automate()
+
+ def automate(self, full: bool = False) -> Tuple[bool, str]:
+ # TODO: handle errors in fetching remote content
+ # TODO: handle errors in parsing the remote content
+ self._fetch()
+ self._parse()
activities = []
blocked_proxies = []
- for vp in results:
- if vp not in app.config['EXTERNAL_VANTAGE_POINTS']:
+ for vantage_point, urls in self.results.items():
+ if vantage_point not in app.config['EXTERNAL_VANTAGE_POINTS']:
continue
- for url in results[vp]:
+ for url in urls:
print(f"Found {url} blocked")
proxy = Proxy.query.filter(
Proxy.provider == "cloudfront",
@@ -54,9 +68,6 @@ class BlockExternalAutomation(BaseAutomation):
if not proxy.origin.auto_rotation:
print("Proxy auto-rotation forbidden for origin")
continue
- if proxy.deprecated:
- print("Proxy already marked blocked")
- continue
if proxy.added > datetime.datetime.utcnow() - datetime.timedelta(hours=3):
activities.append(Activity(
activity_type="block_warning",
@@ -78,8 +89,8 @@ class BlockExternalAutomation(BaseAutomation):
activity_type="block_warning",
text=(
"More than 15 proxies were marked blocked according to external source. REFUSING to rotate.")))
- for a in activities:
- a.notify()
- db.session.add(a)
+ for activity in activities:
+ activity.notify()
+ db.session.add(activity)
db.session.commit()
return True, ""
diff --git a/app/terraform/block_ooni.py b/app/terraform/block_ooni.py
index c41d37e..6e8f4fb 100644
--- a/app/terraform/block_ooni.py
+++ b/app/terraform/block_ooni.py
@@ -27,9 +27,9 @@ def _check_origin(api_url: str, result: Dict[str, Any]) -> Dict[str, Any]:
return result
for r in req['results']:
not_ok = False
- for s in ["anomaly", "confirmed", "failure"]:
- if s in r and r[s]:
- result[r["probe_cc"]][s] += 1
+ for status in ["anomaly", "confirmed", "failure"]:
+ if status in r and r[status]:
+ result[r["probe_cc"]][status] += 1
not_ok = True
break
if not not_ok:
@@ -60,6 +60,10 @@ def threshold_origin(domain_name: str) -> Dict[str, Any]:
class BlockOONIAutomation(BaseAutomation):
+ """
+ Automation task to import origin and/or proxy reachability results from OONI.
+ """
+
short_name = "block_ooni"
description = "Import origin and/or proxy reachability results from OONI"
frequency = 240
diff --git a/app/terraform/block_roskomsvoboda.py b/app/terraform/block_roskomsvoboda.py
index db6e314..0739631 100644
--- a/app/terraform/block_roskomsvoboda.py
+++ b/app/terraform/block_roskomsvoboda.py
@@ -10,6 +10,16 @@ from app.terraform import BaseAutomation
class BlockRoskomsvobodaAutomation(BaseAutomation):
+ """
+ Automation task to import Russian blocklist from RosKomSvoboda.
+
+ This task will import the Russian state register of prohibited sites,
+ which is part of the enforcement of federal laws of the Russian Federation
+ No. 139-FZ, No. 187-FZ, No. 398-FZ and a number of others that regulate
+ the dissemination of information on the Internet.
+
+ Where proxies are found to be blocked they will be rotated.
+ """
short_name = "block_roskomsvoboda"
description = "Import Russian blocklist from RosKomSvoboda"
frequency = 90
@@ -22,27 +32,24 @@ class BlockRoskomsvobodaAutomation(BaseAutomation):
).all()
patterns = requests.get("https://reestr.rublacklist.net/api/v2/domains/json").json()
for pattern in patterns:
- for p in proxies:
- if p.url is None:
+ for proxy in proxies:
+ if proxy.url is None:
# Not ready yet
continue
- if fnmatch(p.url[len("https://"):], pattern):
- print(f"Found {p.url} blocked")
- if not p.origin.auto_rotation:
+ if fnmatch(proxy.url[len("https://"):], pattern):
+ print(f"Found {proxy.url} blocked")
+ if not proxy.origin.auto_rotation:
print("Proxy auto-rotation forbidden for origin")
continue
- if p.deprecated:
- print("Proxy already marked blocked")
- continue
- p.deprecate(reason="roskomsvoboda")
+ proxy.deprecate(reason="roskomsvoboda")
activities.append(Activity(
activity_type="block",
- text=(f"Proxy {p.url} for {p.origin.domain_name} detected blocked according to RosKomSvoboda. "
- "Rotation scheduled.")
+ text=(f"Proxy {proxy.url} for {proxy.origin.domain_name} detected blocked "
+ "according to RosKomSvoboda. Rotation scheduled.")
))
- for a in activities:
- db.session.add(a)
+ for activity in activities:
+ db.session.add(activity)
db.session.commit()
- for a in activities:
- a.notify()
+ for activity in activities:
+ activity.notify()
return True, ""
diff --git a/setup.cfg b/setup.cfg
index e44b810..a1cce8e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,2 +1,2 @@
[flake8]
-ignore = E501
+ignore = E501,W503