From ac5a604587bb9797bd6e7dd7e7aa194acaae6355 Mon Sep 17 00:00:00 2001 From: Iain Learmonth Date: Fri, 17 Jun 2022 12:42:42 +0100 Subject: [PATCH] lint: tidying up code in block tasks --- .pylintrc | 2 ++ app/models/__init__.py | 11 +++++-- app/portal/__init__.py | 7 +++-- app/terraform/block_bridge_github.py | 19 +++++++---- app/terraform/block_external.py | 47 +++++++++++++++++----------- app/terraform/block_ooni.py | 10 ++++-- app/terraform/block_roskomsvoboda.py | 37 +++++++++++++--------- setup.cfg | 2 +- 8 files changed, 86 insertions(+), 49 deletions(-) diff --git a/.pylintrc b/.pylintrc index 3816d2e..970b010 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,5 +1,7 @@ [MASTER] +disable=missing-module-docstring ignored-classes=Column load-plugins=pylint_flask,pylint_flask_sqlalchemy +max-line-length=120 py-version=3.8 suggestion-mode=yes diff --git a/app/models/__init__.py b/app/models/__init__.py index 03fcec3..cab8b89 100644 --- a/app/models/__init__.py +++ b/app/models/__init__.py @@ -1,3 +1,4 @@ +import logging from abc import abstractmethod from datetime import datetime from typing import Union, List, Optional, Any @@ -78,9 +79,13 @@ class AbstractResource(db.Model): # type: ignore raise NotImplementedError() def deprecate(self, *, reason: str) -> None: - self.deprecated = datetime.utcnow() - self.deprecation_reason = reason - self.updated = datetime.utcnow() + if self.deprecated is None: + logging.info("Deprecating %s (reason=%s)", self.brn, reason) + self.deprecated = datetime.utcnow() + self.deprecation_reason = reason + self.updated = datetime.utcnow() + else: + logging.info("Not deprecating %s (reason=%s) because it's already deprecated", self.brn, reason) def destroy(self) -> None: if self.deprecated is None: diff --git a/app/portal/__init__.py b/app/portal/__init__.py index 6257042..6d75eab 100644 --- a/app/portal/__init__.py +++ b/app/portal/__init__.py @@ -1,5 +1,5 @@ from datetime import datetime, timedelta, timezone -from typing import 
Optional, Union +from typing import Optional from flask import Blueprint, render_template, request from flask.typing import ResponseReturnValue @@ -55,7 +55,7 @@ def format_datetime(s: Optional[datetime]) -> str: @portal.app_template_filter("describe_brn") -def describe_brn(s: str) -> Union[str, Markup]: +def describe_brn(s: str) -> ResponseReturnValue: parts = s.split(":") if parts[3] == "mirror": if parts[5].startswith("origin/"): @@ -71,7 +71,8 @@ def describe_brn(s: str) -> Union[str, Markup]: ).first() if not proxy: return s - return Markup(f"Proxy: {proxy.url}
({proxy.origin.group.group_name}: {proxy.origin.domain_name})") # type: ignore + return Markup( # type: ignore[no-untyped-call] + f"Proxy: {proxy.url}
({proxy.origin.group.group_name}: {proxy.origin.domain_name})") if parts[5].startswith("quota/"): if parts[4] == "cloudfront": return f"Quota: CloudFront {parts[5][len('quota/'):]}" diff --git a/app/terraform/block_bridge_github.py b/app/terraform/block_bridge_github.py index f1866d3..2131514 100644 --- a/app/terraform/block_bridge_github.py +++ b/app/terraform/block_bridge_github.py @@ -11,21 +11,28 @@ from app.terraform import BaseAutomation class BlockBridgeGitHubAutomation(BaseAutomation): + """ + Automation task to import bridge reachability results from GitHub. + """ + short_name = "block_bridge_github" description = "Import bridge reachability results from GitHub" frequency = 30 def automate(self, full: bool = False) -> Tuple[bool, str]: - g = Github(app.config['GITHUB_API_KEY']) - repo = g.get_repo(app.config['GITHUB_BRIDGE_REPO']) - for vp in app.config['GITHUB_BRIDGE_VANTAGE_POINTS']: - contents = repo.get_contents(f"recentResult_{vp}") + github = Github(app.config['GITHUB_API_KEY']) + repo = github.get_repo(app.config['GITHUB_BRIDGE_REPO']) + for vantage_point in app.config['GITHUB_BRIDGE_VANTAGE_POINTS']: + contents = repo.get_contents(f"recentResult_{vantage_point}") if isinstance(contents, list): - return False, f"Expected a file at recentResult_{vp} but got a directory." 
+ return (False, + f"Expected a file at recentResult_{vantage_point}" + " but got a directory.") results = contents.decoded_content.decode('utf-8').splitlines() for result in results: parts = result.split("\t") - if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=3)): + if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc) + - datetime.timedelta(days=3)): continue if int(parts[1]) < 40: bridge: Bridge = Bridge.query.filter( diff --git a/app/terraform/block_external.py b/app/terraform/block_external.py index fc173c9..2c52976 100644 --- a/app/terraform/block_external.py +++ b/app/terraform/block_external.py @@ -1,5 +1,5 @@ import datetime -from typing import Tuple +from typing import Tuple, List, Dict from bs4 import BeautifulSoup import requests @@ -12,37 +12,51 @@ from app.terraform import BaseAutomation class BlockExternalAutomation(BaseAutomation): + """ + Automation task to import proxy reachability results from external source. 
+ """ short_name = "block_external" description = "Import proxy reachability results from external source" - def automate(self, full: bool = False) -> Tuple[bool, str]: + content: bytes + results: Dict[str, List[str]] + + def _fetch(self) -> None: user_agent = {'User-agent': 'BypassCensorship/1.0'} page = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent) - soup = BeautifulSoup(page.content, 'html.parser') - h2 = soup.find_all('h2') + self.content = page.content + + def _parse(self) -> None: + soup = BeautifulSoup(self.content, 'html.parser') + h2 = soup.find_all('h2') # pylint: disable=invalid-name div = soup.find_all('div', class_="overflow-auto mb-5") - results = {} - i = 0 while i < len(h2): if not div[i].div: urls = [] - a = div[i].find_all('a') + anchors = div[i].find_all('a') j = 0 - while j < len(a): - urls.append(a[j].text) + while j < len(anchors): + urls.append(anchors[j].text) j += 1 results[h2[i].text] = urls else: results[h2[i].text] = [] i += 1 + self.results = results + + def automate(self, full: bool = False) -> Tuple[bool, str]: + # TODO: handle errors in fetching remote content + # TODO: handle errors in parsing the remote content + self._fetch() + self._parse() activities = [] blocked_proxies = [] - for vp in results: - if vp not in app.config['EXTERNAL_VANTAGE_POINTS']: + for vantage_point, urls in self.results.items(): + if vantage_point not in app.config['EXTERNAL_VANTAGE_POINTS']: continue - for url in results[vp]: + for url in urls: print(f"Found {url} blocked") proxy = Proxy.query.filter( Proxy.provider == "cloudfront", @@ -54,9 +68,6 @@ class BlockExternalAutomation(BaseAutomation): if not proxy.origin.auto_rotation: print("Proxy auto-rotation forbidden for origin") continue - if proxy.deprecated: - print("Proxy already marked blocked") - continue if proxy.added > datetime.datetime.utcnow() - datetime.timedelta(hours=3): activities.append(Activity( activity_type="block_warning", @@ -78,8 +89,8 @@ class 
BlockExternalAutomation(BaseAutomation): activity_type="block_warning", text=( "More than 15 proxies were marked blocked according to external source. REFUSING to rotate."))) - for a in activities: - a.notify() - db.session.add(a) + for activity in activities: + activity.notify() + db.session.add(activity) db.session.commit() return True, "" diff --git a/app/terraform/block_ooni.py b/app/terraform/block_ooni.py index c41d37e..6e8f4fb 100644 --- a/app/terraform/block_ooni.py +++ b/app/terraform/block_ooni.py @@ -27,9 +27,9 @@ def _check_origin(api_url: str, result: Dict[str, Any]) -> Dict[str, Any]: return result for r in req['results']: not_ok = False - for s in ["anomaly", "confirmed", "failure"]: - if s in r and r[s]: - result[r["probe_cc"]][s] += 1 + for status in ["anomaly", "confirmed", "failure"]: + if status in r and r[status]: + result[r["probe_cc"]][status] += 1 not_ok = True break if not not_ok: @@ -60,6 +60,10 @@ def threshold_origin(domain_name: str) -> Dict[str, Any]: class BlockOONIAutomation(BaseAutomation): + """ + Automation task to import origin and/or proxy reachability results from OONI. + """ + short_name = "block_ooni" description = "Import origin and/or proxy reachability results from OONI" frequency = 240 diff --git a/app/terraform/block_roskomsvoboda.py b/app/terraform/block_roskomsvoboda.py index db6e314..0739631 100644 --- a/app/terraform/block_roskomsvoboda.py +++ b/app/terraform/block_roskomsvoboda.py @@ -10,6 +10,16 @@ from app.terraform import BaseAutomation class BlockRoskomsvobodaAutomation(BaseAutomation): + """ + Automation task to import Russian blocklist from RosKomSvoboda. + + This task will import the Russian state register of prohibited sites, + which is part of the enforcement of federal laws of the Russian Federation + No. 139-FZ, No. 187-FZ, No. 398-FZ and a number of others that regulate + the dissemination of information on the Internet. + + Where proxies are found to be blocked they will be rotated. 
+ """ short_name = "block_roskomsvoboda" description = "Import Russian blocklist from RosKomSvoboda" frequency = 90 @@ -22,27 +32,24 @@ class BlockRoskomsvobodaAutomation(BaseAutomation): ).all() patterns = requests.get("https://reestr.rublacklist.net/api/v2/domains/json").json() for pattern in patterns: - for p in proxies: - if p.url is None: + for proxy in proxies: + if proxy.url is None: # Not ready yet continue - if fnmatch(p.url[len("https://"):], pattern): - print(f"Found {p.url} blocked") - if not p.origin.auto_rotation: + if fnmatch(proxy.url[len("https://"):], pattern): + print(f"Found {proxy.url} blocked") + if not proxy.origin.auto_rotation: print("Proxy auto-rotation forbidden for origin") continue - if p.deprecated: - print("Proxy already marked blocked") - continue - p.deprecate(reason="roskomsvoboda") + proxy.deprecate(reason="roskomsvoboda") activities.append(Activity( activity_type="block", - text=(f"Proxy {p.url} for {p.origin.domain_name} detected blocked according to RosKomSvoboda. " - "Rotation scheduled.") + text=(f"Proxy {proxy.url} for {proxy.origin.domain_name} detected blocked " + "according to RosKomSvoboda. Rotation scheduled.") )) - for a in activities: - db.session.add(a) + for activity in activities: + db.session.add(activity) db.session.commit() - for a in activities: - a.notify() + for activity in activities: + activity.notify() return True, "" diff --git a/setup.cfg b/setup.cfg index e44b810..a1cce8e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,2 @@ [flake8] -ignore = E501 +ignore = E501,W503