lint: tidying up code in block tasks
parent a0da4d4641
commit ac5a604587
8 changed files with 86 additions and 49 deletions
@@ -1,5 +1,7 @@
 [MASTER]
+disable=missing-module-docstring
 ignored-classes=Column
 load-plugins=pylint_flask,pylint_flask_sqlalchemy
+max-line-length=120
 py-version=3.8
 suggestion-mode=yes
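The two added pylint options stop every module from needing a docstring and raise the line-length limit to 120 characters, which matches the reflowed lines in the diffs below. A quick way to exercise the combined configuration is to drive pylint from Python; this is only a sketch, assuming the settings above live in a .pylintrc at the project root and that app is the package being linted (both are assumptions, not part of this commit):

    # Sketch only: run pylint programmatically against the assumed .pylintrc.
    from pylint.lint import Run

    # exit=False keeps pylint from calling sys.exit(), so this can run inside
    # a larger script or a test.
    Run(["--rcfile=.pylintrc", "app"], exit=False)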
@@ -1,3 +1,4 @@
+import logging
 from abc import abstractmethod
 from datetime import datetime
 from typing import Union, List, Optional, Any
@@ -78,9 +79,13 @@ class AbstractResource(db.Model):  # type: ignore
         raise NotImplementedError()
 
     def deprecate(self, *, reason: str) -> None:
-        self.deprecated = datetime.utcnow()
-        self.deprecation_reason = reason
-        self.updated = datetime.utcnow()
+        if self.deprecated is None:
+            logging.info("Deprecating %s (reason=%s)", self.brn, reason)
+            self.deprecated = datetime.utcnow()
+            self.deprecation_reason = reason
+            self.updated = datetime.utcnow()
+        else:
+            logging.info("Not deprecating %s (reason=%s) because it's already deprecated", self.brn, reason)
 
     def destroy(self) -> None:
         if self.deprecated is None:
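With this change deprecate() becomes idempotent: the first call records the timestamp and reason, and any later call is only logged. A minimal sketch of the same guard, using a plain class in place of the SQLAlchemy model so it runs on its own; the Resource class and the BRN value are illustrative, not the real AbstractResource:

    import logging
    from datetime import datetime
    from typing import Optional


    class Resource:
        """Illustrative stand-in for AbstractResource; not the real model."""

        def __init__(self, brn: str) -> None:
            self.brn = brn
            self.deprecated: Optional[datetime] = None
            self.deprecation_reason: Optional[str] = None
            self.updated: Optional[datetime] = None

        def deprecate(self, *, reason: str) -> None:
            if self.deprecated is None:
                logging.info("Deprecating %s (reason=%s)", self.brn, reason)
                self.deprecated = datetime.utcnow()
                self.deprecation_reason = reason
                self.updated = datetime.utcnow()
            else:
                logging.info("Not deprecating %s: already deprecated", self.brn)


    resource = Resource("brn:example")
    resource.deprecate(reason="blocked")
    resource.deprecate(reason="blocked again")  # no-op apart from the log line
    assert resource.deprecation_reason == "blocked"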
@@ -1,5 +1,5 @@
 from datetime import datetime, timedelta, timezone
-from typing import Optional, Union
+from typing import Optional
 
 from flask import Blueprint, render_template, request
 from flask.typing import ResponseReturnValue
@@ -55,7 +55,7 @@ def format_datetime(s: Optional[datetime]) -> str:
 
 
 @portal.app_template_filter("describe_brn")
-def describe_brn(s: str) -> Union[str, Markup]:
+def describe_brn(s: str) -> ResponseReturnValue:
     parts = s.split(":")
     if parts[3] == "mirror":
         if parts[5].startswith("origin/"):
@@ -71,7 +71,8 @@ def describe_brn(s: str) -> Union[str, Markup]:
             ).first()
            if not proxy:
                return s
-            return Markup(f"Proxy: {proxy.url}<br>({proxy.origin.group.group_name}: {proxy.origin.domain_name})")  # type: ignore
+            return Markup(  # type: ignore[no-untyped-call]
+                f"Proxy: {proxy.url}<br>({proxy.origin.group.group_name}: {proxy.origin.domain_name})")
         if parts[5].startswith("quota/"):
             if parts[4] == "cloudfront":
                 return f"Quota: CloudFront {parts[5][len('quota/'):]}"
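Dropping Union[str, Markup] in favour of flask.typing.ResponseReturnValue removes the need to import Union here, since ResponseReturnValue already admits plain strings and Markup (a str subclass). A small, self-contained sketch of the same filter pattern; the filter body and the BRN layout are simplified assumptions, not the real describe_brn:

    from flask import Flask
    from flask.typing import ResponseReturnValue
    from markupsafe import Markup

    app = Flask(__name__)


    @app.template_filter("describe_brn")
    def describe_brn(s: str) -> ResponseReturnValue:
        # Simplified: the real filter inspects several BRN fields and queries
        # the database before deciding what to render.
        parts = s.split(":")
        if len(parts) > 3 and parts[3] == "mirror":
            # Markup.format() escapes its arguments, so the value is safe to
            # interpolate into HTML.
            return Markup("Mirror: <code>{}</code>").format(parts[-1])
        return s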
@@ -11,21 +11,28 @@ from app.terraform import BaseAutomation
 
 
 class BlockBridgeGitHubAutomation(BaseAutomation):
+    """
+    Automation task to import bridge reachability results from GitHub.
+    """
+
     short_name = "block_bridge_github"
     description = "Import bridge reachability results from GitHub"
     frequency = 30
 
     def automate(self, full: bool = False) -> Tuple[bool, str]:
-        g = Github(app.config['GITHUB_API_KEY'])
-        repo = g.get_repo(app.config['GITHUB_BRIDGE_REPO'])
-        for vp in app.config['GITHUB_BRIDGE_VANTAGE_POINTS']:
-            contents = repo.get_contents(f"recentResult_{vp}")
+        github = Github(app.config['GITHUB_API_KEY'])
+        repo = github.get_repo(app.config['GITHUB_BRIDGE_REPO'])
+        for vantage_point in app.config['GITHUB_BRIDGE_VANTAGE_POINTS']:
+            contents = repo.get_contents(f"recentResult_{vantage_point}")
             if isinstance(contents, list):
-                return False, f"Expected a file at recentResult_{vp} but got a directory."
+                return (False,
+                        f"Expected a file at recentResult_{vantage_point}"
+                        " but got a directory.")
             results = contents.decoded_content.decode('utf-8').splitlines()
             for result in results:
                 parts = result.split("\t")
-                if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=3)):
+                if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc)
+                                         - datetime.timedelta(days=3)):
                     continue
                 if int(parts[1]) < 40:
                     bridge: Bridge = Bridge.query.filter(
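automate() walks the tab-separated result lines in each recentResult_<vantage point> file, ignores measurements older than three days, and treats a score below 40 as a blocked bridge. A standalone sketch of that filtering step over invented sample lines; the column layout (identifier, score, ISO 8601 timestamp) is inferred from the code above rather than from any file-format documentation:

    import datetime

    from dateutil.parser import isoparse

    now = datetime.datetime.now(datetime.timezone.utc)

    # Invented sample lines: <identifier>\t<score>\t<timestamp>
    results = [
        "bridge-a\t25\t2020-01-01T00:00:00+00:00",   # too old, skipped
        "bridge-b\t25\t" + now.isoformat(),          # recent, low score: blocked
        "bridge-c\t95\t" + now.isoformat(),          # recent, high score: fine
    ]

    for result in results:
        parts = result.split("\t")
        if isoparse(parts[2]) < (datetime.datetime.now(datetime.timezone.utc)
                                 - datetime.timedelta(days=3)):
            continue  # stale measurement
        if int(parts[1]) < 40:
            print(f"{parts[0]} looks blocked from this vantage point")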
@@ -1,5 +1,5 @@
 import datetime
-from typing import Tuple
+from typing import Tuple, List, Dict
 
 from bs4 import BeautifulSoup
 import requests
@@ -12,37 +12,51 @@ from app.terraform import BaseAutomation
 
 
 class BlockExternalAutomation(BaseAutomation):
+    """
+    Automation task to import proxy reachability results from external source.
+    """
     short_name = "block_external"
     description = "Import proxy reachability results from external source"
 
-    def automate(self, full: bool = False) -> Tuple[bool, str]:
+    content: bytes
+    results: Dict[str, List[str]]
+
+    def _fetch(self) -> None:
         user_agent = {'User-agent': 'BypassCensorship/1.0'}
         page = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent)
-        soup = BeautifulSoup(page.content, 'html.parser')
-        h2 = soup.find_all('h2')
+        self.content = page.content
+
+    def _parse(self) -> None:
+        soup = BeautifulSoup(self.content, 'html.parser')
+        h2 = soup.find_all('h2')  # pylint: disable=invalid-name
         div = soup.find_all('div', class_="overflow-auto mb-5")
 
         results = {}
 
         i = 0
         while i < len(h2):
             if not div[i].div:
                 urls = []
-                a = div[i].find_all('a')
+                anchors = div[i].find_all('a')
                 j = 0
-                while j < len(a):
-                    urls.append(a[j].text)
+                while j < len(anchors):
+                    urls.append(anchors[j].text)
                     j += 1
                 results[h2[i].text] = urls
             else:
                 results[h2[i].text] = []
             i += 1
+        self.results = results
+
+    def automate(self, full: bool = False) -> Tuple[bool, str]:
+        # TODO: handle errors in fetching remote content
+        # TODO: handle errors in parsing the remote content
+        self._fetch()
+        self._parse()
         activities = []
         blocked_proxies = []
-        for vp in results:
-            if vp not in app.config['EXTERNAL_VANTAGE_POINTS']:
+        for vantage_point, urls in self.results.items():
+            if vantage_point not in app.config['EXTERNAL_VANTAGE_POINTS']:
                 continue
-            for url in results[vp]:
+            for url in urls:
                 print(f"Found {url} blocked")
                 proxy = Proxy.query.filter(
                     Proxy.provider == "cloudfront",
@@ -54,9 +68,6 @@ class BlockExternalAutomation(BaseAutomation):
                 if not proxy.origin.auto_rotation:
                     print("Proxy auto-rotation forbidden for origin")
                     continue
-                if proxy.deprecated:
-                    print("Proxy already marked blocked")
-                    continue
                 if proxy.added > datetime.datetime.utcnow() - datetime.timedelta(hours=3):
                     activities.append(Activity(
                         activity_type="block_warning",
@@ -78,8 +89,8 @@ class BlockExternalAutomation(BaseAutomation):
                 activity_type="block_warning",
                 text=(
                     "More than 15 proxies were marked blocked according to external source. REFUSING to rotate.")))
-        for a in activities:
-            a.notify()
-            db.session.add(a)
+        for activity in activities:
+            activity.notify()
+            db.session.add(activity)
         db.session.commit()
         return True, ""
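Splitting automate() into _fetch() and _parse() separates the network call from the HTML scraping and stores the intermediate state on the new content and results attributes. A standalone sketch of the same split, pairing each h2 heading with the following results div; it uses a canned HTML string instead of EXTERNAL_CHECK_URL and zip() instead of the index loop above, so it is an illustration of the structure rather than the real task:

    from typing import Dict, List

    from bs4 import BeautifulSoup

    # Canned stand-in for the page the real task downloads.
    HTML = """
    <h2>Vantage point A</h2>
    <div class="overflow-auto mb-5"><a>https://a.example.net</a> <a>https://b.example.net</a></div>
    <h2>Vantage point B</h2>
    <div class="overflow-auto mb-5"><div>nothing reported here</div></div>
    """


    class ExternalResults:
        content: bytes
        results: Dict[str, List[str]]

        def _fetch(self) -> None:
            # The real task fetches EXTERNAL_CHECK_URL with requests.
            self.content = HTML.encode("utf-8")

        def _parse(self) -> None:
            soup = BeautifulSoup(self.content, "html.parser")
            headings = soup.find_all("h2")
            blocks = soup.find_all("div", class_="overflow-auto mb-5")
            # As in the diff, a block that wraps a nested <div> is treated as
            # having no URLs to report.
            self.results = {
                heading.text: ([] if block.div else [a.text for a in block.find_all("a")])
                for heading, block in zip(headings, blocks)
            }


    checker = ExternalResults()
    checker._fetch()
    checker._parse()
    print(checker.results)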
@@ -27,9 +27,9 @@ def _check_origin(api_url: str, result: Dict[str, Any]) -> Dict[str, Any]:
         return result
     for r in req['results']:
         not_ok = False
-        for s in ["anomaly", "confirmed", "failure"]:
-            if s in r and r[s]:
-                result[r["probe_cc"]][s] += 1
+        for status in ["anomaly", "confirmed", "failure"]:
+            if status in r and r[status]:
+                result[r["probe_cc"]][status] += 1
                 not_ok = True
                 break
         if not not_ok:
@@ -60,6 +60,10 @@ def threshold_origin(domain_name: str) -> Dict[str, Any]:
 
 
 class BlockOONIAutomation(BaseAutomation):
+    """
+    Automation task to import origin and/or proxy reachability results from OONI.
+    """
+
     short_name = "block_ooni"
     description = "Import origin and/or proxy reachability results from OONI"
     frequency = 240
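_check_origin tallies, per probe country, how many OONI measurements carry an anomaly, confirmed or failure flag, counting each measurement at most once thanks to the break. A standalone sketch of that tallying over invented measurement rows; the field names follow the code above, the data and the "ok" counter for unflagged rows are assumptions (the branch under if not not_ok: is outside the hunk), and a for/else stands in for the not_ok flag:

    from typing import Dict

    # Invented rows shaped like the fields _check_origin reads.
    measurements = [
        {"probe_cc": "RU", "anomaly": True, "confirmed": False, "failure": False},
        {"probe_cc": "RU", "anomaly": False, "confirmed": True, "failure": False},
        {"probe_cc": "IR", "anomaly": False, "confirmed": False, "failure": False},
    ]

    result: Dict[str, Dict[str, int]] = {}
    for r in measurements:
        counts = result.setdefault(
            r["probe_cc"], {"anomaly": 0, "confirmed": 0, "failure": 0, "ok": 0})
        for status in ["anomaly", "confirmed", "failure"]:
            if status in r and r[status]:
                counts[status] += 1
                break  # count each measurement against one status only
        else:
            counts["ok"] += 1  # assumption: unflagged measurements count as ok

    print(result)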
@@ -10,6 +10,16 @@ from app.terraform import BaseAutomation
 
 
 class BlockRoskomsvobodaAutomation(BaseAutomation):
+    """
+    Automation task to import Russian blocklist from RosKomSvoboda.
+
+    This task will import the Russian state register of prohibited sites,
+    which is part of the enforcement of federal laws of the Russian Federation
+    No. 139-FZ, No. 187-FZ, No. 398-FZ and a number of others that regulate
+    the dissemination of information on the Internet.
+
+    Where proxies are found to be blocked they will be rotated.
+    """
     short_name = "block_roskomsvoboda"
     description = "Import Russian blocklist from RosKomSvoboda"
     frequency = 90
@@ -22,27 +32,24 @@ class BlockRoskomsvobodaAutomation(BaseAutomation):
         ).all()
         patterns = requests.get("https://reestr.rublacklist.net/api/v2/domains/json").json()
         for pattern in patterns:
-            for p in proxies:
-                if p.url is None:
+            for proxy in proxies:
+                if proxy.url is None:
                     # Not ready yet
                     continue
-                if fnmatch(p.url[len("https://"):], pattern):
-                    print(f"Found {p.url} blocked")
-                    if not p.origin.auto_rotation:
+                if fnmatch(proxy.url[len("https://"):], pattern):
+                    print(f"Found {proxy.url} blocked")
+                    if not proxy.origin.auto_rotation:
                         print("Proxy auto-rotation forbidden for origin")
                         continue
-                    if p.deprecated:
-                        print("Proxy already marked blocked")
-                        continue
-                    p.deprecate(reason="roskomsvoboda")
+                    proxy.deprecate(reason="roskomsvoboda")
                     activities.append(Activity(
                         activity_type="block",
-                        text=(f"Proxy {p.url} for {p.origin.domain_name} detected blocked according to RosKomSvoboda. "
-                              "Rotation scheduled.")
+                        text=(f"Proxy {proxy.url} for {proxy.origin.domain_name} detected blocked "
+                              "according to RosKomSvoboda. Rotation scheduled.")
                     ))
-        for a in activities:
-            db.session.add(a)
+        for activity in activities:
+            db.session.add(activity)
         db.session.commit()
-        for a in activities:
-            a.notify()
+        for activity in activities:
+            activity.notify()
         return True, ""
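With the duplicate-deprecation guard now living in deprecate() itself, the task simply matches each proxy URL (minus its https:// scheme) against the registry's fnmatch patterns and deprecates on a hit. A minimal sketch of that matching step with invented patterns and URLs:

    from fnmatch import fnmatch

    # Invented data: registry patterns and stored proxy URLs.
    patterns = ["*.example.org", "blocked.example.net"]
    proxy_urls = ["https://mirror1.example.org", "https://open.example.com"]

    for pattern in patterns:
        for url in proxy_urls:
            if fnmatch(url[len("https://"):], pattern):
                # The real task calls proxy.deprecate(reason="roskomsvoboda") here.
                print(f"Found {url} blocked")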
@@ -1,2 +1,2 @@
 [flake8]
-ignore = E501
+ignore = E501,W503
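Because flake8's ignore option replaces the default ignore list rather than extending it, listing only E501 had left W503 (line break before a binary operator) active; adding W503 back keeps the wrapped comparison in the GitHub bridge task above acceptable. A small illustration of the line-wrapping style this permits, with an invented timestamp:

    import datetime

    timestamp = datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)

    # Breaking the line before the binary operator, as in the bridge task's
    # age check, is what W503 would otherwise flag.
    is_stale = timestamp < (datetime.datetime.now(datetime.timezone.utc)
                            - datetime.timedelta(days=3))
    print(is_stale)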