block: try to unify the mirror block modules

This commit is contained in:
Iain Learmonth 2022-06-18 12:36:54 +01:00
parent db0233691c
commit 360c786610
4 changed files with 114 additions and 109 deletions

View file

@ -78,18 +78,32 @@ class AbstractResource(db.Model): # type: ignore
def brn(self) -> BRN:
raise NotImplementedError()
def deprecate(self, *, reason: str) -> bool:
    """
    Marks the resource as deprecated. In the event that the resource was already
    deprecated, no change will be recorded and the function will return False.

    :param reason: an opaque string that records the deprecation reason
    :return: True if this call deprecated the resource, False if it had
        already been deprecated
    """
    if self.deprecated is not None:
        # Already deprecated: keep the original timestamp and reason.
        logging.info("Not deprecating %s (reason=%s) because it's already deprecated", self.brn, reason)
        return False
    logging.info("Deprecating %s (reason=%s)", self.brn, reason)
    # One timestamp for both columns so they agree exactly.
    now = datetime.utcnow()
    self.deprecated = now
    self.deprecation_reason = reason
    self.updated = now
    return True
def destroy(self) -> None:
    """
    Marks the resource for destruction. A resource that has not been
    deprecated yet is deprecated first, with the reason "destroyed".

    :return: None
    """
    if self.deprecated is None:
        # Delegate instead of writing self.deprecated here: setting the
        # timestamp first would make deprecate() treat the resource as
        # already deprecated, and the reason would never be recorded.
        self.deprecate(reason="destroyed")
    self.destroyed = datetime.utcnow()
    self.updated = datetime.utcnow()

View file

@ -1,96 +1,34 @@
import datetime
from typing import Tuple, List, Dict
from typing import List, Dict
from bs4 import BeautifulSoup
import requests
from app import app
from app.extensions import db
from app.models.activity import Activity
from app.models.mirrors import Proxy
from app.terraform import BaseAutomation
from app.terraform.block_mirror import BlockMirrorAutomation
class BlockExternalAutomation(BlockMirrorAutomation):
    """
    Automation task to import proxy reachability results from external source.

    The rotation itself is done by the inherited automate(); this class only
    provides the fetch() and parse() steps that fill self.patterns.
    """
    short_name = "block_external"
    description = "Import proxy reachability results from external source"

    # Raw body of the report page, stored by fetch() for parse().
    _content: bytes

    def fetch(self) -> None:
        """
        Download the report from the URL configured as EXTERNAL_CHECK_URL and
        keep the raw response body for the parse step.

        :return: None
        """
        user_agent = {'User-agent': 'BypassCensorship/1.0'}
        page = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent)
        self._content = page.content

    def parse(self) -> None:
        """
        Collect the link texts listed under each configured vantage point and
        append them, prefixed with "https://", to self.patterns.

        Each <h2> heading is paired with the "overflow-auto mb-5" <div> at the
        same position in the document. If the two lists differ in length, the
        unpaired trailing elements are ignored.

        :return: None
        """
        soup = BeautifulSoup(self._content, 'html.parser')
        headings = soup.find_all('h2')
        sections = soup.find_all('div', class_="overflow-auto mb-5")
        vantage_points = app.config['EXTERNAL_VANTAGE_POINTS']
        for heading, section in zip(headings, sections):
            # NOTE(review): a nested <div> presumably marks a section with no
            # blocked mirrors to report - confirm against the page markup.
            if section.div or heading.text not in vantage_points:
                continue
            for anchor in section.find_all('a'):
                self.patterns.append("https://" + anchor.text)

View file

@ -0,0 +1,79 @@
from datetime import datetime, timedelta
import logging
from abc import abstractmethod
from fnmatch import fnmatch
from typing import Tuple, List
from app.extensions import db
from app.models.activity import Activity
from app.models.mirrors import Proxy
from app.terraform import BaseAutomation
class BlockMirrorAutomation(BaseAutomation):
    """
    Shared implementation for automation tasks that rotate proxies whose URL
    appears on a blocklist.

    Subclasses provide fetch() and parse(). By the time parse() returns,
    self.patterns must hold the fnmatch-style patterns that automate()
    matches against each proxy's full URL.
    """
    patterns: List[str]

    def __init__(self) -> None:
        """
        Constructor method.
        """
        self.patterns = []
        super().__init__()

    def automate(self, full: bool = False) -> Tuple[bool, str]:
        """
        Fetch and parse the blocklist, deprecate every matching active proxy
        and record a single Activity that lists the proxies rotated.

        Proxies are left alone when their origin forbids auto-rotation, when
        they were added less than 3 hours ago, or when deprecate() reports
        that they were already deprecated.

        :param full: not used by this implementation
        :return: always (True, "")
        """
        self.fetch()
        self.parse()
        # Query once, not once per pattern. A proxy deprecated for an earlier
        # pattern stays in this list; deprecate() is relied on to refuse it
        # the second time so that it is not reported twice.
        proxies = active_proxies()
        rotated = []
        for pattern in self.patterns:
            for proxy in proxies:
                if proxy.url is None:
                    # Not ready yet
                    continue
                if not fnmatch(proxy.url, pattern):
                    continue
                logging.debug("Found %s blocked", proxy.url)
                if not proxy.origin.auto_rotation:
                    logging.debug("Proxy auto-rotation forbidden for origin")
                    continue
                if proxy.added > datetime.utcnow() - timedelta(hours=3):
                    logging.debug("Not rotating a proxy less than 3 hours old")
                    continue
                if proxy.deprecate(reason=self.short_name):
                    logging.info("Rotated %s", proxy.url)
                    rotated.append((proxy.url, proxy.origin.domain_name))
                else:
                    logging.debug("Not rotating a proxy that is already deprecated")
        if rotated:
            activity = Activity(
                activity_type="block",
                text=(f"[{self.short_name}] ♻ Rotated {len(rotated)} proxies: \n" +
                      "\n".join([f"* {proxy_domain} ({origin_domain})" for proxy_domain, origin_domain in rotated]))
            )
            db.session.add(activity)
            activity.notify()
        # Committed even when nothing was reported, so the deprecations made
        # above are persisted.
        db.session.commit()
        return True, ""

    # NOTE(review): @abstractmethod is only enforced at instantiation when the
    # class uses ABCMeta - confirm that BaseAutomation provides it.
    @abstractmethod
    def fetch(self) -> None:
        """
        Fetch the blocklist data. It is the responsibility of the automation task
        to persist this within the object for the parse step.

        :return: None
        """

    @abstractmethod
    def parse(self) -> None:
        """
        Parse the blocklist data.

        :return: None
        """
def active_proxies() -> List[Proxy]:
    """
    Return every proxy that has been neither deprecated nor destroyed.

    :return: list of proxies whose deprecated and destroyed columns are both NULL
    """
    still_in_service = (
        Proxy.deprecated.is_(None),
        Proxy.destroyed.is_(None),
    )
    return Proxy.query.filter(*still_in_service).all()

View file

@ -1,11 +1,7 @@
from fnmatch import fnmatch
from typing import Tuple, List
from typing import Any
import requests
from app.extensions import db
from app.models.activity import Activity
from app.models.mirrors import Proxy
from app.terraform import BaseAutomation
@ -24,32 +20,10 @@ class BlockRoskomsvobodaAutomation(BaseAutomation):
description = "Import Russian blocklist from RosKomSvoboda"
frequency = 90
def automate(self, full: bool = False) -> Tuple[bool, str]:
    """
    Deprecate every active proxy whose hostname matches a pattern from the
    RosKomSvoboda blocklist, recording one Activity per match.

    :param full: not used by this implementation
    :return: always (True, "")
    """
    scheme_length = len("https://")
    candidates: List[Proxy] = Proxy.query.filter(
        Proxy.deprecated.is_(None),
        Proxy.destroyed.is_(None)
    ).all()
    blocklist = requests.get("https://reestr.rublacklist.net/api/v2/domains/json").json()
    notices = []
    for entry in blocklist:
        for candidate in candidates:
            url = candidate.url
            if url is None:
                # Not ready yet
                continue
            # The scheme is stripped before matching the blocklist entry.
            if not fnmatch(url[scheme_length:], entry):
                continue
            print(f"Found {url} blocked")
            if not candidate.origin.auto_rotation:
                print("Proxy auto-rotation forbidden for origin")
                continue
            candidate.deprecate(reason="roskomsvoboda")
            notice = Activity(
                activity_type="block",
                text=(f"Proxy {candidate.url} for {candidate.origin.domain_name} detected blocked "
                      "according to RosKomSvoboda. Rotation scheduled.")
            )
            notices.append(notice)
    # Persist everything before any notification is sent.
    for notice in notices:
        db.session.add(notice)
    db.session.commit()
    for notice in notices:
        notice.notify()
    return True, ""
_data: Any
def fetch(self):
    """
    Download the RosKomSvoboda domain list and keep the decoded JSON in
    self._data for the parse step.

    :return: None
    """
    response = requests.get("https://reestr.rublacklist.net/api/v2/domains/json")
    self._data = response.json()
def parse(self):
    """
    Prefix every fetched blocklist entry with "https://" and add the results
    to self.patterns.

    :return: None
    """
    prefixed = []
    for entry in self._data:
        prefixed.append("https://" + entry)
    # Added in one step, after every entry has been converted.
    self.patterns.extend(prefixed)