block: try to unify the mirror block modules
This commit is contained in:
parent
db0233691c
commit
360c786610
4 changed files with 114 additions and 109 deletions
|
@ -78,18 +78,32 @@ class AbstractResource(db.Model): # type: ignore
|
||||||
def brn(self) -> BRN:
|
def brn(self) -> BRN:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def deprecate(self, *, reason: str) -> bool:
    """
    Marks the resource as deprecated. In the event that the resource was already
    deprecated, no change will be recorded and the function will return False.

    :param reason: an opaque string that records the deprecation reason
    :return: True if this call deprecated the resource, False if it was already
        deprecated
    """
    # Guard clause: an existing deprecation timestamp and reason must not be overwritten.
    if self.deprecated is not None:
        logging.info("Not deprecating %s (reason=%s) because it's already deprecated", self.brn, reason)
        return False
    logging.info("Deprecating %s (reason=%s)", self.brn, reason)
    # One clock reading so that deprecated and updated record the same instant.
    now = datetime.utcnow()
    self.deprecated = now
    self.deprecation_reason = reason
    self.updated = now
    return True
|
||||||
|
|
||||||
def destroy(self) -> None:
    """
    Marks the resource for destruction.

    A resource that has not been deprecated yet is deprecated first, with the
    reason "destroyed". The destroyed and updated fields then receive the same
    timestamp.

    :return: None
    """
    if self.deprecated is None:
        self.deprecate(reason="destroyed")
    # One clock reading so that destroyed and updated record the same instant.
    now = datetime.utcnow()
    self.destroyed = now
    self.updated = now
|
||||||
|
|
||||||
|
|
|
@ -1,96 +1,34 @@
|
||||||
import datetime
|
from typing import List, Dict
|
||||||
from typing import Tuple, List, Dict
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from app import app
|
from app import app
|
||||||
from app.extensions import db
|
from app.terraform.block_mirror import BlockMirrorAutomation
|
||||||
from app.models.activity import Activity
|
|
||||||
from app.models.mirrors import Proxy
|
|
||||||
from app.terraform import BaseAutomation
|
|
||||||
|
|
||||||
|
|
||||||
class BlockExternalAutomation(BlockMirrorAutomation):
    """
    Automation task to import proxy reachability results from external source.
    """
    short_name = "block_external"
    description = "Import proxy reachability results from external source"

    # Raw body of the reachability report; stored by fetch() and read by parse().
    _content: bytes

    def fetch(self) -> None:
        """
        Download the report from the URL configured as EXTERNAL_CHECK_URL and keep
        the raw response body for the parse step.

        :raises requests.RequestException: if the request times out or the server
            answers with an HTTP error status
        :return: None
        """
        user_agent = {'User-agent': 'BypassCensorship/1.0'}
        # Without a timeout, requests waits forever on a stalled connection.
        page = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent, timeout=60)
        # An error page must not be parsed as if it were a report.
        page.raise_for_status()
        self._content = page.content

    def parse(self) -> None:
        """
        Extract blocked hosts from the downloaded report and append them to
        ``self.patterns`` as ``https://`` URLs.

        Each ``h2`` heading is paired by position with the corresponding
        ``div.overflow-auto.mb-5`` element. Only headings whose text is listed in
        the EXTERNAL_VANTAGE_POINTS configuration are considered.

        :return: None
        """
        soup = BeautifulSoup(self._content, 'html.parser')
        headings = soup.find_all('h2')
        containers = soup.find_all('div', class_="overflow-auto mb-5")
        vantage_points = app.config['EXTERNAL_VANTAGE_POINTS']
        for heading, container in zip(headings, containers):
            # NOTE(review): a nested div presumably marks a section without a
            # list of blocked hosts - confirm against the report markup.
            if container.div or heading.text not in vantage_points:
                continue
            for anchor in container.find_all('a'):
                self.patterns.append("https://" + anchor.text)

    # Former private names, kept so that any existing caller continues to work.
    _fetch = fetch
    _parse = parse
|
|
||||||
|
|
79
app/terraform/block_mirror.py
Normal file
79
app/terraform/block_mirror.py
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import logging
|
||||||
|
from abc import abstractmethod
|
||||||
|
from fnmatch import fnmatch
|
||||||
|
from typing import Tuple, List
|
||||||
|
|
||||||
|
from app.extensions import db
|
||||||
|
from app.models.activity import Activity
|
||||||
|
from app.models.mirrors import Proxy
|
||||||
|
from app.terraform import BaseAutomation
|
||||||
|
|
||||||
|
|
||||||
|
class BlockMirrorAutomation(BaseAutomation):
    """
    Shared implementation for automation tasks that rotate proxies which appear
    on a blocklist.

    A subclass implements fetch() and parse(); parse() fills ``self.patterns``,
    and automate() deprecates every eligible active proxy whose URL matches one
    of those patterns.
    """

    # fnmatch-style patterns that are compared against the full proxy URL.
    patterns: List[str]

    def __init__(self) -> None:
        """
        Constructor method.
        """
        self.patterns = []
        super().__init__()

    def automate(self, full: bool = False) -> Tuple[bool, str]:
        """
        Fetch and parse the blocklist, then deprecate every matching proxy.

        A matching proxy is left alone when its origin has auto-rotation disabled,
        when it was added less than 3 hours ago, or when it is already deprecated.
        If at least one proxy was rotated, a single "block" activity listing all of
        them is stored, committed and then notified.

        :param full: not used by this implementation
        :return: always ``(True, "")``
        """
        self.fetch()
        self.parse()
        # Load the candidates once: querying inside the pattern loop would cost
        # one database round trip per blocklist entry. Proxies without a URL are
        # not ready yet and can never match.
        candidates = [proxy for proxy in active_proxies() if proxy.url is not None]
        newest_allowed = datetime.utcnow() - timedelta(hours=3)
        rotated = []
        for pattern in self.patterns:
            for proxy in candidates:
                if not fnmatch(proxy.url, pattern):
                    continue
                logging.debug("Found %s blocked", proxy.url)
                if not proxy.origin.auto_rotation:
                    logging.debug("Proxy auto-rotation forbidden for origin")
                    continue
                if proxy.added > newest_allowed:
                    logging.debug("Not rotating a proxy less than 3 hours old")
                    continue
                if proxy.deprecate(reason=self.short_name):
                    logging.info("Rotated %s", proxy.url)
                    rotated.append((proxy.url, proxy.origin.domain_name))
                else:
                    logging.debug("Not rotating a proxy that is already deprecated")
        if rotated:
            activity = Activity(
                activity_type="block",
                text=(f"[{self.short_name}] ♻ Rotated {len(rotated)} proxies️️: \n" +
                      "\n".join(f"* {proxy_domain} ({origin_domain})" for proxy_domain, origin_domain in rotated))
            )
            db.session.add(activity)
            db.session.commit()
            # Notification is sent only after the commit has gone through.
            activity.notify()
        return True, ""

    @abstractmethod
    def fetch(self) -> None:
        """
        Fetch the blocklist data. It is the responsibility of the automation task
        to persist this within the object for the parse step.

        :return: None
        """

    @abstractmethod
    def parse(self) -> None:
        """
        Parse the blocklist data.

        Implementations are expected to add the resulting URL patterns to
        ``self.patterns``, which automate() reads afterwards.

        :return: None
        """
|
||||||
|
|
||||||
|
|
||||||
|
def active_proxies() -> List[Proxy]:
    """
    Load every proxy that has been neither deprecated nor destroyed.

    :return: all proxies whose deprecated and destroyed columns are both NULL
    """
    still_in_service = (
        Proxy.deprecated.is_(None),
        Proxy.destroyed.is_(None),
    )
    return Proxy.query.filter(*still_in_service).all()
|
|
@ -1,11 +1,7 @@
|
||||||
from fnmatch import fnmatch
|
from typing import Any
|
||||||
from typing import Tuple, List
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from app.extensions import db
|
|
||||||
from app.models.activity import Activity
|
|
||||||
from app.models.mirrors import Proxy
|
|
||||||
from app.terraform import BaseAutomation
|
from app.terraform import BaseAutomation
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,32 +20,10 @@ class BlockRoskomsvobodaAutomation(BaseAutomation):
|
||||||
description = "Import Russian blocklist from RosKomSvoboda"
|
description = "Import Russian blocklist from RosKomSvoboda"
|
||||||
frequency = 90
|
frequency = 90
|
||||||
|
|
||||||
# Decoded JSON payload of the blocklist; stored by fetch() and read by parse().
_data: Any

def fetch(self) -> None:
    """
    Download the RosKomSvoboda domain list and keep the decoded JSON payload for
    the parse step.

    :raises requests.RequestException: if the request times out or the server
        answers with an HTTP error status
    :return: None
    """
    # Without a timeout, requests waits forever on a stalled connection.
    response = requests.get("https://reestr.rublacklist.net/api/v2/domains/json", timeout=60)
    # A JSON error body must not be mistaken for a list of blocked domains.
    response.raise_for_status()
    self._data = response.json()

def parse(self) -> None:
    """
    Append every fetched entry, prefixed with ``https://``, to ``self.patterns``.

    :return: None
    """
    # NOTE(review): self.patterns is created by BlockMirrorAutomation.__init__;
    # confirm that this class derives from it rather than from BaseAutomation.
    self.patterns.extend(["https://" + pattern for pattern in self._data])
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue