2022-05-23 10:55:59 +01:00
|
|
|
import datetime
|
2022-06-17 12:42:42 +01:00
|
|
|
from typing import Tuple, List, Dict
|
2022-05-09 08:09:57 +01:00
|
|
|
|
2022-03-10 14:26:22 +00:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import requests
|
|
|
|
|
|
|
|
from app import app
|
2022-05-01 16:23:45 +01:00
|
|
|
from app.extensions import db
|
2022-05-14 10:35:24 +01:00
|
|
|
from app.models.activity import Activity
|
2022-04-22 14:01:16 +01:00
|
|
|
from app.models.mirrors import Proxy
|
2022-05-09 08:09:57 +01:00
|
|
|
from app.terraform import BaseAutomation
|
|
|
|
|
|
|
|
|
|
|
|
class BlockExternalAutomation(BaseAutomation):
    """
    Automation task to import proxy reachability results from external source.

    The task downloads the page at ``app.config['EXTERNAL_CHECK_URL']``,
    extracts a list of URLs per ``<h2>`` heading, and, for headings listed in
    ``app.config['EXTERNAL_VANTAGE_POINTS']``, deprecates the matching
    cloudfront proxies (subject to the safety limits below).
    """
    short_name = "block_external"
    description = "Import proxy reachability results from external source"

    # Seconds to wait on the external source; without a timeout the request
    # could block the task indefinitely.
    _FETCH_TIMEOUT = 30
    # Safety valve: if more proxies than this are reported blocked in a single
    # run, nothing is rotated and a warning activity is recorded instead.
    _MAX_BLOCKED_PROXIES = 15
    # Proxies added more recently than this many hours ago are never rotated
    # by this task; a warning activity is recorded instead.
    _MIN_PROXY_AGE_HOURS = 3

    # Raw body of the last fetched page (set by _fetch).
    content: bytes
    # Heading text -> list of anchor texts found under it (set by _parse).
    results: Dict[str, List[str]]

    def _fetch(self) -> None:
        """
        Download the external results page into ``self.content``.

        Raises:
            requests.RequestException: if the request fails, times out, or the
                server answers with an HTTP error status.
        """
        user_agent = {'User-agent': 'BypassCensorship/1.0'}
        page = requests.get(
            app.config['EXTERNAL_CHECK_URL'],
            headers=user_agent,
            timeout=self._FETCH_TIMEOUT,
        )
        # An error page must not be parsed as if it were a result listing.
        page.raise_for_status()
        self.content = page.content

    def _parse(self) -> None:
        """
        Parse ``self.content`` as HTML and store the outcome in ``self.results``.

        Each ``<h2>`` is paired, by position, with the ``<div>`` of class
        ``overflow-auto mb-5`` at the same index. The heading text becomes the
        key; the value is the text of every ``<a>`` inside the div, or an empty
        list when the div contains a nested ``<div>``.
        """
        soup = BeautifulSoup(self.content, 'html.parser')
        headings = soup.find_all('h2')
        sections = soup.find_all('div', class_="overflow-auto mb-5")
        results: Dict[str, List[str]] = {}
        # zip() stops at the shorter sequence, so a page with fewer sections
        # than headings no longer raises IndexError.
        for heading, section in zip(headings, sections):
            if section.div:
                # NOTE(review): a nested div presumably marks a "no results"
                # placeholder on the source page -- confirm against the source.
                results[heading.text] = []
            else:
                results[heading.text] = [anchor.text for anchor in section.find_all('a')]
        self.results = results

    def automate(self, full: bool = False) -> Tuple[bool, str]:
        """
        Fetch the external results and deprecate the proxies reported blocked.

        Args:
            full: accepted for interface compatibility; not used by this task.

        Returns:
            ``(True, "")`` when the run completed, or ``(False, message)`` when
            the external source could not be fetched.
        """
        try:
            self._fetch()
        except requests.RequestException as exc:
            return False, f"Could not fetch external reachability results: {exc}"
        # TODO: handle errors in parsing the remote content
        self._parse()
        activities = []
        blocked_proxies = []
        # A URL may be listed under several vantage points; handle it once so
        # that one proxy is not counted repeatedly toward the safety limit nor
        # deprecated more than once.
        seen_urls = set()
        # Compared against proxy.added below, so this stays a naive UTC value
        # like the original; computed once per run rather than once per URL.
        newest_rotatable = datetime.datetime.utcnow() - datetime.timedelta(
            hours=self._MIN_PROXY_AGE_HOURS)
        for vantage_point, urls in self.results.items():
            if vantage_point not in app.config['EXTERNAL_VANTAGE_POINTS']:
                continue
            for url in urls:
                if url in seen_urls:
                    continue
                seen_urls.add(url)
                print(f"Found {url} blocked")
                proxy = Proxy.query.filter(
                    Proxy.provider == "cloudfront",
                    Proxy.url == f"https://{url}"
                ).first()
                if not proxy:
                    print("Proxy not found")
                    continue
                if not proxy.origin.auto_rotation:
                    print("Proxy auto-rotation forbidden for origin")
                    continue
                if proxy.added > newest_rotatable:
                    activities.append(Activity(
                        activity_type="block_warning",
                        text=(
                            f"Proxy {proxy.url} for {proxy.origin.domain_name} detected blocked according to "
                            "external source. REFUSING to rotate because this proxy is less than "
                            f"{self._MIN_PROXY_AGE_HOURS} hours old.")))
                    continue
                blocked_proxies.append(proxy)
        if len(blocked_proxies) <= self._MAX_BLOCKED_PROXIES:
            for proxy in blocked_proxies:
                activities.append(Activity(
                    activity_type="block",
                    text=(f"Proxy {proxy.url} for {proxy.origin.domain_name} detected blocked according to external "
                          "source. Rotation scheduled.")
                ))
                proxy.deprecate(reason="external")
        else:
            activities.append(Activity(
                activity_type="block_warning",
                text=(
                    f"More than {self._MAX_BLOCKED_PROXIES} proxies were marked blocked according to external "
                    "source. REFUSING to rotate.")))
        for activity in activities:
            activity.notify()
            db.session.add(activity)
        db.session.commit()
        return True, ""
|