2024-12-06 16:08:48 +00:00
|
|
|
|
import fnmatch
|
2022-06-18 12:36:54 +01:00
|
|
|
|
import logging
|
|
|
|
|
from abc import abstractmethod
|
2024-12-06 16:08:48 +00:00
|
|
|
|
from collections import defaultdict
|
|
|
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
|
from typing import Any, Dict, List, Optional, Tuple
|
2022-06-18 12:36:54 +01:00
|
|
|
|
|
|
|
|
|
from app.extensions import db
|
|
|
|
|
from app.models.activity import Activity
|
|
|
|
|
from app.models.mirrors import Proxy
|
|
|
|
|
from app.terraform import BaseAutomation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BlockMirrorAutomation(BaseAutomation):
    """
    Base class for automations that rotate proxies found on a blocklist.

    Subclasses supply :meth:`fetch` and :meth:`parse`. :meth:`automate` then
    matches every active proxy URL against the collected patterns and
    deprecates the proxies that match.
    """

    # Maps a blocklist source name to the fnmatch-style URL patterns it lists.
    patterns: Dict[str, List[str]]
    # Initialised to None; presumably holds the fetched blocklist between
    # fetch() and parse() -- confirm against the concrete subclasses.
    _data: Any

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Constructor method.

        Sets up empty per-run state and then delegates to the base class
        with the arguments unchanged.
        """
        self.patterns = defaultdict(list)
        self._data = None
        super().__init__(*args, **kwargs)

    def automate(self, full: bool = False) -> Tuple[bool, str]:
        """
        Fetch and parse the blocklist, then rotate every matching proxy.

        A matching proxy is skipped when it can no longer be looked up as
        active, when its origin does not allow auto-rotation, or when it was
        added less than three hours ago. Successful rotations are summarised
        in a single "block" activity.

        :param full: not used by this implementation
        :return: always ``(True, "")``
        """
        self.fetch()
        logging.debug("Fetch complete")
        self.parse()
        logging.debug("Parse complete")

        rotated = []
        min_age = timedelta(hours=3)
        # The URL column may be unset, so drop None before glob matching.
        candidate_urls = [url for url in active_proxy_urls() if url is not None]

        for source, source_patterns in self.patterns.items():
            logging.debug("Processing blocked URLs from %s", source)
            for pattern in source_patterns:
                for blocked_url in fnmatch.filter(candidate_urls, pattern):
                    # Look the proxy up again: an earlier pattern may already
                    # have deprecated it during this run.
                    proxy = proxy_by_url(blocked_url)
                    if not proxy:
                        continue
                    logging.debug("Found %s blocked", proxy.url)
                    if not proxy.origin.auto_rotation:
                        logging.debug("Proxy auto-rotation forbidden for origin")
                        continue
                    newest_allowed = datetime.now(tz=timezone.utc) - min_age
                    if proxy.added > newest_allowed:
                        logging.debug("Not rotating a proxy less than 3 hours old")
                        continue
                    if not proxy.deprecate(reason=f"block_{source}"):
                        logging.debug("Not rotating a proxy that is already deprecated")
                        continue
                    logging.info("Rotated %s", proxy.url)
                    rotated.append((proxy.url, proxy.origin.domain_name))

        if rotated:
            # NOTE(review): the header literal appears to carry invisible
            # variation-selector characters after "proxies"; kept verbatim.
            header = f"[{self.short_name}] ♻ Rotated {len(rotated)} proxies️️: \n"
            bullet_lines = [
                f"* {proxy_domain} ({origin_domain})"
                for proxy_domain, origin_domain in rotated
            ]
            activity = Activity(
                activity_type="block",
                text=header + "\n".join(bullet_lines),
            )
            db.session.add(activity)
            activity.notify()
        db.session.commit()
        return True, ""

    @abstractmethod
    def fetch(self) -> None:
        """
        Fetch the blocklist data. It is the responsibility of the automation task
        to persist this within the object for the parse step.

        :return: None
        """

    @abstractmethod
    def parse(self) -> None:
        """
        Parse the blocklist data.

        :return: None
        """
|
|
|
|
|
|
|
|
|
|
|
2022-06-18 13:57:58 +01:00
|
|
|
|
def active_proxy_urls() -> List[str]:
    """
    Collect the URL of every proxy that is neither deprecated nor destroyed.

    :return: the ``url`` attribute of each matching proxy, in query order
    """
    live_proxies = Proxy.query.filter(
        Proxy.deprecated.is_(None),
        Proxy.destroyed.is_(None),
    )
    return [live_proxy.url for live_proxy in live_proxies.all()]
|
2022-06-18 13:47:29 +01:00
|
|
|
|
|
|
|
|
|
|
2022-12-14 14:07:19 +00:00
|
|
|
|
def proxy_by_url(url: str) -> Optional[Proxy]:
    """
    Look up the proxy with the given URL that is neither deprecated nor destroyed.

    :param url: proxy URL to match exactly
    :return: the first matching proxy, or None when there is no match
    """
    still_active = (Proxy.deprecated.is_(None), Proxy.destroyed.is_(None))
    matches = Proxy.query.filter(*still_active, Proxy.url == url)
    return matches.first()  # type: ignore[no-any-return]
|