feat: geo risk scores
parent 315dae7f06
commit 0e0d499428
17 changed files with 558 additions and 54 deletions

@@ -1,4 +1,5 @@
 import logging
+from collections import defaultdict
 
 import requests
 
@@ -35,17 +36,26 @@ class BlockExternalAutomation(BlockMirrorAutomation):
 
     def fetch(self) -> None:
         user_agent = {'User-agent': 'BypassCensorship/1.0'}
-        if isinstance(app.config.get('EXTERNAL_CHECK_URL', []), list):
-            check_urls = app.config.get('EXTERNAL_CHECK_URL', [])
-        elif isinstance(app.config.get('EXTERNAL_CHECK_URL'), str):
-            check_urls = [app.config['EXTERNAL_CHECK_URL']]
+        check_urls_config = app.config.get('EXTERNAL_CHECK_URL', [])
+
+        if isinstance(check_urls_config, dict):
+            # Config is already a dictionary, use as is.
+            check_urls = check_urls_config
+        elif isinstance(check_urls_config, list):
+            # Convert list of strings to a dictionary with "external_N" keys.
+            check_urls = {f"external_{i}": url for i, url in enumerate(check_urls_config)}
+        elif isinstance(check_urls_config, str):
+            # Single string, convert to a dictionary with key "external".
+            check_urls = {"external": check_urls_config}
         else:
-            check_urls = []
-        for check_url in check_urls:
+            # Fallback if the config item is neither dict, list, nor string.
+            check_urls = {}
+        for source, check_url in check_urls.items():
             if self._data is None:
-                self._data = []
-            self._data.extend(requests.get(check_url, headers=user_agent, timeout=30).json())
+                self._data = defaultdict(list)
+            self._data[source].extend(requests.get(check_url, headers=user_agent, timeout=30).json())
 
     def parse(self) -> None:
-        self.patterns.extend(["https://" + trim_http_https(pattern) for pattern in self._data])
+        for source, patterns in self._data.items():
+            self.patterns[source].extend(["https://" + trim_http_https(pattern) for pattern in patterns])
         logging.debug("Found URLs: %s", self.patterns)
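
The reworked fetch() above normalises EXTERNAL_CHECK_URL into a dictionary keyed by source name before anything is downloaded, so every downstream structure can be keyed the same way. A minimal standalone sketch of that normalisation step, outside the Flask config (the helper name and sample URLs are illustrative, not part of this commit):

    def normalise_check_urls(check_urls_config):
        """Mirror of the branching in fetch(): always return {source: url}."""
        if isinstance(check_urls_config, dict):
            # Config is already a dictionary, use as is.
            return check_urls_config
        if isinstance(check_urls_config, list):
            # Convert a list of strings to a dictionary with "external_N" keys.
            return {f"external_{i}": url for i, url in enumerate(check_urls_config)}
        if isinstance(check_urls_config, str):
            # A single string becomes the lone "external" source.
            return {"external": check_urls_config}
        # Fallback if the config item is neither dict, list, nor string.
        return {}

    print(normalise_check_urls("https://example.com/blocked.json"))
    # {'external': 'https://example.com/blocked.json'}
    print(normalise_check_urls(["https://a.example/1.json", "https://b.example/2.json"]))
    # {'external_0': 'https://a.example/1.json', 'external_1': 'https://b.example/2.json'}
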
@@ -1,8 +1,9 @@
+from collections import defaultdict
 from datetime import datetime, timedelta
 import logging
 from abc import abstractmethod
 import fnmatch
-from typing import Tuple, List, Any, Optional
+from typing import Tuple, List, Any, Optional, Dict
 
 from app.extensions import db
 from app.models.activity import Activity
@@ -11,14 +12,14 @@ from app.terraform import BaseAutomation
 
 
 class BlockMirrorAutomation(BaseAutomation):
-    patterns: List[str]
+    patterns: Dict[str, List[str]]
     _data: Any
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         """
         Constructor method.
         """
-        self.patterns = []
+        self.patterns = defaultdict(list)
         self._data = None
         super().__init__(*args, **kwargs)
 
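
Because patterns is now a defaultdict(list) rather than a flat list, subclasses can append to a source's bucket without first checking that the key exists. A small sketch of that behaviour (the source names follow the commit; the URLs are made up):

    from collections import defaultdict
    from typing import Dict, List

    patterns: Dict[str, List[str]] = defaultdict(list)

    # First use of an unseen source key silently creates an empty list...
    patterns["roskomsvoboda"].append("https://blocked.example.org")
    # ...so there is no KeyError and no setdefault() boilerplate.
    patterns["external_0"].extend(["https://a.example", "https://b.example"])

    print(dict(patterns))
    # {'roskomsvoboda': ['https://blocked.example.org'],
    #  'external_0': ['https://a.example', 'https://b.example']}
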
@@ -29,23 +30,25 @@ class BlockMirrorAutomation(BaseAutomation):
         logging.debug("Parse complete")
         rotated = []
         proxy_urls = list(filter(lambda u: u is not None, active_proxy_urls()))
-        for pattern in self.patterns:
-            blocked_urls = fnmatch.filter(proxy_urls, pattern)
-            for blocked_url in blocked_urls:
-                if not (proxy := proxy_by_url(blocked_url)):
-                    continue
-                logging.debug("Found %s blocked", proxy.url)
-                if not proxy.origin.auto_rotation:
-                    logging.debug("Proxy auto-rotation forbidden for origin")
-                    continue
-                if proxy.added > datetime.utcnow() - timedelta(hours=3):
-                    logging.debug("Not rotating a proxy less than 3 hours old")
-                    continue
-                if proxy.deprecate(reason=self.short_name):
-                    logging.info("Rotated %s", proxy.url)
-                    rotated.append((proxy.url, proxy.origin.domain_name))
-                else:
-                    logging.debug("Not rotating a proxy that is already deprecated")
+        for source, patterns in self.patterns.items():
+            logging.debug("Processing blocked URLs from %s", source)
+            for pattern in patterns:
+                blocked_urls = fnmatch.filter(proxy_urls, pattern)
+                for blocked_url in blocked_urls:
+                    if not (proxy := proxy_by_url(blocked_url)):
+                        continue
+                    logging.debug("Found %s blocked", proxy.url)
+                    if not proxy.origin.auto_rotation:
+                        logging.debug("Proxy auto-rotation forbidden for origin")
+                        continue
+                    if proxy.added > datetime.utcnow() - timedelta(hours=3):
+                        logging.debug("Not rotating a proxy less than 3 hours old")
+                        continue
+                    if proxy.deprecate(reason=f"block_{source}"):
+                        logging.info("Rotated %s", proxy.url)
+                        rotated.append((proxy.url, proxy.origin.domain_name))
+                    else:
+                        logging.debug("Not rotating a proxy that is already deprecated")
         if rotated:
             activity = Activity(
                 activity_type="block",
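
The rotation pass now iterates the patterns dictionary source by source, so the fnmatch globbing is unchanged but each deprecation reason records which feed triggered it (block_roskomsvoboda, block_external_0, and so on). A reduced sketch of just the matching step, with dummy data and the database calls left out:

    import fnmatch

    patterns = {
        "roskomsvoboda": ["https://*.example.org"],
        "external_0": ["https://mirror-?.example.net"],
    }
    proxy_urls = [
        "https://cdn.example.org",
        "https://mirror-1.example.net",
        "https://unrelated.example.com",
    ]

    for source, source_patterns in patterns.items():
        for pattern in source_patterns:
            for blocked_url in fnmatch.filter(proxy_urls, pattern):
                # The real automation looks up the proxy and deprecates it here,
                # passing reason=f"block_{source}" so the source is recorded.
                print(f"block_{source}: would rotate {blocked_url}")
    # block_roskomsvoboda: would rotate https://cdn.example.org
    # block_external_0: would rotate https://mirror-1.example.net
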
@@ -93,7 +93,7 @@ class BlockRoskomsvobodaAutomation(BlockMirrorAutomation):
             for _event, element in lxml.etree.iterparse(BytesIO(self._data),
                                                         resolve_entities=False):
                 if element.tag == "domain":
-                    self.patterns.append("https://" + element.text.strip())
+                    self.patterns["roskomsvoboda"].append("https://" + element.text.strip())
         except XMLSyntaxError:
             activity = Activity(
                 activity_type="automation",
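
For the RosKomSvoboda feed, parsed <domain> entries now land under the fixed "roskomsvoboda" key. A self-contained sketch of that streaming parse with an inline XML sample (the real automation feeds the downloaded registry dump through BytesIO in the same way; the payload here is invented):

    from collections import defaultdict
    from io import BytesIO

    import lxml.etree

    data = (b"<register>"
            b"<domain>blocked-1.example</domain>"
            b"<domain>blocked-2.example</domain>"
            b"</register>")

    patterns = defaultdict(list)
    # Stream the document element by element instead of loading it whole.
    for _event, element in lxml.etree.iterparse(BytesIO(data), resolve_entities=False):
        if element.tag == "domain":
            patterns["roskomsvoboda"].append("https://" + element.text.strip())

    print(patterns["roskomsvoboda"])
    # ['https://blocked-1.example', 'https://blocked-2.example']
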