feat: geo risk scores

This commit is contained in:
Iain Learmonth 2023-10-29 15:45:10 +00:00
parent 315dae7f06
commit 0e0d499428
17 changed files with 558 additions and 54 deletions

View file

@ -1,4 +1,5 @@
import logging
from collections import defaultdict
import requests
@ -35,17 +36,26 @@ class BlockExternalAutomation(BlockMirrorAutomation):
def fetch(self) -> None:
    """
    Download the blocked-URL lists named by ``EXTERNAL_CHECK_URL``.

    The config value may be:

    * a ``dict`` mapping a source name to a URL (used as is),
    * a ``list`` of URLs (sources are named ``external_0``, ``external_1``, ...),
    * a single ``str`` URL (the source is named ``external``).

    Any other type is treated as "no URLs configured".

    Each URL is requested with a 30 second timeout and the decoded JSON
    body is appended to ``self._data[source]``.  ``self._data`` is only
    created once there is at least one URL to fetch; with nothing
    configured it is left untouched.
    """
    user_agent = {'User-agent': 'BypassCensorship/1.0'}
    check_urls_config = app.config.get('EXTERNAL_CHECK_URL', [])
    if isinstance(check_urls_config, dict):
        # Config is already a dictionary, use as is.
        check_urls = check_urls_config
    elif isinstance(check_urls_config, list):
        # Convert list of strings to a dictionary with "external_N" keys.
        check_urls = {f"external_{i}": url for i, url in enumerate(check_urls_config)}
    elif isinstance(check_urls_config, str):
        # Single string, convert to a dictionary with key "external".
        check_urls = {"external": check_urls_config}
    else:
        # Fallback if the config item is neither dict, list, nor string.
        check_urls = {}
    for source, check_url in check_urls.items():
        if self._data is None:
            # Results are grouped per source so later stages can tell the
            # lists apart.
            self._data = defaultdict(list)
        # NOTE(review): assumes each endpoint returns a JSON array of URL
        # patterns -- confirm against the upstream services.
        self._data[source].extend(requests.get(check_url, headers=user_agent, timeout=30).json())
def parse(self) -> None:
    """
    Turn the fetched per-source data into ``https://`` URL patterns.

    For every source in ``self._data`` each entry is passed through
    ``trim_http_https`` (presumably stripping any scheme -- verify against
    the helper), prefixed with ``https://`` and appended to
    ``self.patterns[source]``.

    If nothing was fetched (``self._data`` is ``None`` or empty) no
    patterns are added.
    """
    # _data stays None when no check URLs are configured; treat that as
    # "no data" rather than failing on None.items().
    for source, patterns in (self._data or {}).items():
        self.patterns[source].extend(["https://" + trim_http_https(pattern) for pattern in patterns])
    logging.debug("Found URLs: %s", self.patterns)

View file

@ -1,8 +1,9 @@
from collections import defaultdict
from datetime import datetime, timedelta
import logging
from abc import abstractmethod
import fnmatch
from typing import Tuple, List, Any, Optional
from typing import Tuple, List, Any, Optional, Dict
from app.extensions import db
from app.models.activity import Activity
@ -11,14 +12,14 @@ from app.terraform import BaseAutomation
class BlockMirrorAutomation(BaseAutomation):
patterns: List[str]
patterns: Dict[str, List[str]]
_data: Any
def __init__(self, *args: Any, **kwargs: Any) -> None:
    """
    Constructor method.

    Sets up an empty mapping of source name to URL patterns and clears the
    raw fetched data, then forwards all arguments to the parent
    constructor.
    """
    # Patterns are grouped by the source that reported them; defaultdict
    # lets subclasses append to a source without creating the list first.
    self.patterns = defaultdict(list)
    self._data = None
    super().__init__(*args, **kwargs)
@ -29,23 +30,25 @@ class BlockMirrorAutomation(BaseAutomation):
logging.debug("Parse complete")
rotated = []
proxy_urls = list(filter(lambda u: u is not None, active_proxy_urls()))
for pattern in self.patterns:
blocked_urls = fnmatch.filter(proxy_urls, pattern)
for blocked_url in blocked_urls:
if not (proxy := proxy_by_url(blocked_url)):
continue
logging.debug("Found %s blocked", proxy.url)
if not proxy.origin.auto_rotation:
logging.debug("Proxy auto-rotation forbidden for origin")
continue
if proxy.added > datetime.utcnow() - timedelta(hours=3):
logging.debug("Not rotating a proxy less than 3 hours old")
continue
if proxy.deprecate(reason=self.short_name):
logging.info("Rotated %s", proxy.url)
rotated.append((proxy.url, proxy.origin.domain_name))
else:
logging.debug("Not rotating a proxy that is already deprecated")
for source, patterns in self.patterns.items():
logging.debug("Processing blocked URLs from %s", source)
for pattern in patterns:
blocked_urls = fnmatch.filter(proxy_urls, pattern)
for blocked_url in blocked_urls:
if not (proxy := proxy_by_url(blocked_url)):
continue
logging.debug("Found %s blocked", proxy.url)
if not proxy.origin.auto_rotation:
logging.debug("Proxy auto-rotation forbidden for origin")
continue
if proxy.added > datetime.utcnow() - timedelta(hours=3):
logging.debug("Not rotating a proxy less than 3 hours old")
continue
if proxy.deprecate(reason=f"block_{source}"):
logging.info("Rotated %s", proxy.url)
rotated.append((proxy.url, proxy.origin.domain_name))
else:
logging.debug("Not rotating a proxy that is already deprecated")
if rotated:
activity = Activity(
activity_type="block",

View file

@ -93,7 +93,7 @@ class BlockRoskomsvobodaAutomation(BlockMirrorAutomation):
for _event, element in lxml.etree.iterparse(BytesIO(self._data),
resolve_entities=False):
if element.tag == "domain":
self.patterns.append("https://" + element.text.strip())
self.patterns["roskomsvoboda"].append("https://" + element.text.strip())
except XMLSyntaxError:
activity = Activity(
activity_type="automation",