2023-04-26 16:01:36 +01:00
|
|
|
import logging
|
2023-10-29 15:45:10 +00:00
|
|
|
from collections import defaultdict
|
2023-04-26 16:01:36 +01:00
|
|
|
|
2022-03-10 14:26:22 +00:00
|
|
|
import requests
|
|
|
|
|
|
|
|
from app import app
|
2022-06-18 12:36:54 +01:00
|
|
|
from app.terraform.block_mirror import BlockMirrorAutomation
|
2022-05-09 08:09:57 +01:00
|
|
|
|
|
|
|
|
2023-04-26 16:01:36 +01:00
|
|
|
def _trim_prefix(s: str, prefix: str) -> str:
|
|
|
|
if s.startswith(prefix):
|
|
|
|
return s[len(prefix):]
|
|
|
|
return s
|
|
|
|
|
|
|
|
|
|
|
|
def trim_http_https(s: str) -> str:
    """
    Return the string with "http://" or "https://" removed from the start of the string if present.

    The scheme is matched case-insensitively ("HTTP://" and "Https://" are also removed), since URL
    schemes are case-insensitive. The rest of the string is returned with its case untouched.

    :param s: String to modify.
    :return: Modified string.
    """
    # "https://" is tried first and "http://" second, on the already trimmed result. This keeps the
    # results for lower-case input identical to nesting two prefix trims in that order.
    for scheme in ("https://", "http://"):
        if s[:len(scheme)].lower() == scheme:
            s = s[len(scheme):]
    return s
|
|
|
|
|
|
|
|
|
2022-06-18 12:36:54 +01:00
|
|
|
class BlockExternalAutomation(BlockMirrorAutomation):
    """
    Automation task to import proxy reachability results from external source.

    The sources to query are read from the ``EXTERNAL_CHECK_URL`` application config item.
    """
    short_name = "block_external"
    description = "Import proxy reachability results from external source"

    # NOTE(review): not referenced by the methods of this class; fetched data is kept in ``_data``.
    _content: bytes

    def fetch(self) -> None:
        """
        Download the results from every configured external source into ``self._data``.

        ``EXTERNAL_CHECK_URL`` may be a dictionary mapping a source name to a URL, a list of URLs
        (named ``external_0``, ``external_1``, ...) or a single URL string (named ``external``).
        Any other type is treated as "no sources configured".

        The decoded JSON body of each response is appended to the list kept for its source, so each
        source presumably answers with a JSON array of URL patterns (verify against the sources).

        :raises requests.RequestException: If a request fails or a source answers with an HTTP
            error status.
        """
        user_agent = {'User-agent': 'BypassCensorship/1.0'}
        check_urls_config = app.config.get('EXTERNAL_CHECK_URL', [])

        if isinstance(check_urls_config, dict):
            # Config is already a dictionary, use as is.
            check_urls = check_urls_config
        elif isinstance(check_urls_config, list):
            # Convert list of strings to a dictionary with "external_N" keys.
            check_urls = {f"external_{i}": url for i, url in enumerate(check_urls_config)}
        elif isinstance(check_urls_config, str):
            # Single string, convert to a dictionary with key "external".
            check_urls = {"external": check_urls_config}
        else:
            # Fallback if the config item is neither dict, list, nor string.
            check_urls = {}

        # Always leave a mapping behind, even when no source is configured, so that parse() has
        # something to iterate over. Existing data is kept and extended.
        if getattr(self, "_data", None) is None:
            self._data = defaultdict(list)

        for source, check_url in check_urls.items():
            response = requests.get(check_url, headers=user_agent, timeout=30)
            # Fail loudly on an HTTP error instead of importing an error document as patterns.
            response.raise_for_status()
            self._data[source].extend(response.json())

    def parse(self) -> None:
        """
        Normalise every fetched pattern to an ``https://`` URL and add it to ``self.patterns``.

        ``self.patterns`` is not defined in this class; it is presumably provided by
        ``BlockMirrorAutomation``.
        """
        # Nothing may have been fetched (attribute missing or still None); treat that as no data.
        fetched = getattr(self, "_data", None) or {}
        for source, patterns in fetched.items():
            self.patterns[source].extend(["https://" + trim_http_https(pattern) for pattern in patterns])
        logging.debug("Found URLs: %s", self.patterns)
|