# majuna/app/terraform/block_external.py
import logging
from collections import defaultdict

import requests

from app import app
from app.terraform.block_mirror import BlockMirrorAutomation
def _trim_prefix(s: str, prefix: str) -> str:
if s.startswith(prefix):
2024-12-06 18:15:47 +00:00
return s[len(prefix) :]
return s
def trim_http_https(s: str) -> str:
    """
    Strip a leading "http://" or "https://" scheme from the string, if present.

    :param s: String to modify.
    :return: Modified string.
    """
    return s.removeprefix("https://").removeprefix("http://")
class BlockExternalAutomation(BlockMirrorAutomation):
    """
    Automation task to import proxy reachability results from external source.
    """

    short_name = "block_external"
    description = "Import proxy reachability results from external source"

    # NOTE(review): declared but not used in this class; presumably set by a
    # parent-class hook — confirm against BlockMirrorAutomation.
    _content: bytes

    def fetch(self) -> None:
        """
        Fetch the configured external check URL(s) and accumulate each JSON
        response into ``self._data``, keyed by source name.

        ``EXTERNAL_CHECK_URL`` may be a dict (``{source_name: url}``), a list
        of URLs (keyed ``external_0``, ``external_1``, ...), or a single URL
        string (keyed ``external``).

        :raises requests.HTTPError: if a check URL returns an error status.
        :raises requests.RequestException: on connection/timeout failures.
        """
        user_agent = {"User-agent": "BypassCensorship/1.0"}
        check_urls_config = app.config.get("EXTERNAL_CHECK_URL", [])

        if isinstance(check_urls_config, dict):
            # Config is already a dictionary, use as is.
            check_urls = check_urls_config
        elif isinstance(check_urls_config, list):
            # Convert list of strings to a dictionary with "external_N" keys.
            check_urls = {
                f"external_{i}": url for i, url in enumerate(check_urls_config)
            }
        elif isinstance(check_urls_config, str):
            # Single string, convert to a dictionary with key "external".
            check_urls = {"external": check_urls_config}
        else:
            # Fallback if the config item is neither dict, list, nor string.
            check_urls = {}

        # Loop-invariant initialisation hoisted out of the fetch loop.
        if self._data is None:
            self._data = defaultdict(list)

        for source, check_url in check_urls.items():
            response = requests.get(check_url, headers=user_agent, timeout=30)
            # Fail fast on HTTP errors rather than silently ingesting an
            # error-response body as reachability results.
            response.raise_for_status()
            self._data[source].extend(response.json())

    def parse(self) -> None:
        """
        Normalise fetched patterns to an ``https://`` prefix and extend
        ``self.patterns`` per source.
        """
        for source, patterns in self._data.items():
            self.patterns[source].extend(
                ["https://" + trim_http_https(pattern) for pattern in patterns]
            )
        logging.debug("Found URLs: %s", self.patterns)