feat(block): use json list for external block checks
parent c424b9a5fa
commit 19681d1eca
1 changed file with 23 additions and 13 deletions
@@ -1,10 +1,29 @@
-from bs4 import BeautifulSoup
+import logging
 
 import requests
 
 from app import app
 from app.terraform.block_mirror import BlockMirrorAutomation
 
+
+def _trim_prefix(s: str, prefix: str) -> str:
+    if s.startswith(prefix):
+        return s[len(prefix):]
+    return s
+
+
+def trim_http_https(s: str) -> str:
+    """
+    Return the string with "http://" or "https://" removed from the start of the string if present.
+
+    :param s: String to modify.
+    :return: Modified string.
+    """
+    return _trim_prefix(
+        _trim_prefix(s, "https://"),
+        "http://")
+
+
 class BlockExternalAutomation(BlockMirrorAutomation):
     """
     Automation task to import proxy reachability results from external source.
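The two helpers added in this hunk are easy to sanity-check in isolation. Below is a standalone sketch of their behaviour; the function bodies are copied from the diff, and the example URLs are made up:

# Standalone sketch; definitions copied from the hunk above, example URLs invented.
def _trim_prefix(s: str, prefix: str) -> str:
    if s.startswith(prefix):
        return s[len(prefix):]
    return s


def trim_http_https(s: str) -> str:
    return _trim_prefix(
        _trim_prefix(s, "https://"),
        "http://")


assert trim_http_https("https://example.com") == "example.com"
assert trim_http_https("http://example.com") == "example.com"
assert trim_http_https("example.com") == "example.com"              # no scheme: unchanged
assert trim_http_https("ftp://example.com") == "ftp://example.com"  # only http(s) is trimmed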
@@ -16,17 +35,8 @@ class BlockExternalAutomation(BlockMirrorAutomation):
 
     def fetch(self) -> None:
         user_agent = {'User-agent': 'BypassCensorship/1.0'}
-        page = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent, timeout=30)
-        self._content = page.content
+        self._data = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent, timeout=30).json()
 
     def parse(self) -> None:
-        soup = BeautifulSoup(self._content, 'html.parser')
-        h2 = soup.find_all('h2')  # pylint: disable=invalid-name
-        div = soup.find_all('div', class_="overflow-auto mb-5")
-        i = 0
-        for idx, heading in enumerate(h2):
-            if not div[idx].div:
-                anchors = div[idx].find_all('a')
-                for anchor in anchors:
-                    self.patterns.append("https://" + anchor.text)
-            i += 1
+        self.patterns.extend(["https://" + trim_http_https(pattern) for pattern in self._data])
+        logging.debug("Found URLs: %s", self.patterns)
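Taken together with the commit title, the new fetch() and parse() imply that EXTERNAL_CHECK_URL now serves a flat JSON list of URL strings instead of an HTML page to scrape. A sketch of the new flow under that assumption, reusing trim_http_https from the first hunk; the payload below is invented, not taken from the real endpoint:

import logging

logging.basicConfig(level=logging.DEBUG)

# Invented stand-in for the JSON body that fetch() would store in self._data.
data = ["https://mirror1.example.com", "http://mirror2.example.com", "mirror3.example.com"]

# What parse() now does: normalise every entry to an https:// pattern,
# whatever scheme (or none) it arrived with.
patterns = ["https://" + trim_http_https(pattern) for pattern in data]
logging.debug("Found URLs: %s", patterns)
assert patterns == [
    "https://mirror1.example.com",
    "https://mirror2.example.com",
    "https://mirror3.example.com",
]

This normalisation is why the helper exists: list entries may arrive with http://, https://, or no scheme at all, and the mirror patterns are stored uniformly as https:// URLs.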