from typing import List, Dict

from bs4 import BeautifulSoup
import requests

from app import app
from app.terraform.block_mirror import BlockMirrorAutomation

# Upper bound, in seconds, on how long the results page fetch may stall.
# Without a timeout ``requests`` waits indefinitely on an unresponsive server.
FETCH_TIMEOUT_SECONDS = 30


class BlockExternalAutomation(BlockMirrorAutomation):
    """
    Automation task to import proxy reachability results from external source.

    ``_fetch`` downloads the page configured as ``EXTERNAL_CHECK_URL`` and
    ``_parse`` turns the links found on it into entries of ``self.patterns``.
    """
    short_name = "block_external"
    description = "Import proxy reachability results from external source"

    # Raw body of the downloaded page; written by _fetch(), read by _parse().
    _content: bytes

    def _fetch(self) -> None:
        """
        Download the external results page and keep its raw body.

        The URL is read from ``app.config['EXTERNAL_CHECK_URL']`` and the
        response body is stored in ``self._content``.

        :raises requests.exceptions.Timeout: if the server does not respond
            within ``FETCH_TIMEOUT_SECONDS``.
        """
        user_agent = {'User-agent': 'BypassCensorship/1.0'}
        page = requests.get(
            app.config['EXTERNAL_CHECK_URL'],
            headers=user_agent,
            timeout=FETCH_TIMEOUT_SECONDS,
        )
        # NOTE(review): the HTTP status is not checked, so an error page is
        # stored and parsed like any other response - confirm this is intended.
        self._content = page.content

    def _parse(self) -> None:
        """
        Extract patterns from the page previously stored by ``_fetch``.

        Every ``<h2>`` heading is paired, by position, with the
        ``<div class="overflow-auto mb-5">`` at the same index. For each pair
        whose heading text is listed in
        ``app.config['EXTERNAL_VANTAGE_POINTS']`` and whose section holds no
        nested ``<div>``, the text of every ``<a>`` in the section is prefixed
        with ``https://`` and appended to ``self.patterns``.
        """
        soup = BeautifulSoup(self._content, 'html.parser')
        headings = soup.find_all('h2')
        sections = soup.find_all('div', class_="overflow-auto mb-5")
        vantage_points = app.config['EXTERNAL_VANTAGE_POINTS']
        # zip() stops at the shorter sequence, so a page with more headings
        # than result sections no longer raises IndexError.
        for heading, section in zip(headings, sections):
            # Sections containing a nested <div> are skipped.
            # NOTE(review): presumably those hold a notice rather than a list
            # of results - confirm against the source page.
            if section.div or heading.text not in vantage_points:
                continue
            for anchor in section.find_all('a'):
                self.patterns.append("https://" + anchor.text)