from bs4 import BeautifulSoup
import requests

from app import app
from app.terraform.block_mirror import BlockMirrorAutomation

# Upper bound, in seconds, on how long the HTTP request in ``fetch`` may wait
# for the server. Without it, ``requests`` waits indefinitely on a stalled
# connection.
_FETCH_TIMEOUT_SECONDS = 30


class BlockExternalAutomation(BlockMirrorAutomation):
    """
    Automation task to import proxy reachability results from external source.
    """

    short_name = "block_external"
    description = "Import proxy reachability results from external source"

    # Raw response body stored by ``fetch`` and consumed by ``parse``.
    _content: bytes

    def fetch(self) -> None:
        """
        Download the page at ``app.config['EXTERNAL_CHECK_URL']``.

        The raw response body is stored in ``self._content``.

        Raises:
            requests.exceptions.RequestException: if the request fails or
                exceeds the timeout.
        """
        user_agent = {'User-agent': 'BypassCensorship/1.0'}
        page = requests.get(
            app.config['EXTERNAL_CHECK_URL'],
            headers=user_agent,
            timeout=_FETCH_TIMEOUT_SECONDS,
        )
        # NOTE(review): the HTTP status is not checked, so an error page is
        # stored and parsed like any other response - confirm this is intended.
        self._content = page.content

    def parse(self) -> None:
        """
        Extract link texts from the fetched HTML into ``self.patterns``.

        The ``<h2>`` elements and the ``<div class="overflow-auto mb-5">``
        elements are paired by position. For every paired container that has
        no nested ``<div>``, the text of each ``<a>`` inside it is prefixed
        with ``"https://"`` and appended to ``self.patterns``.
        """
        soup = BeautifulSoup(self._content, 'html.parser')
        headings = soup.find_all('h2')
        containers = soup.find_all('div', class_="overflow-auto mb-5")
        # zip() stops at the shorter list: containers beyond the number of
        # headings are ignored (as before), and a page with more headings
        # than containers no longer raises IndexError.
        for _heading, container in zip(headings, containers):
            if container.div is not None:
                # Containers that hold a nested <div> are skipped.
                continue
            for anchor in container.find_all('a'):
                self.patterns.append("https://" + anchor.text)