feat(block): use json list for external block checks
parent c424b9a5fa
commit 19681d1eca
1 changed file with 23 additions and 13 deletions
@@ -1,10 +1,29 @@
-from bs4 import BeautifulSoup
+import logging
 
 import requests
 
 from app import app
 from app.terraform.block_mirror import BlockMirrorAutomation
 
+
+def _trim_prefix(s: str, prefix: str) -> str:
+    if s.startswith(prefix):
+        return s[len(prefix):]
+    return s
+
+
+def trim_http_https(s: str) -> str:
+    """
+    Return the string with "http://" or "https://" removed from the start of the string if present.
+
+    :param s: String to modify.
+    :return: Modified string.
+    """
+    return _trim_prefix(
+        _trim_prefix(s, "https://"),
+        "http://")
+
+
 class BlockExternalAutomation(BlockMirrorAutomation):
     """
     Automation task to import proxy reachability results from external source.
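The two helpers added in this hunk are easy to sanity-check in isolation. Below is a standalone sketch of their behaviour; the function bodies are copied from the diff, and the example URLs are made up:

# Standalone sketch; definitions copied from the hunk above, example URLs invented.
def _trim_prefix(s: str, prefix: str) -> str:
    if s.startswith(prefix):
        return s[len(prefix):]
    return s


def trim_http_https(s: str) -> str:
    return _trim_prefix(
        _trim_prefix(s, "https://"),
        "http://")


assert trim_http_https("https://example.com") == "example.com"
assert trim_http_https("http://example.com") == "example.com"
assert trim_http_https("example.com") == "example.com"              # no scheme: unchanged
assert trim_http_https("ftp://example.com") == "ftp://example.com"  # only http(s) is trimmed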
@@ -16,17 +35,8 @@ class BlockExternalAutomation(BlockMirrorAutomation):
 
     def fetch(self) -> None:
         user_agent = {'User-agent': 'BypassCensorship/1.0'}
-        page = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent, timeout=30)
-        self._content = page.content
+        self._data = requests.get(app.config['EXTERNAL_CHECK_URL'], headers=user_agent, timeout=30).json()
 
     def parse(self) -> None:
-        soup = BeautifulSoup(self._content, 'html.parser')
-        h2 = soup.find_all('h2')  # pylint: disable=invalid-name
-        div = soup.find_all('div', class_="overflow-auto mb-5")
-        i = 0
-        for idx, heading in enumerate(h2):
-            if not div[idx].div:
-                anchors = div[idx].find_all('a')
-                for anchor in anchors:
-                    self.patterns.append("https://" + anchor.text)
-            i += 1
+        self.patterns.extend(["https://" + trim_http_https(pattern) for pattern in self._data])
+        logging.debug("Found URLs: %s", self.patterns)
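Taken together with the commit title, the new fetch() and parse() imply that EXTERNAL_CHECK_URL now serves a flat JSON list of URL strings instead of an HTML page to scrape. A sketch of the new flow under that assumption, reusing trim_http_https from the first hunk; the payload below is invented, not taken from the real endpoint:

import logging

logging.basicConfig(level=logging.DEBUG)

# Invented stand-in for the JSON body that fetch() would store in self._data.
data = ["https://mirror1.example.com", "http://mirror2.example.com", "mirror3.example.com"]

# What parse() now does: normalise every entry to an https:// pattern,
# whatever scheme (or none) it arrived with.
patterns = ["https://" + trim_http_https(pattern) for pattern in data]
logging.debug("Found URLs: %s", patterns)
assert patterns == [
    "https://mirror1.example.com",
    "https://mirror2.example.com",
    "https://mirror3.example.com",
]

This normalisation is why the helper exists: list entries may arrive with http://, https://, or no scheme at all, and the mirror patterns are stored uniformly as https:// URLs.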