feat(block): use roskomsvoboda private api
This commit is contained in:
parent
19681d1eca
commit
fb1341365f
2 changed files with 78 additions and 4 deletions
|
@ -1,7 +1,16 @@
|
|||
from typing import Any
|
||||
import json
|
||||
import logging
|
||||
from io import BytesIO
|
||||
from typing import Any, Optional
|
||||
from zipfile import ZipFile, BadZipFile
|
||||
|
||||
import lxml
|
||||
import requests
|
||||
from lxml.etree import XMLSyntaxError
|
||||
|
||||
from app.extensions import db
|
||||
from app.models.activity import Activity
|
||||
from app.models.tfstate import TerraformState
|
||||
from app.terraform.block_mirror import BlockMirrorAutomation
|
||||
|
||||
|
||||
|
@ -22,9 +31,72 @@ class BlockRoskomsvobodaAutomation(BlockMirrorAutomation):
|
|||
|
||||
_data: Any
|
||||
|
||||
def _fetch(self, latest_rev) -> None:
    """Download dump revision ``latest_rev`` and keep its ``dump.xml`` in ``self._data``.

    ``self._data`` is reset to ``None`` first, so it stays ``None`` whenever
    the download or the extraction fails. An HTTP error status or an
    unreadable zip archive is not raised to the caller; it is recorded and
    notified through an ``Activity`` entry instead.

    :param latest_rev: revision identifier appended to the dump download URL.
    """
    self._data = None
    try:
        # NOTE(review): verify=False turns off TLS certificate validation for
        # this download -- confirm the dump server still requires it.
        r = requests.get(f"https://dumps.rublacklist.net/fetch/{latest_rev}",
                         verify=False, timeout=180)
        r.raise_for_status()
        # The context manager closes the archive as soon as dump.xml is read.
        with ZipFile(BytesIO(r.content)) as zip_file:
            self._data = zip_file.read("dump.xml")
        logging.debug("Downloaded %s bytes in dump %s", len(self._data), latest_rev)
    except requests.HTTPError:
        # Only raise_for_status() raises HTTPError here, so ``r`` is bound.
        activity = Activity(
            activity_type="automation",
            text=(f"[{self.short_name}] 🚨 Unable to download dump {latest_rev} due to HTTP error {r.status_code}. "
                  "The automation task has not been disabled and will attempt to download the next dump when the "
                  "latest dump revision is incremented at the server."))
        activity.notify()
        db.session.add(activity)
        db.session.commit()
    except BadZipFile:
        activity = Activity(
            activity_type="automation",
            text=(f"[{self.short_name}] 🚨 Unable to extract zip file from dump {latest_rev}. There was an error "
                  "related to the format of the zip file. "
                  "The automation task has not been disabled and will attempt to download the next dump when the "
                  "latest dump revision is incremented at the server."))
        activity.notify()
        db.session.add(activity)
        db.session.commit()
|
||||
|
||||
def fetch(self) -> None:
    """Check the dump server for a new revision and download it when there is one.

    The last revision seen is stored as JSON (``{"dump_rev": ...}``) in the
    ``TerraformState`` row whose key is ``block_roskomsvoboda``; the row is
    created on first use with revision ``"0"``. When the server reports a
    different revision the dump is downloaded via ``_fetch``; otherwise
    ``self._data`` is set to ``None``.
    """
    state: Optional[TerraformState] = TerraformState.query.filter(
        TerraformState.key == "block_roskomsvoboda").first()
    if state is None:
        state = TerraformState()
        state.key = "block_roskomsvoboda"
        db.session.add(state)
        latest_metadata = {"dump_rev": "0"}
    else:
        latest_metadata = json.loads(state.state)
    # NOTE(review): verify=False turns off TLS certificate validation for
    # this request -- confirm the dump server still requires it.
    latest_rev = requests.get("https://dumps.rublacklist.net/fetch/latest",
                              verify=False, timeout=30).text.strip()
    logging.debug("Latest revision is %s, already got %s", latest_rev, latest_metadata["dump_rev"])
    if latest_rev != latest_metadata["dump_rev"]:
        # The revision is persisted before the download is attempted, so a
        # dump that fails to download is not retried on the next run.
        state.state = json.dumps({"dump_rev": latest_rev})
        db.session.commit()
        self._fetch(latest_rev)
    else:
        self._data = None
|
||||
|
||||
def parse(self) -> None:
    """Append an ``https://`` pattern for each ``domain`` element of the dump.

    Returns immediately when ``self._data`` is empty or ``None``. Otherwise
    the XML held in ``self._data`` is parsed incrementally and the stripped
    text of every ``domain`` element is appended to ``self.patterns`` with an
    ``https://`` prefix. A malformed document is not raised to the caller; it
    is recorded and notified through an ``Activity`` entry, and any patterns
    collected before the error are kept.
    """
    if not self._data:
        logging.debug("No new data to parse")
        return
    try:
        for _event, element in lxml.etree.iterparse(BytesIO(self._data)):
            if element.tag != "domain":
                continue
            # element.text is None for an empty <domain/>; skip blank names
            # rather than crash or emit a bare "https://" pattern.
            domain = (element.text or "").strip()
            if domain:
                self.patterns.append("https://" + domain)
    except XMLSyntaxError:
        activity = Activity(
            activity_type="automation",
            text=(f"[{self.short_name}] 🚨 Unable to parse XML file from dump. There was an error "
                  "related to the format of the XML file within the zip file. Interestingly we were able to "
                  "extract the file from the zip file fine. "
                  "The automation task has not been disabled and will attempt to download the next dump when the "
                  "latest dump revision is incremented at the server."))
        activity.notify()
        db.session.add(activity)
        db.session.commit()
    logging.debug("Found %s patterns", len(self.patterns))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue