fix: publish feeds atomically
This commit is contained in:
parent
cbb427b89d
commit
e64a32d76b
7 changed files with 253 additions and 4 deletions
|
|
@ -38,6 +38,10 @@ def feed_output_path(*, out_dir: Path, feed_slug: str) -> Path:
|
|||
return feed_output_dir(out_dir=out_dir, feed_slug=feed_slug) / "feed.rss"
|
||||
|
||||
|
||||
def staged_feed_output_path(*, out_dir: Path, feed_slug: str) -> Path:
|
||||
return feed_output_dir(out_dir=out_dir, feed_slug=feed_slug) / ".feed.rss.next"
|
||||
|
||||
|
||||
def _resolve_path(base_path: Path, value: str) -> Path:
|
||||
path = Path(value).expanduser()
|
||||
if not path.is_absolute():
|
||||
|
|
@ -218,7 +222,7 @@ def build_feed_settings(
|
|||
{
|
||||
"REPUBLISHER_OUT_DIR": str(out_dir),
|
||||
"FEEDS": {
|
||||
str(feed_output_path(out_dir=out_dir, feed_slug=feed_slug)): {
|
||||
str(staged_feed_output_path(out_dir=out_dir, feed_slug=feed_slug)): {
|
||||
"format": "rss",
|
||||
"postprocessing": [],
|
||||
"feed_name": feed_slug,
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ from repub.config import (
|
|||
load_config,
|
||||
)
|
||||
from repub.media import check_runtime
|
||||
from repub.postprocessing import publish_staged_feed
|
||||
from repub.spiders.rss_spider import RssFeedSpider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -81,6 +82,14 @@ def run_feeds(
|
|||
deferred = process.crawl(crawler, feed_name=feed.slug, url=feed.url)
|
||||
|
||||
def handle_success(_: object) -> None:
|
||||
try:
|
||||
publish_staged_feed(out_dir=out_dir, feed_slug=feed.slug)
|
||||
except Exception:
|
||||
failure = Failure()
|
||||
logger.error("Feed %s (%s) failed to publish", feed.name, feed.slug)
|
||||
logger.critical("%s", failure.getTraceback())
|
||||
results.append((feed.slug, failure))
|
||||
return None
|
||||
logger.info("Feed %s (%s) completed successfully", feed.name, feed.slug)
|
||||
results.append((feed.slug, None))
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ from repub.model import (
|
|||
initialize_database,
|
||||
load_feed_url,
|
||||
)
|
||||
from repub.postprocessing import publish_staged_feed
|
||||
from repub.spiders.rss_spider import RssFeedSpider
|
||||
|
||||
|
||||
|
|
@ -299,6 +300,14 @@ def main(argv: list[str] | None = None) -> int:
|
|||
return 130
|
||||
|
||||
if exit_code == 0:
|
||||
try:
|
||||
publish_staged_feed(out_dir=out_dir, feed_slug=feed.slug)
|
||||
except Exception as error:
|
||||
print(
|
||||
f"worker[{args.job_id}:{args.execution_id}]: publish failed: {error}",
|
||||
flush=True,
|
||||
)
|
||||
return 1
|
||||
print(
|
||||
f"worker[{args.job_id}:{args.execution_id}]: completed successfully",
|
||||
flush=True,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,47 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from contextlib import suppress
|
||||
from pathlib import Path
|
||||
from xml.etree import ElementTree
|
||||
|
||||
from repub.config import feed_output_path, staged_feed_output_path
|
||||
|
||||
|
||||
def publish_staged_feed(*, out_dir: Path, feed_slug: str) -> Path:
|
||||
staged_path = staged_feed_output_path(out_dir=out_dir, feed_slug=feed_slug)
|
||||
public_path = feed_output_path(out_dir=out_dir, feed_slug=feed_slug)
|
||||
|
||||
public_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
_validate_staged_feed(staged_path)
|
||||
_fsync_file(staged_path)
|
||||
os.replace(staged_path, public_path)
|
||||
_fsync_directory(public_path.parent)
|
||||
return public_path
|
||||
|
||||
|
||||
def _fsync_file(path: Path) -> None:
|
||||
with path.open("rb") as handle:
|
||||
os.fsync(handle.fileno())
|
||||
|
||||
|
||||
def _validate_staged_feed(path: Path) -> None:
|
||||
try:
|
||||
root = ElementTree.parse(path).getroot()
|
||||
except ElementTree.ParseError as error:
|
||||
raise ValueError(f"Staged feed is not well-formed XML: {path}") from error
|
||||
|
||||
if root.tag != "rss":
|
||||
raise ValueError(f"Staged feed is not an RSS document: {path}")
|
||||
if root.find("channel") is None:
|
||||
raise ValueError(f"Staged feed is missing an RSS channel: {path}")
|
||||
|
||||
|
||||
def _fsync_directory(path: Path) -> None:
|
||||
flags = os.O_RDONLY | getattr(os, "O_DIRECTORY", 0)
|
||||
with suppress(OSError):
|
||||
fd = os.open(path, flags)
|
||||
try:
|
||||
os.fsync(fd)
|
||||
finally:
|
||||
os.close(fd)
|
||||
Loading…
Add table
Add a link
Reference in a new issue