From e7b00b4129d0f3a3c1e02ddff32e6889e95c6162 Mon Sep 17 00:00:00 2001 From: Abel Luck Date: Tue, 31 Mar 2026 17:30:24 +0200 Subject: [PATCH] Serve RSS feeds through app with host rewrites --- README.md | 12 ++++--- repub/web.py | 49 +++++++++++++++++++++++++--- tests/test_dev_mode.py | 72 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 121 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 213f955..de2260b 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ uv sync --all-groups uv run repub ``` -With no arguments, `uv run repub` starts the web UI in local dev mode and serves published feed files from `/feeds/...` out of `out/feeds/...`. +With no arguments, `uv run repub` starts the web UI in local dev mode. The Python app serves published `.rss` files from `/feeds/...` out of `out/feeds/...`, and in dev mode it also serves non-RSS feed artifacts from the same tree. By default the UI listens on `127.0.0.1:8080`. You can override that with `REPUBLISHER_HOST` and `REPUBLISHER_PORT`, or with: @@ -32,15 +32,17 @@ By default the UI listens on `127.0.0.1:8080`. You can override that with `REPUB uv run repub serve --host 0.0.0.0 --port 8080 ``` -If you invoke the `serve` subcommand explicitly, use `--dev-mode` to expose published feeds directly from the Quart app: +If you invoke the `serve` subcommand explicitly, use `--dev-mode` to expose non-RSS feed artifacts directly from the Quart app: ```sh uv run repub serve --dev-mode ``` -In `--dev-mode`, requests under `/feeds/...` are served from `out/feeds/...`. +Requests for `/feeds/**/*.rss` are always handled by the Python app. It rewrites mirrored feed URLs on the fly by replacing the configured `Feed URL` origin with `https://` followed by the host of the incoming request. -In production, do not rely on Quart to serve published feeds. Configure the reverse proxy to serve `out/feeds/...` directly at `/feeds/...`. +In `--dev-mode`, non-RSS requests under `/feeds/...` are served from `out/feeds/...`. 
+ +In production, keep `/feeds/**/*.rss` routed to the Python app. Non-RSS feed artifacts under `out/feeds/...` should still be served directly by the reverse proxy at `/feeds/...`. Important: the admin UI has no built-in authentication. Keep it bound to localhost or put it behind a trusted network layer such as Tailscale. @@ -57,7 +59,7 @@ Operational notes: - The default database path is `republisher.db`. Set `REPUBLISHER_DB_PATH` to use a different SQLite file. - Mirrored feeds are written under `out/feeds//`. - In production, expose `out/feeds/` directly from the reverse proxy at `/feeds/`. + In production, route `/feeds/**/*.rss` to the Python app and expose the remaining `out/feeds/` artifacts directly from the reverse proxy at `/feeds/`. - `Feed URL` is used to generate absolute media URLs and `atom:link rel="self"` in exported feeds. - Job logs and stats artifacts are written under `out/logs/`. diff --git a/repub/web.py b/repub/web.py index 372e121..b2783e5 100644 --- a/repub/web.py +++ b/repub/web.py @@ -31,6 +31,7 @@ from repub.model import ( delete_job_source, delete_source, initialize_database, + load_feed_url, load_job_enabled, load_settings_form, load_source_form, @@ -151,15 +152,19 @@ def create_app(*, dev_mode: bool = False) -> Quart: @app.get("/feeds/") async def published_feed(feed_path: str) -> Response: + if Path(feed_path).suffix == ".rss": + return _rss_feed_response( + _read_feed_text( + feeds_dir=Path(app.config["REPUB_FEEDS_DIR"]), + feed_path=feed_path, + ) + ) if not bool(app.config["REPUB_DEV_MODE"]): return Response(status=404) - response = await send_from_directory( + return await send_from_directory( str(Path(app.config["REPUB_FEEDS_DIR"])), feed_path, ) - if Path(feed_path).suffix == ".rss": - response.mimetype = "application/rss+xml" - return response @app.get("/static/-.") async def versioned_static_asset( @@ -582,6 +587,42 @@ def _load_sidebar_counts(app: Quart) -> dict[str, int]: } +def _rss_feed_response(feed_text: str | None) 
-> Response: + if feed_text is None: + return Response(status=404) + return Response(feed_text, mimetype="application/rss+xml") + + +def _read_feed_text(*, feeds_dir: Path, feed_path: str) -> str | None: + resolved_path = _resolve_feed_path(feeds_dir=feeds_dir, feed_path=feed_path) + if resolved_path is None: + return None + return _rewrite_feed_text( + resolved_path.read_text(encoding="utf-8"), + configured_feed_url=load_feed_url(), + request_host=request.host, + ) + + +def _resolve_feed_path(*, feeds_dir: Path, feed_path: str) -> Path | None: + base_dir = feeds_dir.resolve() + candidate_path = (base_dir / feed_path).resolve() + try: + candidate_path.relative_to(base_dir) + except ValueError: + return None + return candidate_path if candidate_path.is_file() else None + + +def _rewrite_feed_text( + feed_text: str, *, configured_feed_url: str, request_host: str +) -> str: + configured_origin = configured_feed_url.rstrip("/") + if configured_origin == "": + return feed_text + return feed_text.replace(configured_origin, f"https://{request_host}") + + async def _clean_tab_state_periodically(app: Quart) -> None: while True: await asyncio.sleep(TAB_STATE_CLEAN_INTERVAL.total_seconds()) diff --git a/tests/test_dev_mode.py b/tests/test_dev_mode.py index f58d640..ae84740 100644 --- a/tests/test_dev_mode.py +++ b/tests/test_dev_mode.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio from pathlib import Path +from repub.model import save_setting from repub.web import create_app @@ -49,9 +50,7 @@ def test_dev_mode_serves_feed_enclosure_assets(monkeypatch, tmp_path: Path) -> N asyncio.run(run()) -def test_default_mode_does_not_serve_published_feeds( - monkeypatch, tmp_path: Path -) -> None: +def test_default_mode_serves_published_rss_feeds(monkeypatch, tmp_path: Path) -> None: db_path = tmp_path / "default-mode.db" feeds_dir = tmp_path / "out" / "feeds" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) @@ -66,6 +65,73 @@ def 
test_default_mode_does_not_serve_published_feeds( client = app.test_client() response = await client.get("/feeds/demo-source/feed.rss") + assert response.status_code == 200 + assert response.mimetype == "application/rss+xml" + assert await response.get_data(as_text=True) == "\n" + + asyncio.run(run()) + + +def test_default_mode_does_not_serve_feed_enclosure_assets( + monkeypatch, tmp_path: Path +) -> None: + db_path = tmp_path / "default-mode-assets.db" + feeds_dir = tmp_path / "out" / "feeds" + monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) + + async def run() -> None: + app = create_app() + app.config["REPUB_FEEDS_DIR"] = feeds_dir + enclosure_path = feeds_dir / "demo-source" / "audio" / "episode.mp3" + enclosure_path.parent.mkdir(parents=True) + enclosure_path.write_bytes(b"mp3-data") + + client = app.test_client() + response = await client.get("/feeds/demo-source/audio/episode.mp3") + assert response.status_code == 404 asyncio.run(run()) + + +def test_published_rss_rewrites_feed_url_to_https_host_header( + monkeypatch, tmp_path: Path +) -> None: + db_path = tmp_path / "rewrite-feed-url.db" + feeds_dir = tmp_path / "out" / "feeds" + monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) + + async def run() -> None: + app = create_app() + app.config["REPUB_FEEDS_DIR"] = feeds_dir + save_setting("feed_url", "https://ocb.bypasscensorship.org") + feed_path = feeds_dir / "mn-america-latina" / "feed.rss" + feed_path.parent.mkdir(parents=True) + feed_path.write_text( + ( + "" + "https://ocb.bypasscensorship.org/feeds/" + "mn-america-latina/images/full/example.jpg" + "https://example.com/article" + "\n" + ), + encoding="utf-8", + ) + + client = app.test_client() + response = await client.get( + "/feeds/mn-america-latina/feed.rss", + headers={"Host": "altmirror.example:8443"}, + ) + + assert response.status_code == 200 + assert response.mimetype == "application/rss+xml" + assert await response.get_data(as_text=True) == ( + "" + 
"https://altmirror.example:8443/feeds/" + "mn-america-latina/images/full/example.jpg" + "https://example.com/article" + "\n" + ) + + asyncio.run(run())