Serve RSS feeds through app with host rewrites

This commit is contained in:
Abel Luck 2026-03-31 17:30:24 +02:00
parent 3f28e46ff6
commit e7b00b4129
3 changed files with 121 additions and 12 deletions

View file

@ -24,7 +24,7 @@ uv sync --all-groups
uv run repub
```
With no arguments, `uv run repub` starts the web UI in local dev mode and serves published feed files from `/feeds/...` out of `out/feeds/...`.
With no arguments, `uv run repub` starts the web UI in local dev mode. The Python app serves published `.rss` files from `/feeds/...` out of `out/feeds/...`, and in dev mode it also serves non-RSS feed artifacts from the same tree.
By default the UI listens on `127.0.0.1:8080`. You can override that with `REPUBLISHER_HOST` and `REPUBLISHER_PORT`, or with:
@ -32,15 +32,17 @@ By default the UI listens on `127.0.0.1:8080`. You can override that with `REPUB
uv run repub serve --host 0.0.0.0 --port 8080
```
If you invoke the `serve` subcommand explicitly, use `--dev-mode` to expose published feeds directly from the Quart app:
If you invoke the `serve` subcommand explicitly, use `--dev-mode` to expose non-RSS feed artifacts directly from the Quart app:
```sh
uv run repub serve --dev-mode
```
In `--dev-mode`, requests under `/feeds/...` are served from `out/feeds/...`.
Requests for `/feeds/**/*.rss` are always handled by the Python app. It rewrites mirrored feed URLs on the fly by replacing the configured `Feed URL` origin with `https://<Host header>`.
In production, do not rely on Quart to serve published feeds. Configure the reverse proxy to serve `out/feeds/...` directly at `/feeds/...`.
In `--dev-mode`, non-RSS requests under `/feeds/...` are served from `out/feeds/...`.
In production, keep `/feeds/**/*.rss` routed to the Python app. Non-RSS feed artifacts under `out/feeds/...` should still be served directly by the reverse proxy at `/feeds/...`.
Important: the admin UI has no built-in authentication. Keep it bound to localhost or put it behind a trusted network layer such as Tailscale.
@ -57,7 +59,7 @@ Operational notes:
- The default database path is `republisher.db`. Set `REPUBLISHER_DB_PATH` to use a different SQLite file.
- Mirrored feeds are written under `out/feeds/<slug>/`.
In production, expose `out/feeds/` directly from the reverse proxy at `/feeds/`.
In production, route `/feeds/**/*.rss` to the Python app and expose the remaining `out/feeds/` artifacts directly from the reverse proxy at `/feeds/`.
- `Feed URL` is used to generate absolute media URLs and `atom:link rel="self"` in exported feeds.
- Job logs and stats artifacts are written under `out/logs/`.

View file

@ -31,6 +31,7 @@ from repub.model import (
delete_job_source,
delete_source,
initialize_database,
load_feed_url,
load_job_enabled,
load_settings_form,
load_source_form,
@ -151,15 +152,19 @@ def create_app(*, dev_mode: bool = False) -> Quart:
@app.get("/feeds/<path:feed_path>")
async def published_feed(feed_path: str) -> Response:
if Path(feed_path).suffix == ".rss":
return _rss_feed_response(
_read_feed_text(
feeds_dir=Path(app.config["REPUB_FEEDS_DIR"]),
feed_path=feed_path,
)
)
if not bool(app.config["REPUB_DEV_MODE"]):
return Response(status=404)
response = await send_from_directory(
return await send_from_directory(
str(Path(app.config["REPUB_FEEDS_DIR"])),
feed_path,
)
if Path(feed_path).suffix == ".rss":
response.mimetype = "application/rss+xml"
return response
@app.get("/static/<string:asset_name>-<string:asset_hash>.<string:extension>")
async def versioned_static_asset(
@ -582,6 +587,42 @@ def _load_sidebar_counts(app: Quart) -> dict[str, int]:
}
def _rss_feed_response(feed_text: str | None) -> Response:
    """Wrap feed text in an RSS response, or answer 404 when there is none.

    Args:
        feed_text: Body of the feed to send, or ``None`` if no feed was found.

    Returns:
        A response carrying ``feed_text`` with the ``application/rss+xml``
        mimetype, or an empty 404 response when ``feed_text`` is ``None``.
    """
    if feed_text is not None:
        return Response(feed_text, mimetype="application/rss+xml")
    return Response(status=404)
def _read_feed_text(*, feeds_dir: Path, feed_path: str) -> str | None:
    """Load a published feed file and rewrite its URLs for the current request.

    Args:
        feeds_dir: Directory that holds the published feed tree.
        feed_path: Request path of the feed, relative to ``feeds_dir``.

    Returns:
        The UTF-8 decoded feed text after ``_rewrite_feed_text`` has been
        applied with the stored feed URL and the current request's host, or
        ``None`` when ``_resolve_feed_path`` does not yield a file.
    """
    feed_file = _resolve_feed_path(feeds_dir=feeds_dir, feed_path=feed_path)
    if feed_file is None:
        return None

    raw_feed = feed_file.read_text(encoding="utf-8")
    return _rewrite_feed_text(
        raw_feed,
        configured_feed_url=load_feed_url(),
        request_host=request.host,
    )
def _resolve_feed_path(*, feeds_dir: Path, feed_path: str) -> Path | None:
    """Map a requested feed path to a real file inside ``feeds_dir``.

    Both the base directory and the joined candidate are fully resolved before
    the containment check, so ``..`` segments, absolute paths and symlinks
    that lead outside ``feeds_dir`` are rejected.

    Args:
        feeds_dir: Directory that holds the published feed tree.
        feed_path: Requested path, interpreted relative to ``feeds_dir``.

    Returns:
        The resolved path when it lies within ``feeds_dir`` and is an existing
        regular file; otherwise ``None``.
    """
    root = feeds_dir.resolve()
    target = (root / feed_path).resolve()
    # Reject anything that escaped the feeds tree after resolution.
    if not target.is_relative_to(root):
        return None
    if not target.is_file():
        return None
    return target
def _rewrite_feed_text(
    feed_text: str, *, configured_feed_url: str, request_host: str
) -> str:
    """Point URLs under the configured feed URL at the requesting host.

    Occurrences of ``configured_feed_url`` (trailing slashes ignored) in
    ``feed_text`` are replaced with ``https://<request_host>``.  A match only
    counts when the configured URL is not merely a prefix of a longer host or
    path: ``https://ex.org`` matches ``https://ex.org/feeds/...`` but leaves
    ``https://ex.org.uk/...``, ``https://ex.org:9000/...`` and
    ``https://ex.org@other/...`` untouched, since those name different
    origins.

    Args:
        feed_text: Feed document to rewrite.
        configured_feed_url: The configured feed URL whose occurrences are
            replaced.  An empty value (or only slashes) disables rewriting.
        request_host: Host (optionally with port) to substitute; it is
            inserted verbatim after ``https://``.

    Returns:
        The rewritten feed text, or ``feed_text`` unchanged when no feed URL
        is configured.
    """
    import re  # local import keeps this block self-contained

    configured_origin = configured_feed_url.rstrip("/")
    if not configured_origin:
        return feed_text

    replacement = f"https://{request_host}"
    # The lookahead rejects matches that continue as a longer host name,
    # a port, userinfo, or a longer path segment.  A trailing "." or ":"
    # that is plain punctuation (not followed by a word character) still
    # counts as the end of the URL.
    pattern = re.escape(configured_origin) + r"(?![\w-]|[.:@]\w)"
    # A callable replacement keeps backslashes in the host literal.
    return re.sub(pattern, lambda _match: replacement, feed_text)
async def _clean_tab_state_periodically(app: Quart) -> None:
while True:
await asyncio.sleep(TAB_STATE_CLEAN_INTERVAL.total_seconds())

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import asyncio
from pathlib import Path
from repub.model import save_setting
from repub.web import create_app
@ -49,9 +50,7 @@ def test_dev_mode_serves_feed_enclosure_assets(monkeypatch, tmp_path: Path) -> N
asyncio.run(run())
def test_default_mode_does_not_serve_published_feeds(
monkeypatch, tmp_path: Path
) -> None:
def test_default_mode_serves_published_rss_feeds(monkeypatch, tmp_path: Path) -> None:
db_path = tmp_path / "default-mode.db"
feeds_dir = tmp_path / "out" / "feeds"
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
@ -66,6 +65,73 @@ def test_default_mode_does_not_serve_published_feeds(
client = app.test_client()
response = await client.get("/feeds/demo-source/feed.rss")
assert response.status_code == 200
assert response.mimetype == "application/rss+xml"
assert await response.get_data(as_text=True) == "<rss/>\n"
asyncio.run(run())
def test_default_mode_does_not_serve_feed_enclosure_assets(
    monkeypatch, tmp_path: Path
) -> None:
    """Without dev mode, a non-RSS file under ``/feeds/`` answers 404."""
    published_root = tmp_path / "out" / "feeds"
    database_file = tmp_path / "default-mode-assets.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(database_file))

    async def scenario() -> None:
        app = create_app()
        app.config["REPUB_FEEDS_DIR"] = published_root

        # The file exists on disk, so a 404 can only come from the route.
        audio_dir = published_root / "demo-source" / "audio"
        audio_dir.mkdir(parents=True)
        (audio_dir / "episode.mp3").write_bytes(b"mp3-data")

        http = app.test_client()
        reply = await http.get("/feeds/demo-source/audio/episode.mp3")

        assert reply.status_code == 404

    asyncio.run(scenario())
def test_published_rss_rewrites_feed_url_to_https_host_header(
    monkeypatch, tmp_path: Path
) -> None:
    """Served RSS swaps the saved feed URL for ``https://<Host header>``.

    URLs that do not start with the saved feed URL must pass through as-is.
    """
    published_root = tmp_path / "out" / "feeds"
    database_file = tmp_path / "rewrite-feed-url.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(database_file))

    stored_xml = (
        "<rss><channel>"
        "<url>https://ocb.bypasscensorship.org/feeds/"
        "mn-america-latina/images/full/example.jpg</url>"
        "<link>https://example.com/article</link>"
        "</channel></rss>\n"
    )
    expected_xml = (
        "<rss><channel>"
        "<url>https://altmirror.example:8443/feeds/"
        "mn-america-latina/images/full/example.jpg</url>"
        "<link>https://example.com/article</link>"
        "</channel></rss>\n"
    )

    async def scenario() -> None:
        app = create_app()
        app.config["REPUB_FEEDS_DIR"] = published_root
        save_setting("feed_url", "https://ocb.bypasscensorship.org")

        source_dir = published_root / "mn-america-latina"
        source_dir.mkdir(parents=True)
        (source_dir / "feed.rss").write_text(stored_xml, encoding="utf-8")

        http = app.test_client()
        reply = await http.get(
            "/feeds/mn-america-latina/feed.rss",
            headers={"Host": "altmirror.example:8443"},
        )

        assert reply.status_code == 200
        assert reply.mimetype == "application/rss+xml"
        assert await reply.get_data(as_text=True) == expected_xml

    asyncio.run(scenario())