Serve RSS feeds through app with host rewrites
This commit is contained in:
parent
3f28e46ff6
commit
e7b00b4129
3 changed files with 121 additions and 12 deletions
12
README.md
12
README.md
|
|
@ -24,7 +24,7 @@ uv sync --all-groups
|
|||
uv run repub
|
||||
```
|
||||
|
||||
With no arguments, `uv run repub` starts the web UI in local dev mode and serves published feed files from `/feeds/...` out of `out/feeds/...`.
|
||||
With no arguments, `uv run repub` starts the web UI in local dev mode. The Python app serves published `.rss` files from `/feeds/...` out of `out/feeds/...`, and in dev mode it also serves non-RSS feed artifacts from the same tree.
|
||||
|
||||
By default the UI listens on `127.0.0.1:8080`. You can override that with `REPUBLISHER_HOST` and `REPUBLISHER_PORT`, or with:
|
||||
|
||||
|
|
@ -32,15 +32,17 @@ By default the UI listens on `127.0.0.1:8080`. You can override that with `REPUB
|
|||
uv run repub serve --host 0.0.0.0 --port 8080
|
||||
```
|
||||
|
||||
If you invoke the `serve` subcommand explicitly, use `--dev-mode` to expose published feeds directly from the Quart app:
|
||||
If you invoke the `serve` subcommand explicitly, use `--dev-mode` to expose non-RSS feed artifacts directly from the Quart app:
|
||||
|
||||
```sh
|
||||
uv run repub serve --dev-mode
|
||||
```
|
||||
|
||||
In `--dev-mode`, requests under `/feeds/...` are served from `out/feeds/...`.
|
||||
Requests for `/feeds/**/*.rss` are always handled by the Python app. It rewrites mirrored feed URLs on the fly by replacing every occurrence of the configured `Feed URL` (trailing slashes ignored) with `https://<Host header>`.
|
||||
|
||||
In production, do not rely on Quart to serve published feeds. Configure the reverse proxy to serve `out/feeds/...` directly at `/feeds/...`.
|
||||
In `--dev-mode`, non-RSS requests under `/feeds/...` are served from `out/feeds/...`.
|
||||
|
||||
In production, keep `/feeds/**/*.rss` routed to the Python app. Non-RSS feed artifacts under `out/feeds/...` should still be served directly by the reverse proxy at `/feeds/...`.
|
||||
|
||||
Important: the admin UI has no built-in authentication. Keep it bound to localhost or put it behind a trusted network layer such as Tailscale.
|
||||
|
||||
|
|
@ -57,7 +59,7 @@ Operational notes:
|
|||
|
||||
- The default database path is `republisher.db`. Set `REPUBLISHER_DB_PATH` to use a different SQLite file.
|
||||
- Mirrored feeds are written under `out/feeds/<slug>/`.
|
||||
In production, expose `out/feeds/` directly from the reverse proxy at `/feeds/`.
|
||||
In production, route `/feeds/**/*.rss` to the Python app and expose the remaining `out/feeds/` artifacts directly from the reverse proxy at `/feeds/`.
|
||||
- `Feed URL` is used to generate absolute media URLs and `atom:link rel="self"` in exported feeds.
|
||||
- Job logs and stats artifacts are written under `out/logs/`.
|
||||
|
||||
|
|
|
|||
49
repub/web.py
49
repub/web.py
|
|
@ -31,6 +31,7 @@ from repub.model import (
|
|||
delete_job_source,
|
||||
delete_source,
|
||||
initialize_database,
|
||||
load_feed_url,
|
||||
load_job_enabled,
|
||||
load_settings_form,
|
||||
load_source_form,
|
||||
|
|
@ -151,15 +152,19 @@ def create_app(*, dev_mode: bool = False) -> Quart:
|
|||
|
||||
@app.get("/feeds/<path:feed_path>")
|
||||
async def published_feed(feed_path: str) -> Response:
|
||||
if Path(feed_path).suffix == ".rss":
|
||||
return _rss_feed_response(
|
||||
_read_feed_text(
|
||||
feeds_dir=Path(app.config["REPUB_FEEDS_DIR"]),
|
||||
feed_path=feed_path,
|
||||
)
|
||||
)
|
||||
if not bool(app.config["REPUB_DEV_MODE"]):
|
||||
return Response(status=404)
|
||||
response = await send_from_directory(
|
||||
return await send_from_directory(
|
||||
str(Path(app.config["REPUB_FEEDS_DIR"])),
|
||||
feed_path,
|
||||
)
|
||||
if Path(feed_path).suffix == ".rss":
|
||||
response.mimetype = "application/rss+xml"
|
||||
return response
|
||||
|
||||
@app.get("/static/<string:asset_name>-<string:asset_hash>.<string:extension>")
|
||||
async def versioned_static_asset(
|
||||
|
|
@ -582,6 +587,42 @@ def _load_sidebar_counts(app: Quart) -> dict[str, int]:
|
|||
}
|
||||
|
||||
|
||||
def _rss_feed_response(feed_text: str | None) -> Response:
    """Build the HTTP response for a published RSS feed.

    Returns a body-less 404 response when ``feed_text`` is ``None``;
    otherwise returns the text with the ``application/rss+xml`` mimetype.
    """
    if feed_text is not None:
        return Response(feed_text, mimetype="application/rss+xml")
    return Response(status=404)
|
||||
|
||||
|
||||
def _read_feed_text(*, feeds_dir: Path, feed_path: str) -> str | None:
    """Load a published feed and rewrite it for the current request.

    Returns ``None`` when ``feed_path`` does not resolve to a file under
    ``feeds_dir``. Otherwise the file is read as UTF-8 and passed through
    ``_rewrite_feed_text`` together with the value of ``load_feed_url()``
    and the current request's host.
    """
    feed_file = _resolve_feed_path(feeds_dir=feeds_dir, feed_path=feed_path)
    if feed_file is None:
        return None

    stored_text = feed_file.read_text(encoding="utf-8")
    return _rewrite_feed_text(
        stored_text,
        configured_feed_url=load_feed_url(),
        request_host=request.host,
    )
|
||||
|
||||
|
||||
def _resolve_feed_path(*, feeds_dir: Path, feed_path: str) -> Path | None:
    """Map a request path onto an existing file inside ``feeds_dir``.

    Both the feeds directory and the joined candidate are fully resolved
    before comparison, so ``..`` segments, absolute paths and symlinks that
    lead outside the feeds directory yield ``None``. ``None`` is also
    returned when the resolved target is not a regular file.
    """
    root = feeds_dir.resolve()
    target = (root / feed_path).resolve()

    # The target must be the root itself or live somewhere beneath it.
    if target != root and root not in target.parents:
        return None
    if not target.is_file():
        return None
    return target
|
||||
|
||||
|
||||
def _rewrite_feed_text(
    feed_text: str, *, configured_feed_url: str, request_host: str
) -> str:
    """Point mirrored URLs in a feed at the host the client asked for.

    Every occurrence of ``configured_feed_url`` (with trailing slashes
    removed) in ``feed_text`` is replaced by ``https://<request_host>``.

    Args:
        feed_text: Serialized feed document.
        configured_feed_url: Base URL the feed was exported with. When it is
            empty, or consists only of slashes, the text is returned as is.
        request_host: Host (optionally with port) to substitute. The caller
            supplies the request's Host header, so it is treated as
            untrusted input.

    Returns:
        The feed text with the configured URL prefix swapped out.
    """
    from xml.sax.saxutils import escape

    configured_origin = configured_feed_url.rstrip("/")
    if not configured_origin:
        return feed_text

    # The host is client-controlled and lands inside XML text and attribute
    # values; escape markup characters and both quote styles so a hostile
    # header cannot inject elements or break out of an attribute. A
    # well-formed host contains none of these characters and is unchanged.
    safe_host = escape(request_host, {'"': "&quot;", "'": "&apos;"})
    return feed_text.replace(configured_origin, f"https://{safe_host}")
|
||||
|
||||
|
||||
async def _clean_tab_state_periodically(app: Quart) -> None:
|
||||
while True:
|
||||
await asyncio.sleep(TAB_STATE_CLEAN_INTERVAL.total_seconds())
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
|||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from repub.model import save_setting
|
||||
from repub.web import create_app
|
||||
|
||||
|
||||
|
|
@ -49,9 +50,7 @@ def test_dev_mode_serves_feed_enclosure_assets(monkeypatch, tmp_path: Path) -> N
|
|||
asyncio.run(run())
|
||||
|
||||
|
||||
def test_default_mode_does_not_serve_published_feeds(
|
||||
monkeypatch, tmp_path: Path
|
||||
) -> None:
|
||||
def test_default_mode_serves_published_rss_feeds(monkeypatch, tmp_path: Path) -> None:
|
||||
db_path = tmp_path / "default-mode.db"
|
||||
feeds_dir = tmp_path / "out" / "feeds"
|
||||
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
|
||||
|
|
@ -66,6 +65,73 @@ def test_default_mode_does_not_serve_published_feeds(
|
|||
client = app.test_client()
|
||||
response = await client.get("/feeds/demo-source/feed.rss")
|
||||
|
||||
assert response.status_code == 200
|
||||
assert response.mimetype == "application/rss+xml"
|
||||
assert await response.get_data(as_text=True) == "<rss/>\n"
|
||||
|
||||
asyncio.run(run())
|
||||
|
||||
|
||||
def test_default_mode_does_not_serve_feed_enclosure_assets(
    monkeypatch, tmp_path: Path
) -> None:
    """Without dev mode, a non-RSS file under /feeds/ answers 404."""
    database_file = tmp_path / "default-mode-assets.db"
    feeds_dir = tmp_path / "out" / "feeds"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(database_file))

    async def scenario() -> None:
        app = create_app()
        app.config["REPUB_FEEDS_DIR"] = feeds_dir

        # The asset exists on disk, so a 404 can only come from the route.
        mp3_file = feeds_dir / "demo-source" / "audio" / "episode.mp3"
        mp3_file.parent.mkdir(parents=True)
        mp3_file.write_bytes(b"mp3-data")

        reply = await app.test_client().get("/feeds/demo-source/audio/episode.mp3")

        assert reply.status_code == 404

    asyncio.run(scenario())
|
||||
|
||||
|
||||
def test_published_rss_rewrites_feed_url_to_https_host_header(
    monkeypatch, tmp_path: Path
) -> None:
    """A served .rss feed has the configured feed URL swapped for the Host header."""
    database_file = tmp_path / "rewrite-feed-url.db"
    feeds_dir = tmp_path / "out" / "feeds"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(database_file))

    stored_feed = (
        "<rss><channel>"
        "<url>https://ocb.bypasscensorship.org/feeds/"
        "mn-america-latina/images/full/example.jpg</url>"
        "<link>https://example.com/article</link>"
        "</channel></rss>\n"
    )
    # Only the URL under the configured feed URL changes; the unrelated
    # article link must come back untouched.
    expected_feed = (
        "<rss><channel>"
        "<url>https://altmirror.example:8443/feeds/"
        "mn-america-latina/images/full/example.jpg</url>"
        "<link>https://example.com/article</link>"
        "</channel></rss>\n"
    )

    async def scenario() -> None:
        app = create_app()
        app.config["REPUB_FEEDS_DIR"] = feeds_dir
        save_setting("feed_url", "https://ocb.bypasscensorship.org")

        rss_file = feeds_dir / "mn-america-latina" / "feed.rss"
        rss_file.parent.mkdir(parents=True)
        rss_file.write_text(stored_feed, encoding="utf-8")

        reply = await app.test_client().get(
            "/feeds/mn-america-latina/feed.rss",
            headers={"Host": "altmirror.example:8443"},
        )

        assert reply.status_code == 200
        assert reply.mimetype == "application/rss+xml"
        assert await reply.get_data(as_text=True) == expected_feed

    asyncio.run(scenario())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue