diff --git a/alembic/versions/2026-05-21_adds_hits.py b/alembic/versions/2026-05-21_adds_hits.py
new file mode 100644
index 0000000..2442cd3
--- /dev/null
+++ b/alembic/versions/2026-05-21_adds_hits.py
@@ -0,0 +1,27 @@
+"""adds hits
+
+Revision ID: 0b42b25de259
+Revises: e723dddd82db
+Create Date: 2026-05-21 16:16:52.147050
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "0b42b25de259"
+down_revision = "e723dddd82db"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column("mirror", sa.Column("hits", sa.Integer(), nullable=True))
+    op.execute("UPDATE mirror SET hits = 0")
+    op.alter_column("mirror", "hits", existing_type=sa.Integer(), nullable=False)
+
+
+def downgrade() -> None:
+    op.drop_column("mirror", "hits")
diff --git a/pyproject.toml b/pyproject.toml
index 4c377d9..91b93c3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,5 +48,5 @@ line-length = 92
 asyncio_default_fixture_loop_scope = "module"
 
 [tool.ruff]
-target-version = "py314"
+target-version = "py313"
 line-length = 92
diff --git a/src/kldscp/client.py b/src/kldscp/client.py
index 56a7f17..3e4ad01 100644
--- a/src/kldscp/client.py
+++ b/src/kldscp/client.py
@@ -44,6 +44,6 @@ def get_kaleidoscope_mirror(origin: str) -> str | None:
     if response.status_code == 200:
         try:
             return response.json()["upstream"]
-        except KeyError:
+        except (KeyError, requests.exceptions.JSONDecodeError):
             return None
     return None
diff --git a/src/main.py b/src/main.py
index 3bd66c6..131965f 100644
--- a/src/main.py
+++ b/src/main.py
@@ -29,7 +29,7 @@ app.include_router(mirrors_router)
 
 @app.get("/")
 def home(host: str = Header(settings.LINK_DOMAIN)):
-    if host.lower().strip() != settings.API_DOMAIN:
+    if host.lower().strip() != settings.API_DOMAIN.lower().strip():
         return RedirectResponse(
             settings.INVALID_URL,
             status_code=status.HTTP_302_FOUND,
diff --git a/src/mirrors/models.py b/src/mirrors/models.py
index 1d807fc..a57e9b7 100644
--- a/src/mirrors/models.py
+++ b/src/mirrors/models.py
@@ -1,6 +1,6 @@
 from datetime import datetime
 
-from sqlalchemy.orm import Mapped
+from sqlalchemy.orm import Mapped, mapped_column
 
 from src.models import CustomBase, IdMixin
 
@@ -14,4 +14,4 @@ class Mirror(CustomBase, IdMixin):
     first_seen: Mapped[datetime]
     last_seen: Mapped[datetime]
     # TODO: Record hits when a redirect goes to the mirror
-    hits: Mapped[int] = 0
+    hits: Mapped[int] = mapped_column(default=0)
diff --git a/src/mirrors/router.py b/src/mirrors/router.py
index 34dd959..0b9da95 100644
--- a/src/mirrors/router.py
+++ b/src/mirrors/router.py
@@ -4,7 +4,7 @@ from fastapi import APIRouter
 
 from src.database import DbSession
-from src.mirrors.schemas import MirrorLinks, RedirectorData
+from src.mirrors.schemas import RedirectorData
 from src.mirrors.service import refresh_mirrors
 from src.security import ApiKey
 
 router = APIRouter()
@@ -12,16 +12,6 @@ router = APIRouter()
 
 @router.post("/api/v1/mirrors")
 def update_mirrors(db: DbSession, auth: ApiKey, data: RedirectorData):
-    for pool, data in enumerate(data.pools):
-        refresh_mirrors(db, pool, data.origins)
+    for pool, pool_data in enumerate(data.pools):
+        refresh_mirrors(db, pool, pool_data.origins)
     db.commit()
-
-
-@router.get("/api/v1/resolve", response_model=MirrorLinks)
-def resolve_mirror(db: DbSession, auth: ApiKey, url: str):
-    parsed = urlparse(url)
-    try:
-        mirror = resolve_mirror(db, parsed.netloc)
-        return {"url": parsed._replace(netloc=mirror)}
-    except ValueError:
-        return {"mirrors": []}
diff --git a/src/mirrors/tasks.py b/src/mirrors/tasks.py
index 614f94e..f0bc7f2 100644
--- a/src/mirrors/tasks.py
+++ b/src/mirrors/tasks.py
@@ -1,3 +1,5 @@
+import logging
+
 import requests
 
 from src.database import get_db_session
@@ -7,10 +9,16 @@ from src.utils import repeat_every
 
 @repeat_every(seconds=600)
 def update_rsf_mirrors():
-    with get_db_session() as db:
-        r = requests.get(
-            "https://raw.githubusercontent.com/RSF-RWB/collateralfreedom/refs/heads/main/sites.json"
-        )
-        mirrors = r.json()
-        refresh_mirrors(db, -2, mirrors)  # Tracking as hardcoded pool -2
-        db.commit()
+    try:
+        r = requests.get(
+            "https://raw.githubusercontent.com/RSF-RWB/collateralfreedom/refs/heads/main/sites.json",
+            timeout=30,
+        )
+        r.raise_for_status()
+        mirrors = r.json()
+    except (requests.RequestException, ValueError):
+        logging.exception("Failed to fetch the RSF mirror list")
+        return
+    with get_db_session() as db:
+        refresh_mirrors(db, -2, mirrors)  # Tracking as hardcoded pool -2
+        db.commit()
diff --git a/src/security.py b/src/security.py
index c384ff2..527c30a 100644
--- a/src/security.py
+++ b/src/security.py
@@ -8,6 +8,8 @@ from src.config import settings
 
 
 def api_key(host: str = Header(), authorization: str | None = Header(None)) -> bool:
-    if host.lower().strip() != settings.API_DOMAIN.strip():
+    # This function deliberately does not throw an exception in the case of unauthenticated
+    # use as there are public endpoints that need to optionally be authenticated.
+    if host.lower().strip() != settings.API_DOMAIN.lower().strip():
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND)
     try:
diff --git a/src/snapshots/client.py b/src/snapshots/client.py
index e9c8e19..5461cc2 100644
--- a/src/snapshots/client.py
+++ b/src/snapshots/client.py
@@ -162,13 +162,17 @@ class SnapshotCamera:
         for element in body.select(", ".join(self.config.article_body_remove_selector)):
             element.decompose()
         for image in body.select("img"):
-            image.attrs = {
-                "src": fetch_url(
-                    pangea_expanded_image_url(self.url),
-                    image.get("src", image.get("data-src", "")),
-                ),
-                "alt": image.get("alt", ""),
-            }
+            img_src = fetch_url(
+                pangea_expanded_image_url(self.url),
+                image.get("src", image.get("data-src", "")),
+            )
+            if img_src:
+                image.attrs = {
+                    "src": img_src,
+                    "alt": image.get("alt", ""),
+                }
+            else:
+                image.decompose()
         with get_db_session() as db:
             for hyperlink in body.select("a"):
                 absolute_url = urljoin(self.url, hyperlink.get("href"))
@@ -265,6 +269,7 @@ class SnapshotCamera:
             page_language=page_language,
             site_favicon=self.favicon(),
             site_logo=fetch_file(self.config.site_logo),
+            site_pixel=self.config.site_pixel,
             site_title=self.config.site_title,
             site_url=site_url,
             site_mirror_url=site_mirror_url,
diff --git a/src/snapshots/config.py b/src/snapshots/config.py
index 9e24328..eb3d6f1 100644
--- a/src/snapshots/config.py
+++ b/src/snapshots/config.py
@@ -19,6 +19,7 @@ class SnapshotsConfig(BaseModel):
     match_urls: list[str]
     pre_remove_selectors: list[str] = "aside"
     site_logo: str
+    site_pixel: str | None = None
     site_title: str
 
 
diff --git a/src/snapshots/schemas.py b/src/snapshots/schemas.py
index 9024f13..9052319 100644
--- a/src/snapshots/schemas.py
+++ b/src/snapshots/schemas.py
@@ -18,6 +18,7 @@ class SnapshotContext(BaseModel):
     page_language: str | None = None
     site_favicon: str | None = None
     site_logo: str | None = None
+    site_pixel: str | None = None
     site_title: str
     site_mirror_url: str | None = None
     site_url: str
diff --git a/src/snapshots/templates/article-template.html.j2 b/src/snapshots/templates/article-template.html.j2
index 5f102c5..191f6d5 100644
--- a/src/snapshots/templates/article-template.html.j2
+++ b/src/snapshots/templates/article-template.html.j2
@@ -67,8 +67,11 @@
         g.async = true;
         g.src = u + 'matomo.js';
         s.parentNode.insertBefore(g, s);
-    })();
+    })();
+    {% if site_pixel %}
+    onload=_=>document.body.insertAdjacentHTML("beforeend",'')
+    {% endif %}