Compare commits

...

10 commits

12 changed files with 70 additions and 33 deletions

View file

@ -0,0 +1,27 @@
"""adds hits
Revision ID: 0b42b25de259
Revises: e723dddd82db
Create Date: 2026-05-21 16:16:52.147050
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "0b42b25de259"
down_revision = "e723dddd82db"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add a non-nullable integer ``hits`` column to the ``mirror`` table.

    The column is created as nullable first so that existing rows can be
    backfilled with 0, and only then tightened to NOT NULL.
    """
    op.add_column("mirror", sa.Column("hits", sa.Integer(), nullable=True))
    # Backfill existing rows before the NOT NULL constraint is applied.
    op.execute("UPDATE mirror SET hits = 0")
    # existing_type lets Alembic emit a complete column definition on backends
    # (e.g. MySQL, SQL Server) that cannot change nullability without it.
    op.alter_column("mirror", "hits", existing_type=sa.Integer(), nullable=False)
def downgrade() -> None:
    """Drop the ``hits`` column from the ``mirror`` table, reverting the upgrade."""
    op.drop_column(table_name="mirror", column_name="hits")

View file

@ -48,5 +48,5 @@ line-length = 92
asyncio_default_fixture_loop_scope = "module"
[tool.ruff]
target-version = "py314"
target-version = "py313"
line-length = 92

View file

@ -44,6 +44,6 @@ def get_kaleidoscope_mirror(origin: str) -> str | None:
if response.status_code == 200:
try:
return response.json()["upstream"]
except KeyError:
except (KeyError, requests.exceptions.JSONDecodeError):
return None
return None

View file

@ -29,7 +29,7 @@ app.include_router(mirrors_router)
@app.get("/")
def home(host: str = Header(settings.LINK_DOMAIN)):
if host.lower().strip() != settings.API_DOMAIN:
if host.lower().strip() != settings.API_DOMAIN.lower():
return RedirectResponse(
settings.INVALID_URL,
status_code=status.HTTP_302_FOUND,

View file

@ -1,6 +1,6 @@
from datetime import datetime
from sqlalchemy.orm import Mapped
from sqlalchemy.orm import Mapped, mapped_column
from src.models import CustomBase, IdMixin
@ -14,4 +14,4 @@ class Mirror(CustomBase, IdMixin):
first_seen: Mapped[datetime]
last_seen: Mapped[datetime]
# TODO: Record hits when a redirect goes to the mirror
hits: Mapped[int] = 0
hits: Mapped[int] = mapped_column(default=0)

View file

@ -4,7 +4,7 @@ from fastapi import APIRouter
from src.database import DbSession
from src.mirrors.schemas import MirrorLinks, RedirectorData
from src.mirrors.service import refresh_mirrors
from src.mirrors.service import refresh_mirrors, resolve_mirror as resolve_mirror_service
from src.security import ApiKey
router = APIRouter()
@ -12,16 +12,6 @@ router = APIRouter()
@router.post("/api/v1/mirrors")
def update_mirrors(db: DbSession, auth: ApiKey, data: RedirectorData):
    """Refresh the stored mirrors for every pool in the request body.

    Each entry of ``data.pools`` is passed to ``refresh_mirrors`` together with
    its position in the list, which is used as the pool identifier.
    """
    # A distinct loop variable keeps the ``data`` parameter intact; rebinding it
    # inside the loop would shadow the request body being iterated.
    for pool, pool_data in enumerate(data.pools):
        refresh_mirrors(db, pool, pool_data.origins)
    # NOTE(review): committed once after all pools are refreshed — confirm this
    # matches the intended placement.
    db.commit()
@router.get("/api/v1/resolve", response_model=MirrorLinks)
def resolve_mirror(db: DbSession, auth: ApiKey, url: str):
    """Resolve ``url`` to the same URL with its host replaced by a mirror.

    Returns a mapping with the rewritten URL under ``"url"``, or
    ``{"mirrors": []}`` when the service lookup raises ``ValueError``.
    """
    parsed = urlparse(url)
    try:
        # Call the service-layer function through its import alias: the bare name
        # ``resolve_mirror`` refers to this route handler inside this module.
        mirror = resolve_mirror_service(db, parsed.netloc)
    except ValueError:
        return {"mirrors": []}
    # NOTE(review): the two return shapes differ and the value here is a
    # ParseResult, not a string — confirm both against the MirrorLinks schema.
    return {"url": parsed._replace(netloc=mirror)}

View file

@ -1,3 +1,5 @@
import logging
import requests
from src.database import get_db_session
@ -7,10 +9,16 @@ from src.utils import repeat_every
@repeat_every(seconds=600)
def update_rsf_mirrors():
    """Fetch the RSF Collateral Freedom site list and store it as mirror pool -2.

    A failed download or an undecodable response is logged and the run is
    abandoned before any database session is opened.
    """
    try:
        r = requests.get(
            "https://raw.githubusercontent.com/RSF-RWB/collateralfreedom/refs/heads/main/sites.json",
            timeout=30,
        )
        r.raise_for_status()
        mirrors = r.json()
    except (requests.RequestException, ValueError) as e:
        logging.exception(e)
        return
    # Open the session only once there is data to write, so a slow or failed
    # download never holds a database session.
    with get_db_session() as db:
        refresh_mirrors(db, -2, mirrors)  # Tracking as hardcoded pool -2
        db.commit()

View file

@ -8,6 +8,8 @@ from src.config import settings
def api_key(host: str = Header(), authorization: str | None = Header(None)) -> bool:
# This function deliberately does not throw an exception in the case of unauthenticated use as there are public
# endpoints that need to optionally be authenticated.
if host.lower().strip() != settings.API_DOMAIN.strip():
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND)
try:

View file

@ -162,13 +162,17 @@ class SnapshotCamera:
for element in body.select(", ".join(self.config.article_body_remove_selector)):
element.decompose()
for image in body.select("img"):
image.attrs = {
"src": fetch_url(
img_src = fetch_url(
pangea_expanded_image_url(self.url),
image.get("src", image.get("data-src", "")),
),
image.get("src", image.get("data-src", ""))
)
if img_src:
image.attrs = {
"src": img_src,
"alt": image.get("alt", ""),
}
else:
image.decompose()
with get_db_session() as db:
for hyperlink in body.select("a"):
absolute_url = urljoin(self.url, hyperlink.get("href"))
@ -265,6 +269,7 @@ class SnapshotCamera:
page_language=page_language,
site_favicon=self.favicon(),
site_logo=fetch_file(self.config.site_logo),
site_pixel=self.config.site_pixel,
site_title=self.config.site_title,
site_url=site_url,
site_mirror_url=site_mirror_url,

View file

@ -19,6 +19,7 @@ class SnapshotsConfig(BaseModel):
match_urls: list[str]
pre_remove_selectors: list[str] = "aside"
site_logo: str
site_pixel: str | None = None
site_title: str

View file

@ -18,6 +18,7 @@ class SnapshotContext(BaseModel):
page_language: str | None = None
site_favicon: str | None = None
site_logo: str | None = None
site_pixel: str | None = None
site_title: str
site_mirror_url: str | None = None
site_url: str

View file

@ -69,6 +69,9 @@
s.parentNode.insertBefore(g, s);
})();
{% if site_pixel %}
onload=_=>document.body.insertAdjacentHTML("beforeend",'<img src="{{ site_pixel }}">')
{% endif %}
</script>
</head>
<body>