Compare commits
No commits in common. "1d5126c2f8eac4bc9b7d1c0e064f672023785380" and "2a99edeec3939295b502eac027564e178673b045" have entirely different histories.
1d5126c2f8
...
2a99edeec3
20 changed files with 56 additions and 791 deletions
|
|
@ -12,6 +12,7 @@ See @README.md
|
|||
|
||||
- Prefer immutable style functional programming style
|
||||
- functions that operate on data over classes that encapsulate state
|
||||
- No backwards-compatibility guarantees; prefer breaking changes over backwards compat and complexity.
|
||||
- Think carefully and implement the most concise solution that changes as little code as possible.
|
||||
|
||||
|
||||
|
|
@ -103,8 +104,6 @@ uv run repub crawl -c repub.toml
|
|||
|
||||
- Keep `treefmt.nix`, `flake.nix`, and `pyproject.toml` aligned.
|
||||
- Prefer updating the flake-exported package and checks rather than adding ad hoc scripts.
|
||||
- Put new SQLite schema objects in numbered files under `repub/sql/` such as `002_*.sql`.
|
||||
- For backward-compatible column additions on existing SQLite databases, use Peewee's `playhouse.migrate` helpers instead of raw ad hoc `ALTER TABLE` logic.
|
||||
- Do not commit, amend, or stage unrelated files unless explicitly asked.
|
||||
- Final verication `nix flake check` must be greenbefore claiming task completeness
|
||||
|
||||
|
|
|
|||
|
|
@ -39,9 +39,7 @@ def nav_link(
|
|||
]
|
||||
|
||||
|
||||
def admin_sidebar(
|
||||
*, current_path: str, source_count: int = 0, running_count: int = 0
|
||||
) -> Renderable:
|
||||
def admin_sidebar(*, current_path: str) -> Renderable:
|
||||
return h.aside(
|
||||
class_="relative overflow-hidden bg-slate-950 px-6 py-8 text-white lg:min-h-screen"
|
||||
)[
|
||||
|
|
@ -70,20 +68,14 @@ def admin_sidebar(
|
|||
label="Sources",
|
||||
href="/sources",
|
||||
active=current_path.startswith("/sources"),
|
||||
badge=str(source_count),
|
||||
badge="12",
|
||||
),
|
||||
nav_link(
|
||||
label="Runs",
|
||||
href="/runs",
|
||||
active=current_path.startswith("/runs")
|
||||
or current_path.startswith("/job/"),
|
||||
badge=str(running_count),
|
||||
),
|
||||
nav_link(
|
||||
label="Settings",
|
||||
href="/settings",
|
||||
active=current_path.startswith("/settings"),
|
||||
badge="App",
|
||||
badge="3",
|
||||
),
|
||||
],
|
||||
h.div(class_="mt-auto rounded-3xl bg-white/5 p-5 ring-1 ring-white/10")[
|
||||
|
|
@ -156,19 +148,13 @@ def page_shell(
|
|||
title: str,
|
||||
description: str | None = None,
|
||||
actions: Node | None = None,
|
||||
source_count: int = 0,
|
||||
running_count: int = 0,
|
||||
content: Node,
|
||||
) -> Renderable:
|
||||
return h.main(
|
||||
id="morph",
|
||||
class_="min-h-screen lg:grid lg:grid-cols-[18rem_minmax(0,1fr)]",
|
||||
)[
|
||||
admin_sidebar(
|
||||
current_path=current_path,
|
||||
source_count=source_count,
|
||||
running_count=running_count,
|
||||
),
|
||||
admin_sidebar(current_path=current_path),
|
||||
h.div(class_="px-4 py-4 sm:px-5 lg:px-6 lg:py-5")[
|
||||
h.div(class_="mx-auto max-w-7xl space-y-5")[
|
||||
h.section[
|
||||
|
|
|
|||
|
|
@ -180,8 +180,6 @@ def build_feed_settings(
|
|||
*,
|
||||
out_dir: Path,
|
||||
feed_slug: str,
|
||||
convert_images: bool = True,
|
||||
convert_video: bool = True,
|
||||
) -> Settings:
|
||||
feed_dir = feed_output_dir(out_dir=out_dir, feed_slug=feed_slug)
|
||||
image_dir = base_settings.get("REPUBLISHER_IMAGE_DIR", IMAGE_DIR)
|
||||
|
|
@ -189,20 +187,14 @@ def build_feed_settings(
|
|||
audio_dir = base_settings.get("REPUBLISHER_AUDIO_DIR", AUDIO_DIR)
|
||||
file_dir = base_settings.get("REPUBLISHER_FILE_DIR", FILE_DIR)
|
||||
item_pipelines = dict(base_settings.getdict("ITEM_PIPELINES"))
|
||||
item_pipelines.pop("repub.pipelines.ImagePipeline", None)
|
||||
item_pipelines.pop("repub.pipelines.AudioPipeline", None)
|
||||
item_pipelines.pop("repub.pipelines.VideoPipeline", None)
|
||||
item_pipelines.pop("repub.pipelines.FilePipeline", None)
|
||||
item_pipelines.update(
|
||||
{
|
||||
"repub.pipelines.ImagePipeline": 1,
|
||||
"repub.pipelines.AudioPipeline": 2,
|
||||
"repub.pipelines.VideoPipeline": 3,
|
||||
"repub.pipelines.FilePipeline": 4,
|
||||
}
|
||||
)
|
||||
if convert_images:
|
||||
item_pipelines["repub.pipelines.ImagePipeline"] = 1
|
||||
if convert_video:
|
||||
item_pipelines["repub.pipelines.VideoPipeline"] = 3
|
||||
settings = base_settings.copy()
|
||||
settings.setdict(
|
||||
{
|
||||
|
|
|
|||
|
|
@ -186,8 +186,6 @@ class JobSourceConfig:
|
|||
source_slug: str
|
||||
source_type: str
|
||||
spider_arguments: dict[str, str]
|
||||
convert_images: bool = True
|
||||
convert_video: bool = True
|
||||
feed_url: str | None = None
|
||||
pangea_domain: str | None = None
|
||||
pangea_category: str | None = None
|
||||
|
|
@ -269,8 +267,6 @@ def main(argv: list[str] | None = None) -> int:
|
|||
out_dir=out_dir,
|
||||
feed=feed,
|
||||
stats_path=stats_path,
|
||||
convert_images=source_config.convert_images,
|
||||
convert_video=source_config.convert_video,
|
||||
)
|
||||
)
|
||||
print(
|
||||
|
|
@ -330,8 +326,6 @@ def _load_job_source_config(*, db_path: str, job_id: int) -> JobSourceConfig:
|
|||
source_slug=source.slug,
|
||||
source_type=source.source_type,
|
||||
spider_arguments=spider_arguments,
|
||||
convert_images=bool(job.convert_images),
|
||||
convert_video=bool(job.convert_video),
|
||||
feed_url=feed.feed_url,
|
||||
)
|
||||
|
||||
|
|
@ -345,8 +339,6 @@ def _load_job_source_config(*, db_path: str, job_id: int) -> JobSourceConfig:
|
|||
source_slug=source.slug,
|
||||
source_type=source.source_type,
|
||||
spider_arguments=spider_arguments,
|
||||
convert_images=bool(job.convert_images),
|
||||
convert_video=bool(job.convert_video),
|
||||
pangea_domain=pangea.domain,
|
||||
pangea_category=pangea.category_name,
|
||||
content_type=pangea.content_type,
|
||||
|
|
@ -417,14 +409,7 @@ def _resolve_feed(
|
|||
)
|
||||
|
||||
|
||||
def _build_crawl_settings(
|
||||
*,
|
||||
out_dir: Path,
|
||||
feed: FeedConfig,
|
||||
stats_path: Path,
|
||||
convert_images: bool = True,
|
||||
convert_video: bool = True,
|
||||
):
|
||||
def _build_crawl_settings(*, out_dir: Path, feed: FeedConfig, stats_path: Path):
|
||||
base_settings = build_base_settings(
|
||||
RepublisherConfig(
|
||||
config_path=out_dir / "job-runner.toml",
|
||||
|
|
@ -434,13 +419,7 @@ def _build_crawl_settings(
|
|||
)
|
||||
)
|
||||
prepare_output_dirs(out_dir, feed.slug)
|
||||
settings = build_feed_settings(
|
||||
base_settings,
|
||||
out_dir=out_dir,
|
||||
feed_slug=feed.slug,
|
||||
convert_images=convert_images,
|
||||
convert_video=convert_video,
|
||||
)
|
||||
settings = build_feed_settings(base_settings, out_dir=out_dir, feed_slug=feed.slug)
|
||||
settings.set("LOG_FILE", None, priority="cmdline")
|
||||
settings.set(
|
||||
"STATS_CLASS",
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ import os
|
|||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
|
@ -16,15 +15,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
|
|||
from apscheduler.triggers.cron import CronTrigger
|
||||
|
||||
from repub.config import feed_output_dir, feed_output_path
|
||||
from repub.model import (
|
||||
Job,
|
||||
JobExecution,
|
||||
JobExecutionStatus,
|
||||
Source,
|
||||
database,
|
||||
load_max_concurrent_jobs,
|
||||
utc_now,
|
||||
)
|
||||
from repub.model import Job, JobExecution, JobExecutionStatus, Source, database, utc_now
|
||||
|
||||
SCHEDULER_JOB_PREFIX = "job-"
|
||||
POLL_JOB_ID = "runtime-poll-workers"
|
||||
|
|
@ -114,7 +105,6 @@ class JobRuntime:
|
|||
self.graceful_stop_seconds = graceful_stop_seconds
|
||||
self.scheduler = BackgroundScheduler(timezone=UTC)
|
||||
self._workers: dict[int, RunningWorker] = {}
|
||||
self._run_lock = threading.Lock()
|
||||
self._started = False
|
||||
|
||||
def start(self) -> None:
|
||||
|
|
@ -188,15 +178,11 @@ class JobRuntime:
|
|||
def run_job_now(self, job_id: int, *, reason: str) -> int | None:
|
||||
del reason
|
||||
self.start()
|
||||
with self._run_lock:
|
||||
with database.connection_context():
|
||||
job = Job.get_or_none(id=job_id)
|
||||
if job is None:
|
||||
return None
|
||||
|
||||
if self._max_concurrent_jobs_reached():
|
||||
return None
|
||||
|
||||
already_running = (
|
||||
JobExecution.select()
|
||||
.where(
|
||||
|
|
@ -253,14 +239,6 @@ class JobRuntime:
|
|||
self._trigger_refresh()
|
||||
return execution_id
|
||||
|
||||
def _max_concurrent_jobs_reached(self) -> bool:
|
||||
return (
|
||||
JobExecution.select()
|
||||
.where(JobExecution.running_status == JobExecutionStatus.RUNNING)
|
||||
.count()
|
||||
>= load_max_concurrent_jobs()
|
||||
)
|
||||
|
||||
def request_execution_cancel(self, execution_id: int) -> bool:
|
||||
with database.connection_context():
|
||||
execution = JobExecution.get_or_none(id=execution_id)
|
||||
|
|
|
|||
102
repub/model.py
102
repub/model.py
|
|
@ -1,13 +1,11 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import UTC, datetime
|
||||
from enum import IntEnum
|
||||
from importlib import resources
|
||||
from importlib.resources.abc import Traversable
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from peewee import (
|
||||
BooleanField,
|
||||
|
|
@ -19,7 +17,6 @@ from peewee import (
|
|||
SqliteDatabase,
|
||||
TextField,
|
||||
)
|
||||
from playhouse.migrate import SchemaMigrator, migrate
|
||||
|
||||
DEFAULT_DB_PATH = Path("republisher.db")
|
||||
DATABASE_PRAGMAS = {
|
||||
|
|
@ -32,8 +29,6 @@ DATABASE_PRAGMAS = {
|
|||
"temp_store": "memory",
|
||||
}
|
||||
SCHEMA_GLOB = "*.sql"
|
||||
MAX_CONCURRENT_JOBS_SETTING_KEY = "max_concurrent_jobs"
|
||||
DEFAULT_MAX_CONCURRENT_JOBS = 1
|
||||
|
||||
database = SqliteDatabase(None, pragmas=DATABASE_PRAGMAS)
|
||||
|
||||
|
|
@ -80,93 +75,20 @@ def initialize_database(db_path: str | Path | None = None) -> Path:
|
|||
database.init(str(resolved_path), pragmas=DATABASE_PRAGMAS)
|
||||
database.connect(reuse_if_open=True)
|
||||
try:
|
||||
connection = database.connection()
|
||||
for path in schema_paths():
|
||||
database.connection().executescript(path.read_text(encoding="utf-8"))
|
||||
_run_legacy_migrations()
|
||||
connection.executescript(path.read_text(encoding="utf-8"))
|
||||
finally:
|
||||
database.close()
|
||||
|
||||
return resolved_path
|
||||
|
||||
|
||||
def _run_legacy_migrations() -> None:
|
||||
job_columns = {column.name for column in database.get_columns("job")}
|
||||
operations = []
|
||||
migrator = SchemaMigrator.from_database(database)
|
||||
if "convert_images" not in job_columns:
|
||||
operations.extend(
|
||||
(
|
||||
migrator.add_column(
|
||||
"job",
|
||||
"convert_images",
|
||||
BooleanField(
|
||||
default=True,
|
||||
constraints=[Check("convert_images IN (0, 1)")],
|
||||
),
|
||||
),
|
||||
migrator.add_column_default("job", "convert_images", 1),
|
||||
)
|
||||
)
|
||||
if "convert_video" not in job_columns:
|
||||
operations.extend(
|
||||
(
|
||||
migrator.add_column(
|
||||
"job",
|
||||
"convert_video",
|
||||
BooleanField(
|
||||
default=True,
|
||||
constraints=[Check("convert_video IN (0, 1)")],
|
||||
),
|
||||
),
|
||||
migrator.add_column_default("job", "convert_video", 1),
|
||||
)
|
||||
)
|
||||
if operations:
|
||||
with database.atomic():
|
||||
migrate(*operations)
|
||||
|
||||
|
||||
def source_slug_exists(slug: str) -> bool:
|
||||
with database.connection_context():
|
||||
return Source.select().where(Source.slug == slug).exists()
|
||||
|
||||
|
||||
def save_setting(key: str, value: Any) -> None:
|
||||
payload = json.dumps(value, sort_keys=True)
|
||||
with database.connection_context():
|
||||
with database.atomic():
|
||||
setting = AppSetting.get_or_none(AppSetting.key == key)
|
||||
if setting is None:
|
||||
AppSetting.create(key=key, value=payload)
|
||||
return
|
||||
setting.value = payload
|
||||
setting.save()
|
||||
|
||||
|
||||
def load_setting(key: str, default: Any) -> Any:
|
||||
with database.connection_context():
|
||||
setting = AppSetting.get_or_none(AppSetting.key == key)
|
||||
if setting is None:
|
||||
return default
|
||||
try:
|
||||
return json.loads(setting.value)
|
||||
except json.JSONDecodeError:
|
||||
return default
|
||||
|
||||
|
||||
def load_max_concurrent_jobs() -> int:
|
||||
value = load_setting(MAX_CONCURRENT_JOBS_SETTING_KEY, DEFAULT_MAX_CONCURRENT_JOBS)
|
||||
try:
|
||||
parsed = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return DEFAULT_MAX_CONCURRENT_JOBS
|
||||
return parsed if parsed >= 1 else DEFAULT_MAX_CONCURRENT_JOBS
|
||||
|
||||
|
||||
def load_settings_form() -> dict[str, object]:
|
||||
return {"max_concurrent_jobs": load_max_concurrent_jobs()}
|
||||
|
||||
|
||||
def load_source_form(slug: str) -> dict[str, object] | None:
|
||||
with database.connection_context():
|
||||
source = Source.get_or_none(Source.slug == slug)
|
||||
|
|
@ -181,8 +103,6 @@ def load_source_form(slug: str) -> dict[str, object] | None:
|
|||
"notes": source.notes,
|
||||
"spider_arguments": job.spider_arguments,
|
||||
"enabled": job.enabled,
|
||||
"convert_images": job.convert_images,
|
||||
"convert_video": job.convert_video,
|
||||
"cron_minute": job.cron_minute,
|
||||
"cron_hour": job.cron_hour,
|
||||
"cron_day_of_month": job.cron_day_of_month,
|
||||
|
|
@ -235,8 +155,6 @@ def create_source(
|
|||
cron_day_of_month: str,
|
||||
cron_day_of_week: str,
|
||||
cron_month: str,
|
||||
convert_images: bool = True,
|
||||
convert_video: bool = True,
|
||||
feed_url: str = "",
|
||||
pangea_domain: str = "",
|
||||
pangea_category: str = "",
|
||||
|
|
@ -279,8 +197,6 @@ def create_source(
|
|||
Job.create(
|
||||
source=source,
|
||||
enabled=enabled,
|
||||
convert_images=convert_images,
|
||||
convert_video=convert_video,
|
||||
spider_arguments=spider_arguments,
|
||||
cron_minute=cron_minute,
|
||||
cron_hour=cron_hour,
|
||||
|
|
@ -305,8 +221,6 @@ def update_source(
|
|||
cron_day_of_month: str,
|
||||
cron_day_of_week: str,
|
||||
cron_month: str,
|
||||
convert_images: bool = True,
|
||||
convert_video: bool = True,
|
||||
feed_url: str = "",
|
||||
pangea_domain: str = "",
|
||||
pangea_category: str = "",
|
||||
|
|
@ -332,8 +246,6 @@ def update_source(
|
|||
|
||||
job = Job.get(Job.source == source)
|
||||
job.enabled = enabled
|
||||
job.convert_images = convert_images
|
||||
job.convert_video = convert_video
|
||||
job.spider_arguments = spider_arguments
|
||||
job.cron_minute = cron_minute
|
||||
job.cron_hour = cron_hour
|
||||
|
|
@ -463,14 +375,6 @@ class BaseModel(Model):
|
|||
database = database
|
||||
|
||||
|
||||
class AppSetting(BaseModel):
|
||||
key = TextField(primary_key=True)
|
||||
value = TextField()
|
||||
|
||||
class Meta:
|
||||
table_name = "app_setting"
|
||||
|
||||
|
||||
class Source(BaseModel):
|
||||
created_at = DateTimeField(default=utc_now)
|
||||
updated_at = DateTimeField(default=utc_now)
|
||||
|
|
@ -515,8 +419,6 @@ class Job(BaseModel):
|
|||
created_at = DateTimeField(default=utc_now)
|
||||
updated_at = DateTimeField(default=utc_now)
|
||||
enabled = BooleanField()
|
||||
convert_images = BooleanField(default=True)
|
||||
convert_video = BooleanField(default=True)
|
||||
spider_arguments = TextField(default="")
|
||||
cron_minute = TextField()
|
||||
cron_hour = TextField()
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
from repub.pages.dashboard import dashboard_page, dashboard_page_with_data
|
||||
from repub.pages.runs import execution_logs_page, runs_page
|
||||
from repub.pages.settings import settings_page
|
||||
from repub.pages.shim import shim_page
|
||||
from repub.pages.sources import create_source_page, edit_source_page, sources_page
|
||||
|
||||
|
|
@ -11,7 +10,6 @@ __all__ = [
|
|||
"edit_source_page",
|
||||
"execution_logs_page",
|
||||
"runs_page",
|
||||
"settings_page",
|
||||
"shim_page",
|
||||
"sources_page",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -251,23 +251,17 @@ def dashboard_page_with_data(
|
|||
running_executions: tuple[Mapping[str, object], ...] | None = None,
|
||||
source_feeds: tuple[Mapping[str, object], ...] | None = None,
|
||||
) -> Renderable:
|
||||
running_items = running_executions or ()
|
||||
source_items = source_feeds or ()
|
||||
return h.main(
|
||||
id="morph",
|
||||
class_="min-h-screen lg:grid lg:grid-cols-[18rem_minmax(0,1fr)]",
|
||||
)[
|
||||
admin_sidebar(
|
||||
current_path="/",
|
||||
source_count=len(source_items),
|
||||
running_count=len(running_items),
|
||||
),
|
||||
admin_sidebar(current_path="/"),
|
||||
h.div(class_="px-4 py-4 sm:px-5 lg:px-6 lg:py-5")[
|
||||
h.div(class_="mx-auto max-w-7xl space-y-5")[
|
||||
dashboard_header(),
|
||||
operational_snapshot(snapshot=snapshot),
|
||||
running_executions_table(running_executions=running_items),
|
||||
published_feeds_table(source_feeds=source_items),
|
||||
running_executions_table(running_executions=running_executions),
|
||||
published_feeds_table(source_feeds=source_feeds),
|
||||
]
|
||||
],
|
||||
]
|
||||
|
|
|
|||
|
|
@ -194,7 +194,6 @@ def runs_page(
|
|||
running_executions: tuple[Mapping[str, object], ...] | None = None,
|
||||
upcoming_jobs: tuple[Mapping[str, object], ...] | None = None,
|
||||
completed_executions: tuple[Mapping[str, object], ...] | None = None,
|
||||
source_count: int = 0,
|
||||
) -> Renderable:
|
||||
running_items = running_executions or ()
|
||||
upcoming_items = upcoming_jobs or ()
|
||||
|
|
@ -208,8 +207,6 @@ def runs_page(
|
|||
eyebrow="Execution control",
|
||||
title="Runs",
|
||||
actions=muted_action_link(href="/sources", label="Back to sources"),
|
||||
source_count=source_count,
|
||||
running_count=len(running_items),
|
||||
content=(
|
||||
table_section(
|
||||
eyebrow="Live work",
|
||||
|
|
|
|||
|
|
@ -1,82 +0,0 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
|
||||
import htpy as h
|
||||
from htpy import Renderable
|
||||
|
||||
from repub.components import input_field, muted_action_link, page_shell, section_card
|
||||
|
||||
|
||||
def _value(settings: Mapping[str, object] | None, key: str, default: str = "") -> str:
|
||||
if settings is None:
|
||||
return default
|
||||
return str(settings.get(key, default))
|
||||
|
||||
|
||||
def settings_page(
|
||||
*,
|
||||
settings: Mapping[str, object] | None = None,
|
||||
action_path: str = "/actions/settings",
|
||||
source_count: int = 0,
|
||||
running_count: int = 0,
|
||||
) -> Renderable:
|
||||
return page_shell(
|
||||
current_path="/settings",
|
||||
eyebrow="Configuration",
|
||||
title="Settings",
|
||||
description="Global runtime controls for the republisher.",
|
||||
source_count=source_count,
|
||||
running_count=running_count,
|
||||
content=section_card(
|
||||
content=(
|
||||
h.form(
|
||||
{
|
||||
"data-signals": "{_formError: '', _formSuccess: ''}",
|
||||
"data-signals__ifmissing": (
|
||||
"{"
|
||||
f"maxConcurrentJobs: '{_value(settings, 'max_concurrent_jobs', '1')}'"
|
||||
"}"
|
||||
),
|
||||
"data-on:submit": f"@post('{action_path}')",
|
||||
},
|
||||
class_="space-y-6 rounded-[1.5rem] bg-white p-6 shadow-sm ring-1 ring-slate-200",
|
||||
)[
|
||||
h.div[
|
||||
h.p(
|
||||
class_="text-xs font-semibold uppercase tracking-[0.22em] text-amber-600"
|
||||
)["Scheduler"],
|
||||
h.h2(class_="mt-2 text-xl font-semibold text-slate-950")[
|
||||
"Runtime settings"
|
||||
],
|
||||
h.p(class_="mt-2 text-sm text-slate-600")[
|
||||
"Limit how many jobs the scheduler and manual runs can execute at the same time."
|
||||
],
|
||||
],
|
||||
h.div(
|
||||
{
|
||||
"data-show": "$_formError !== ''",
|
||||
"data-text": "$_formError",
|
||||
},
|
||||
class_="rounded-2xl bg-rose-50 px-4 py-3 text-sm font-medium text-rose-800",
|
||||
),
|
||||
input_field(
|
||||
label="Max concurrent jobs",
|
||||
field_id="max-concurrent-jobs",
|
||||
value=_value(settings, "max_concurrent_jobs", "1"),
|
||||
help_text="Must be an integer greater than or equal to 1.",
|
||||
signal_name="maxConcurrentJobs",
|
||||
),
|
||||
h.div(
|
||||
class_="flex flex-wrap justify-end gap-3 border-t border-slate-200 pt-6"
|
||||
)[
|
||||
muted_action_link(href="/", label="Back to dashboard"),
|
||||
h.button(
|
||||
type="submit",
|
||||
class_="rounded-full bg-slate-950 px-4 py-2.5 text-sm font-semibold text-white transition hover:bg-slate-800",
|
||||
)["Save settings"],
|
||||
],
|
||||
],
|
||||
)
|
||||
),
|
||||
)
|
||||
|
|
@ -37,11 +37,7 @@ def shim_page(
|
|||
id="morph",
|
||||
class_="min-h-screen lg:grid lg:grid-cols-[18rem_minmax(0,1fr)]",
|
||||
)[
|
||||
admin_sidebar(
|
||||
current_path=current_path,
|
||||
source_count=0,
|
||||
running_count=0,
|
||||
),
|
||||
admin_sidebar(current_path=current_path),
|
||||
h.div(class_="px-4 py-4 sm:px-5 lg:px-6 lg:py-5")[
|
||||
h.div(class_="mx-auto max-w-7xl space-y-5")[
|
||||
h.section[
|
||||
|
|
|
|||
|
|
@ -128,18 +128,13 @@ def sources_table(
|
|||
|
||||
|
||||
def sources_page(
|
||||
*,
|
||||
sources: tuple[Mapping[str, object], ...] | None = None,
|
||||
running_count: int = 0,
|
||||
*, sources: tuple[Mapping[str, object], ...] | None = None
|
||||
) -> Renderable:
|
||||
source_items = sources or ()
|
||||
return page_shell(
|
||||
current_path="/sources",
|
||||
eyebrow="Source management",
|
||||
title="Sources",
|
||||
source_count=len(source_items),
|
||||
running_count=running_count,
|
||||
content=sources_table(sources=source_items),
|
||||
content=sources_table(sources=sources),
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -403,18 +398,6 @@ def source_form(
|
|||
signal_name="jobEnabled",
|
||||
checked=_checked(source, "enabled", True),
|
||||
),
|
||||
toggle_field(
|
||||
label="Convert images",
|
||||
description="Normalize mirrored images through the image conversion pipeline for this source.",
|
||||
signal_name="convertImages",
|
||||
checked=_checked(source, "convert_images", True),
|
||||
),
|
||||
toggle_field(
|
||||
label="Convert video",
|
||||
description="Run mirrored videos through the video conversion pipeline for this source.",
|
||||
signal_name="convertVideo",
|
||||
checked=_checked(source, "convert_video", True),
|
||||
),
|
||||
],
|
||||
],
|
||||
],
|
||||
|
|
@ -432,12 +415,7 @@ def source_form(
|
|||
)
|
||||
|
||||
|
||||
def create_source_page(
|
||||
*,
|
||||
action_path: str = "/actions/sources/create",
|
||||
source_count: int = 0,
|
||||
running_count: int = 0,
|
||||
) -> Renderable:
|
||||
def create_source_page(*, action_path: str = "/actions/sources/create") -> Renderable:
|
||||
actions = (
|
||||
muted_action_link(href="/sources", label="Back to sources"),
|
||||
header_action_link(href="/runs", label="View runs"),
|
||||
|
|
@ -447,8 +425,6 @@ def create_source_page(
|
|||
eyebrow="Source creation",
|
||||
title="Create source",
|
||||
actions=actions,
|
||||
source_count=source_count,
|
||||
running_count=running_count,
|
||||
content=source_form(mode="create", action_path=action_path),
|
||||
)
|
||||
|
||||
|
|
@ -458,8 +434,6 @@ def edit_source_page(
|
|||
slug: str,
|
||||
source: Mapping[str, object],
|
||||
action_path: str,
|
||||
source_count: int = 0,
|
||||
running_count: int = 0,
|
||||
) -> Renderable:
|
||||
actions = (
|
||||
muted_action_link(href="/sources", label="Back to sources"),
|
||||
|
|
@ -470,7 +444,5 @@ def edit_source_page(
|
|||
eyebrow="Source editing",
|
||||
title="Edit source",
|
||||
actions=actions,
|
||||
source_count=source_count,
|
||||
running_count=running_count,
|
||||
content=source_form(mode="edit", action_path=action_path, source=source),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -242,9 +242,8 @@ class RssFeedSpider(BaseRssFeedSpider):
|
|||
|
||||
if "content" in entry:
|
||||
for c in entry.content:
|
||||
raw_html = getattr(c, "value", "") or ""
|
||||
if c.type == "text/html" and raw_html.strip() != "":
|
||||
html, urls = self.munge_cdata_html(raw_html)
|
||||
if c.type == "text/html":
|
||||
html, urls = self.munge_cdata_html(c.value)
|
||||
item.append(CONTENT.encoded(CDATA(html)))
|
||||
image_urls.extend(urls[FileType.IMAGE])
|
||||
video_urls.extend(urls[FileType.VIDEO])
|
||||
|
|
|
|||
|
|
@ -1,4 +0,0 @@
|
|||
CREATE TABLE IF NOT EXISTS app_setting (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
);
|
||||
90
repub/web.py
90
repub/web.py
|
|
@ -28,10 +28,8 @@ from repub.model import (
|
|||
delete_job_source,
|
||||
delete_source,
|
||||
initialize_database,
|
||||
load_settings_form,
|
||||
load_source_form,
|
||||
load_sources,
|
||||
save_setting,
|
||||
source_slug_exists,
|
||||
update_source,
|
||||
)
|
||||
|
|
@ -41,7 +39,6 @@ from repub.pages import (
|
|||
edit_source_page,
|
||||
execution_logs_page,
|
||||
runs_page,
|
||||
settings_page,
|
||||
shim_page,
|
||||
sources_page,
|
||||
)
|
||||
|
|
@ -62,8 +59,6 @@ class SourceFormData(TypedDict):
|
|||
notes: str
|
||||
spider_arguments: str
|
||||
enabled: bool
|
||||
convert_images: bool
|
||||
convert_video: bool
|
||||
cron_minute: str
|
||||
cron_hour: str
|
||||
cron_day_of_month: str
|
||||
|
|
@ -82,10 +77,6 @@ class SourceFormData(TypedDict):
|
|||
include_content: bool
|
||||
|
||||
|
||||
class SettingsFormData(TypedDict):
|
||||
max_concurrent_jobs: int
|
||||
|
||||
|
||||
DEFAULT_PANGEA_CONTENT_FORMAT = "MOBILE_3"
|
||||
DEFAULT_PANGEA_CONTENT_TYPE = "articles"
|
||||
DEFAULT_PANGEA_MAX_ARTICLES = "10"
|
||||
|
|
@ -132,7 +123,6 @@ def create_app(*, dev_mode: bool = False) -> Quart:
|
|||
@app.get("/sources/create")
|
||||
@app.get("/sources/<string:slug>/edit")
|
||||
@app.get("/runs")
|
||||
@app.get("/settings")
|
||||
@app.get("/job/<int:job_id>/execution/<int:execution_id>/logs")
|
||||
async def page_shim(
|
||||
slug: str | None = None,
|
||||
|
|
@ -168,11 +158,7 @@ def create_app(*, dev_mode: bool = False) -> Quart:
|
|||
|
||||
@app.post("/sources/<string:slug>/edit")
|
||||
async def edit_source_patch(slug: str) -> DatastarResponse:
|
||||
return _page_patch_response(app, lambda: render_edit_source(slug, app))
|
||||
|
||||
@app.post("/settings")
|
||||
async def settings_patch() -> DatastarResponse:
|
||||
return _page_patch_response(app, lambda: render_settings(app))
|
||||
return _page_patch_response(app, lambda: render_edit_source(slug))
|
||||
|
||||
@app.post("/actions/sources/create")
|
||||
async def create_source_action() -> DatastarResponse:
|
||||
|
|
@ -231,20 +217,6 @@ def create_app(*, dev_mode: bool = False) -> Quart:
|
|||
trigger_refresh(app)
|
||||
return Response(status=204)
|
||||
|
||||
@app.post("/actions/settings")
|
||||
async def update_settings_action() -> DatastarResponse:
|
||||
signals = cast(dict[str, object], await read_signals())
|
||||
settings, error = validate_settings_form(signals)
|
||||
if error is not None:
|
||||
return DatastarResponse(
|
||||
SSE.patch_signals({"_formError": error, "_formSuccess": ""})
|
||||
)
|
||||
|
||||
assert settings is not None
|
||||
save_setting("max_concurrent_jobs", settings["max_concurrent_jobs"])
|
||||
trigger_refresh(app)
|
||||
return DatastarResponse(SSE.redirect("/settings"))
|
||||
|
||||
@app.post("/runs")
|
||||
async def runs_patch() -> DatastarResponse:
|
||||
return _page_patch_response(app, lambda: render_runs(app))
|
||||
|
|
@ -328,30 +300,16 @@ async def render_dashboard(app: Quart | None = None) -> Renderable:
|
|||
|
||||
|
||||
async def render_sources(app: Quart | None = None) -> Renderable:
|
||||
if app is None:
|
||||
return sources_page()
|
||||
|
||||
sources = load_sources()
|
||||
return sources_page(
|
||||
sources=sources,
|
||||
running_count=len(
|
||||
load_runs_view(log_dir=app.config["REPUB_LOG_DIR"])["running"]
|
||||
),
|
||||
)
|
||||
sources = None if app is None else load_sources()
|
||||
return sources_page(sources=sources)
|
||||
|
||||
|
||||
async def render_create_source(app: Quart | None = None) -> Renderable:
|
||||
if app is None:
|
||||
del app
|
||||
return create_source_page()
|
||||
|
||||
sidebar_counts = _load_sidebar_counts(app)
|
||||
return create_source_page(
|
||||
source_count=sidebar_counts["source_count"],
|
||||
running_count=sidebar_counts["running_count"],
|
||||
)
|
||||
|
||||
|
||||
async def render_edit_source(slug: str, app: Quart | None = None) -> Renderable:
|
||||
async def render_edit_source(slug: str) -> Renderable:
|
||||
source = load_source_form(slug)
|
||||
if source is None:
|
||||
return sources_page(sources=())
|
||||
|
|
@ -359,7 +317,6 @@ async def render_edit_source(slug: str, app: Quart | None = None) -> Renderable:
|
|||
slug=slug,
|
||||
source=source,
|
||||
action_path=f"/actions/sources/{slug}/edit",
|
||||
**({} if app is None else _load_sidebar_counts(app)),
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -372,18 +329,6 @@ async def render_runs(app: Quart | None = None) -> Renderable:
|
|||
running_executions=cast(tuple[dict[str, object], ...], view["running"]),
|
||||
upcoming_jobs=cast(tuple[dict[str, object], ...], view["upcoming"]),
|
||||
completed_executions=cast(tuple[dict[str, object], ...], view["completed"]),
|
||||
source_count=len(load_sources()),
|
||||
)
|
||||
|
||||
|
||||
async def render_settings(app: Quart | None = None) -> Renderable:
|
||||
if app is None:
|
||||
return settings_page(settings=load_settings_form())
|
||||
sidebar_counts = _load_sidebar_counts(app)
|
||||
return settings_page(
|
||||
settings=load_settings_form(),
|
||||
source_count=sidebar_counts["source_count"],
|
||||
running_count=sidebar_counts["running_count"],
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -432,15 +377,6 @@ async def _unsubscribe_on_close(
|
|||
get_refresh_broker(app).unsubscribe(cast(asyncio.Queue[object], queue))
|
||||
|
||||
|
||||
def _load_sidebar_counts(app: Quart) -> dict[str, int]:
|
||||
return {
|
||||
"source_count": len(load_sources()),
|
||||
"running_count": len(
|
||||
load_runs_view(log_dir=app.config["REPUB_LOG_DIR"])["running"]
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def validate_source_form(
|
||||
signals: dict[str, object] | None,
|
||||
*,
|
||||
|
|
@ -533,8 +469,6 @@ def validate_source_form(
|
|||
"max_articles": _parse_int(max_articles),
|
||||
"oldest_article": _parse_int(oldest_article),
|
||||
"enabled": enabled,
|
||||
"convert_images": _read_bool(signals, "convertImages", default=True),
|
||||
"convert_video": _read_bool(signals, "convertVideo", default=True),
|
||||
"only_newest": _read_bool(signals, "onlyNewest", default=True),
|
||||
"include_authors": _read_bool(signals, "includeAuthors", default=True),
|
||||
"exclude_media": _read_bool(signals, "excludeMedia", default=False),
|
||||
|
|
@ -548,20 +482,6 @@ def validate_source_form(
|
|||
return source, None
|
||||
|
||||
|
||||
def validate_settings_form(
|
||||
signals: dict[str, object] | None,
|
||||
) -> tuple[SettingsFormData | None, str | None]:
|
||||
if signals is None:
|
||||
return None, "Missing form data."
|
||||
|
||||
max_concurrent_jobs = _parse_int(_read_string(signals, "maxConcurrentJobs"))
|
||||
if max_concurrent_jobs is None:
|
||||
return None, "Max concurrent jobs must be an integer."
|
||||
if max_concurrent_jobs < 1:
|
||||
return None, "Max concurrent jobs must be at least 1."
|
||||
return {"max_concurrent_jobs": max_concurrent_jobs}, None
|
||||
|
||||
|
||||
def _read_string(signals: dict[str, object], key: str, *, strip: bool = True) -> str:
|
||||
value = str(signals.get(key, ""))
|
||||
return value.strip() if strip else value
|
||||
|
|
|
|||
|
|
@ -195,35 +195,3 @@ def test_build_feed_settings_uses_runtime_media_dir_overrides(tmp_path: Path) ->
|
|||
assert feed_settings["AUDIO_STORE"] == str(
|
||||
out_dir / "feeds" / "gp-pod" / "audio-custom"
|
||||
)
|
||||
|
||||
|
||||
def test_build_feed_settings_can_disable_image_and_video_conversion(
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
out_dir = (tmp_path / "mirror").resolve()
|
||||
config = RepublisherConfig(
|
||||
config_path=tmp_path / "repub.toml",
|
||||
out_dir=out_dir,
|
||||
feeds=(
|
||||
FeedConfig(
|
||||
name="Guardian Project Podcast",
|
||||
slug="gp-pod",
|
||||
url="https://guardianproject.info/podcast/podcast.xml",
|
||||
),
|
||||
),
|
||||
scrapy_settings={},
|
||||
)
|
||||
|
||||
base_settings = build_base_settings(config)
|
||||
feed_settings = build_feed_settings(
|
||||
base_settings,
|
||||
out_dir=out_dir,
|
||||
feed_slug="gp-pod",
|
||||
convert_images=False,
|
||||
convert_video=False,
|
||||
)
|
||||
|
||||
assert "repub.pipelines.ImagePipeline" not in feed_settings["ITEM_PIPELINES"]
|
||||
assert "repub.pipelines.VideoPipeline" not in feed_settings["ITEM_PIPELINES"]
|
||||
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 2
|
||||
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 4
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
from pathlib import Path
|
||||
|
||||
from scrapy.http import TextResponse
|
||||
from scrapy.settings import Settings
|
||||
|
||||
from repub import entrypoint as entrypoint_module
|
||||
|
|
@ -64,46 +63,3 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
|
|||
)
|
||||
== f"audio/{local_audio_path('https://example.com/media/podcast.mp3')}"
|
||||
)
|
||||
|
||||
|
||||
def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
|
||||
feed_text = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
|
||||
<channel>
|
||||
<title>Empty Content Feed</title>
|
||||
<link>https://example.com</link>
|
||||
<description>Feed with empty HTML content blocks.</description>
|
||||
<item>
|
||||
<title>Entry With Empty Content</title>
|
||||
<link>https://example.com/entry</link>
|
||||
<description>Summary text still exists.</description>
|
||||
<guid isPermaLink="false">entry-1</guid>
|
||||
<pubDate>Mon, 01 Jan 2024 00:00:00 +0000</pubDate>
|
||||
<content:encoded></content:encoded>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
"""
|
||||
spider = RssFeedSpider(feed_name="demo", url="https://example.com/feed.rss")
|
||||
spider.settings = Settings(
|
||||
values={
|
||||
"REPUBLISHER_IMAGE_DIR": "images",
|
||||
"REPUBLISHER_FILE_DIR": "files",
|
||||
"REPUBLISHER_AUDIO_DIR": "audio",
|
||||
"REPUBLISHER_VIDEO_DIR": "video",
|
||||
}
|
||||
)
|
||||
response = TextResponse(
|
||||
url="https://example.com/feed.rss",
|
||||
body=feed_text.encode("utf-8"),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
parse_result = spider._parse(response)
|
||||
|
||||
assert parse_result is not None
|
||||
items = list(parse_result)
|
||||
|
||||
assert len(items) == 2
|
||||
assert items[0].el.findtext("title") == "Empty Content Feed"
|
||||
assert items[1].el.findtext("title") == "Entry With Empty Content"
|
||||
|
|
|
|||
|
|
@ -7,15 +7,11 @@ import pytest
|
|||
from peewee import IntegrityError
|
||||
|
||||
from repub.model import (
|
||||
AppSetting,
|
||||
Job,
|
||||
Source,
|
||||
database,
|
||||
initialize_database,
|
||||
load_max_concurrent_jobs,
|
||||
resolve_database_path,
|
||||
save_setting,
|
||||
schema_paths,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -55,7 +51,6 @@ def test_initialize_database_bootstraps_schema_from_sql_files(tmp_path: Path) ->
|
|||
)
|
||||
}
|
||||
assert table_names == {
|
||||
"app_setting",
|
||||
"job",
|
||||
"job_execution",
|
||||
"source",
|
||||
|
|
@ -64,43 +59,17 @@ def test_initialize_database_bootstraps_schema_from_sql_files(tmp_path: Path) ->
|
|||
}
|
||||
|
||||
defaults = {
|
||||
row[1]: row[4] for row in connection.execute("PRAGMA table_info('job')")
|
||||
row[1]: row[4]
|
||||
for row in connection.execute("PRAGMA table_info('source_pangea')")
|
||||
}
|
||||
assert defaults["convert_images"] == "1"
|
||||
assert defaults["convert_video"] == "1"
|
||||
finally:
|
||||
connection.close()
|
||||
|
||||
|
||||
def test_initialize_database_applies_newer_sql_files_to_existing_databases(
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
db_path = tmp_path / "existing.db"
|
||||
connection = sqlite3.connect(db_path)
|
||||
try:
|
||||
connection.executescript(schema_paths()[0].read_text(encoding="utf-8"))
|
||||
finally:
|
||||
connection.close()
|
||||
|
||||
initialize_database(db_path)
|
||||
|
||||
connection = sqlite3.connect(db_path)
|
||||
try:
|
||||
table_names = {
|
||||
row[0]
|
||||
for row in connection.execute(
|
||||
"""
|
||||
SELECT name
|
||||
FROM sqlite_master
|
||||
WHERE type = 'table' AND name NOT LIKE 'sqlite_%'
|
||||
"""
|
||||
)
|
||||
}
|
||||
assert "app_setting" in table_names
|
||||
|
||||
job_columns = {row[1] for row in connection.execute("PRAGMA table_info('job')")}
|
||||
assert "convert_images" in job_columns
|
||||
assert "convert_video" in job_columns
|
||||
assert defaults["content_type"] is None
|
||||
assert defaults["only_newest"] is None
|
||||
assert defaults["max_articles"] is None
|
||||
assert defaults["oldest_article"] is None
|
||||
assert defaults["include_authors"] is None
|
||||
assert defaults["exclude_media"] is None
|
||||
assert defaults["include_content"] is None
|
||||
assert defaults["content_format"] is None
|
||||
finally:
|
||||
connection.close()
|
||||
|
||||
|
|
@ -199,20 +168,3 @@ def test_job_table_allows_exactly_one_job_per_source(tmp_path: Path) -> None:
|
|||
cron_day_of_week="*",
|
||||
cron_month="*",
|
||||
)
|
||||
|
||||
|
||||
def test_load_max_concurrent_jobs_defaults_to_one(tmp_path: Path) -> None:
|
||||
initialize_database(tmp_path / "settings-defaults.db")
|
||||
|
||||
assert load_max_concurrent_jobs() == 1
|
||||
|
||||
|
||||
def test_save_setting_persists_json_value(tmp_path: Path) -> None:
|
||||
initialize_database(tmp_path / "settings-roundtrip.db")
|
||||
|
||||
save_setting("max_concurrent_jobs", 4)
|
||||
|
||||
row = AppSetting.get(AppSetting.key == "max_concurrent_jobs")
|
||||
|
||||
assert row.value == "4"
|
||||
assert load_max_concurrent_jobs() == 4
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ from repub.model import (
|
|||
Source,
|
||||
create_source,
|
||||
initialize_database,
|
||||
save_setting,
|
||||
)
|
||||
from repub.web import create_app, get_job_runtime, render_execution_logs, render_runs
|
||||
|
||||
|
|
@ -138,68 +137,6 @@ def test_job_runtime_run_now_writes_log_and_stats_and_marks_success(
|
|||
runtime.shutdown()
|
||||
|
||||
|
||||
def test_job_runtime_respects_max_concurrent_jobs_setting(tmp_path: Path) -> None:
|
||||
db_path = tmp_path / "max-concurrency.db"
|
||||
log_dir = tmp_path / "out" / "logs"
|
||||
initialize_database(db_path)
|
||||
save_setting("max_concurrent_jobs", 1)
|
||||
|
||||
with _slow_feed_server() as feed_url:
|
||||
first_source = create_source(
|
||||
name="First source",
|
||||
slug="first-source",
|
||||
source_type="feed",
|
||||
notes="",
|
||||
spider_arguments="",
|
||||
enabled=False,
|
||||
cron_minute="*/5",
|
||||
cron_hour="*",
|
||||
cron_day_of_month="*",
|
||||
cron_day_of_week="*",
|
||||
cron_month="*",
|
||||
feed_url=feed_url,
|
||||
)
|
||||
second_source = create_source(
|
||||
name="Second source",
|
||||
slug="second-source",
|
||||
source_type="feed",
|
||||
notes="",
|
||||
spider_arguments="",
|
||||
enabled=False,
|
||||
cron_minute="*/5",
|
||||
cron_hour="*",
|
||||
cron_day_of_month="*",
|
||||
cron_day_of_week="*",
|
||||
cron_month="*",
|
||||
feed_url=feed_url,
|
||||
)
|
||||
first_job = Job.get(Job.source == first_source)
|
||||
second_job = Job.get(Job.source == second_source)
|
||||
|
||||
runtime = JobRuntime(log_dir=log_dir)
|
||||
try:
|
||||
runtime.start()
|
||||
first_execution_id = runtime.run_job_now(first_job.id, reason="manual")
|
||||
|
||||
assert first_execution_id is not None
|
||||
_wait_for_running_execution(first_execution_id)
|
||||
|
||||
second_execution_id = runtime.run_job_now(second_job.id, reason="manual")
|
||||
|
||||
assert second_execution_id is None
|
||||
assert (
|
||||
JobExecution.select()
|
||||
.where(JobExecution.running_status == JobExecutionStatus.RUNNING)
|
||||
.count()
|
||||
== 1
|
||||
)
|
||||
runtime.request_execution_cancel(first_execution_id)
|
||||
finished_execution = _wait_for_terminal_execution(first_execution_id)
|
||||
assert finished_execution.running_status == JobExecutionStatus.CANCELED
|
||||
finally:
|
||||
runtime.shutdown()
|
||||
|
||||
|
||||
def test_job_runtime_cancel_marks_execution_canceled(tmp_path: Path) -> None:
|
||||
initialize_database(tmp_path / "cancel.db")
|
||||
with _slow_feed_server() as feed_url:
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ from __future__ import annotations
|
|||
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, cast
|
||||
|
|
@ -18,8 +17,6 @@ from repub.model import (
|
|||
SourceFeed,
|
||||
SourcePangea,
|
||||
create_source,
|
||||
load_max_concurrent_jobs,
|
||||
save_setting,
|
||||
)
|
||||
from repub.pages.runs import runs_page
|
||||
from repub.web import (
|
||||
|
|
@ -30,7 +27,6 @@ from repub.web import (
|
|||
render_edit_source,
|
||||
render_execution_logs,
|
||||
render_runs,
|
||||
render_settings,
|
||||
render_sources,
|
||||
)
|
||||
|
||||
|
|
@ -113,7 +109,6 @@ def test_root_get_serves_datastar_shim() -> None:
|
|||
assert '<main id="morph"' in body
|
||||
assert 'href="/sources"' in body
|
||||
assert 'href="/runs"' in body
|
||||
assert 'href="/settings"' in body
|
||||
assert "Connecting" in body
|
||||
|
||||
asyncio.run(run())
|
||||
|
|
@ -435,94 +430,6 @@ def test_render_sources_shows_table_and_create_link() -> None:
|
|||
asyncio.run(run())
|
||||
|
||||
|
||||
def test_render_sources_shows_live_sidebar_badges(monkeypatch, tmp_path: Path) -> None:
|
||||
db_path = tmp_path / "sources-sidebar.db"
|
||||
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
|
||||
app = create_app()
|
||||
create_source(
|
||||
name="First source",
|
||||
slug="first-source",
|
||||
source_type="feed",
|
||||
notes="",
|
||||
spider_arguments="",
|
||||
enabled=True,
|
||||
cron_minute="0",
|
||||
cron_hour="*",
|
||||
cron_day_of_month="*",
|
||||
cron_day_of_week="*",
|
||||
cron_month="*",
|
||||
feed_url="https://example.com/first.xml",
|
||||
)
|
||||
create_source(
|
||||
name="Second source",
|
||||
slug="second-source",
|
||||
source_type="feed",
|
||||
notes="",
|
||||
spider_arguments="",
|
||||
enabled=True,
|
||||
cron_minute="0",
|
||||
cron_hour="*",
|
||||
cron_day_of_month="*",
|
||||
cron_day_of_week="*",
|
||||
cron_month="*",
|
||||
feed_url="https://example.com/second.xml",
|
||||
)
|
||||
|
||||
async def run() -> None:
|
||||
body = str(await render_sources(app))
|
||||
|
||||
assert re.search(
|
||||
r'href="/sources"[^>]*>.*?<span>Sources</span>\s*<span[^>]*>2</span>',
|
||||
body,
|
||||
re.S,
|
||||
)
|
||||
assert re.search(
|
||||
r'href="/runs"[^>]*>.*?<span>Runs</span>\s*<span[^>]*>0</span>',
|
||||
body,
|
||||
re.S,
|
||||
)
|
||||
|
||||
asyncio.run(run())
|
||||
|
||||
|
||||
def test_render_dashboard_shows_live_sidebar_badges(
|
||||
monkeypatch, tmp_path: Path
|
||||
) -> None:
|
||||
db_path = tmp_path / "dashboard-sidebar.db"
|
||||
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
|
||||
app = create_app()
|
||||
create_source(
|
||||
name="Dashboard source",
|
||||
slug="dashboard-source",
|
||||
source_type="feed",
|
||||
notes="",
|
||||
spider_arguments="",
|
||||
enabled=True,
|
||||
cron_minute="0",
|
||||
cron_hour="*",
|
||||
cron_day_of_month="*",
|
||||
cron_day_of_week="*",
|
||||
cron_month="*",
|
||||
feed_url="https://example.com/dashboard.xml",
|
||||
)
|
||||
|
||||
async def run() -> None:
|
||||
body = str(await render_dashboard(app))
|
||||
|
||||
assert re.search(
|
||||
r'href="/sources"[^>]*>.*?<span>Sources</span>\s*<span[^>]*>1</span>',
|
||||
body,
|
||||
re.S,
|
||||
)
|
||||
assert re.search(
|
||||
r'href="/runs"[^>]*>.*?<span>Runs</span>\s*<span[^>]*>0</span>',
|
||||
body,
|
||||
re.S,
|
||||
)
|
||||
|
||||
asyncio.run(run())
|
||||
|
||||
|
||||
def test_render_sources_shows_delete_action_for_each_source(
|
||||
monkeypatch, tmp_path: Path
|
||||
) -> None:
|
||||
|
|
@ -569,8 +476,6 @@ def test_render_create_source_shows_dedicated_form_page() -> None:
|
|||
assert "includeAuthors" in body
|
||||
assert "excludeMedia" in body
|
||||
assert "includeContent" in body
|
||||
assert "convertImages" in body
|
||||
assert "convertVideo" in body
|
||||
assert "TEXT_ONLY" in body
|
||||
assert "breakingnews" in body
|
||||
assert "Pangea domain" in body
|
||||
|
|
@ -607,8 +512,6 @@ def test_render_edit_source_shows_existing_values(monkeypatch, tmp_path: Path) -
|
|||
notes="Regional health alerts.",
|
||||
spider_arguments="language=en\ndownload_media=true",
|
||||
enabled=True,
|
||||
convert_images=False,
|
||||
convert_video=False,
|
||||
cron_minute="0",
|
||||
cron_hour="*/6",
|
||||
cron_day_of_month="*",
|
||||
|
|
@ -643,28 +546,6 @@ def test_render_edit_source_shows_existing_values(monkeypatch, tmp_path: Path) -
|
|||
assert "example.org" in body
|
||||
assert "Health" in body
|
||||
assert "language=en\ndownload_media=true" in body
|
||||
assert "convertImages: false" in body
|
||||
assert "convertVideo: false" in body
|
||||
|
||||
asyncio.run(run())
|
||||
|
||||
|
||||
def test_render_settings_shows_current_max_concurrent_jobs(
|
||||
monkeypatch, tmp_path: Path
|
||||
) -> None:
|
||||
db_path = tmp_path / "settings-page.db"
|
||||
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
|
||||
create_app()
|
||||
save_setting("max_concurrent_jobs", 3)
|
||||
|
||||
async def run() -> None:
|
||||
app = create_app()
|
||||
body = str(await render_settings(app))
|
||||
|
||||
assert ">Settings<" in body
|
||||
assert "/actions/settings" in body
|
||||
assert 'value="3"' in body
|
||||
assert "Max concurrent jobs" in body
|
||||
|
||||
asyncio.run(run())
|
||||
|
||||
|
|
@ -721,8 +602,6 @@ def test_create_source_action_creates_pangea_source_and_job_in_database(
|
|||
assert pangea.content_type == "breakingnews"
|
||||
assert pangea.include_content is True
|
||||
assert job.enabled is True
|
||||
assert job.convert_images is True
|
||||
assert job.convert_video is True
|
||||
assert job.spider_arguments == "language=en\ndownload_media=true"
|
||||
assert job.cron_hour == "*/6"
|
||||
assert "kenya-health" in rendered_sources
|
||||
|
|
@ -834,8 +713,6 @@ def test_edit_source_action_updates_existing_source_and_job_in_database(
|
|||
"cronDayOfWeek": "*",
|
||||
"cronMonth": "*",
|
||||
"jobEnabled": False,
|
||||
"convertImages": False,
|
||||
"convertVideo": False,
|
||||
"onlyNewest": False,
|
||||
"includeAuthors": False,
|
||||
"excludeMedia": True,
|
||||
|
|
@ -860,8 +737,6 @@ def test_edit_source_action_updates_existing_source_and_job_in_database(
|
|||
assert pangea.include_authors is False
|
||||
assert pangea.exclude_media is True
|
||||
assert job.enabled is False
|
||||
assert job.convert_images is False
|
||||
assert job.convert_video is False
|
||||
assert job.spider_arguments == "language=sw\ninclude_audio=false"
|
||||
assert job.cron_hour == "2"
|
||||
assert "Kenya health desk nightly" in rendered_sources
|
||||
|
|
@ -988,55 +863,6 @@ def test_create_source_action_validates_duplicate_slug_and_pangea_type(
|
|||
asyncio.run(run())
|
||||
|
||||
|
||||
def test_settings_action_updates_max_concurrent_jobs(
|
||||
monkeypatch, tmp_path: Path
|
||||
) -> None:
|
||||
db_path = tmp_path / "settings-action.db"
|
||||
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
|
||||
|
||||
async def run() -> None:
|
||||
app = create_app()
|
||||
client = app.test_client()
|
||||
|
||||
response = await client.post(
|
||||
"/actions/settings",
|
||||
headers={"Datastar-Request": "true"},
|
||||
json={"maxConcurrentJobs": "3"},
|
||||
)
|
||||
body = await response.get_data(as_text=True)
|
||||
|
||||
assert response.status_code == 200
|
||||
assert "window.location = '/settings'" in body
|
||||
assert load_max_concurrent_jobs() == 3
|
||||
assert 'value="3"' in str(await render_settings(app))
|
||||
|
||||
asyncio.run(run())
|
||||
|
||||
|
||||
def test_settings_action_rejects_non_positive_max_concurrent_jobs(
|
||||
monkeypatch, tmp_path: Path
|
||||
) -> None:
|
||||
db_path = tmp_path / "settings-invalid.db"
|
||||
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
|
||||
|
||||
async def run() -> None:
|
||||
app = create_app()
|
||||
client = app.test_client()
|
||||
|
||||
response = await client.post(
|
||||
"/actions/settings",
|
||||
headers={"Datastar-Request": "true"},
|
||||
json={"maxConcurrentJobs": "0"},
|
||||
)
|
||||
body = await response.get_data(as_text=True)
|
||||
|
||||
assert response.status_code == 200
|
||||
assert "Max concurrent jobs must be at least 1." in body
|
||||
assert load_max_concurrent_jobs() == 1
|
||||
|
||||
asyncio.run(run())
|
||||
|
||||
|
||||
def test_render_runs_shows_running_upcoming_and_completed_tables(
|
||||
monkeypatch, tmp_path: Path
|
||||
) -> None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue