Refactor database access through managed connections

This commit is contained in:
Abel Luck 2026-03-31 17:30:07 +02:00
parent f19bab6fa2
commit 3f28e46ff6
10 changed files with 1327 additions and 716 deletions

111
tests/test_db.py Normal file
View file

@ -0,0 +1,111 @@
from __future__ import annotations
import threading
import time
from pathlib import Path
import pytest
from peewee import InterfaceError
from repub.db import get_database_connection
from repub.model import AppSetting, database, initialize_database
def test_queries_require_managed_database_context(tmp_path: Path) -> None:
    """Running a query outside ``database.reader()``/``database.writer()`` must fail.

    The expected ``RuntimeError`` message has to mention one of the two
    managed contexts.
    """
    db_file = tmp_path / "managed-context.db"
    initialize_database(db_file)

    expected_message = "database.reader\\(\\)|database.writer\\(\\)"
    with pytest.raises(RuntimeError, match=expected_message):
        # No reader/writer context is active here on purpose.
        AppSetting.select().count()
def test_writer_and_reader_contexts_allow_persisted_queries(tmp_path: Path) -> None:
    """A row created under ``database.writer()`` is readable under ``database.reader()``."""
    db_file = tmp_path / "reader-writer.db"
    initialize_database(db_file)

    with database.writer():
        AppSetting.create(key="feed_url", value='"https://mirror.example"')

    with database.reader():
        stored_setting = AppSetting.get(AppSetting.key == "feed_url")

    stored_value = stored_setting.value
    assert stored_value == '"https://mirror.example"'
def test_managed_connections_disable_peewee_autoconnect(tmp_path: Path) -> None:
    """Writer and reader databases are created with ``autoconnect`` turned off.

    After explicitly closing one reader, executing SQL on it must raise
    ``InterfaceError`` instead of silently reopening the connection.
    """
    initialize_database(tmp_path / "autoconnect-disabled.db")

    managed = get_database_connection()
    assert managed is not None
    assert managed.writer_db.autoconnect is False
    for pooled_reader in managed.reader_dbs:
        assert pooled_reader.autoconnect is False

    closed_reader = managed.reader_dbs[0]
    closed_reader.close()
    with pytest.raises(InterfaceError, match="database connection not opened"):
        closed_reader.execute_sql("SELECT 1")
def test_database_connection_initializes_four_readers_and_one_writer(
    tmp_path: Path,
) -> None:
    """The managed connection exposes four readers (all queued) plus a writer."""
    initialize_database(tmp_path / "pool-shape.db")

    managed = get_database_connection()
    assert managed is not None

    expected_readers = 4
    assert managed.pool_size == expected_readers
    assert len(managed.reader_dbs) == expected_readers
    # Peeks at the private queue to confirm every reader starts out available.
    assert managed._reader_pool.qsize() == expected_readers
    assert managed.writer_db is not None
def test_reader_lease_is_returned_to_the_pool_after_use(tmp_path: Path) -> None:
    """Entering ``database.reader()`` takes one reader from the pool; leaving returns it."""
    initialize_database(tmp_path / "reader-lease.db")

    managed = get_database_connection()
    assert managed is not None

    reader_pool = managed._reader_pool
    size_before_lease = reader_pool.qsize()

    with database.reader():
        size_during_lease = reader_pool.qsize()
    size_after_lease = reader_pool.qsize()

    assert size_during_lease == size_before_lease - 1
    assert size_after_lease == size_before_lease
def test_writer_contexts_serialize_through_the_single_writer(tmp_path: Path) -> None:
    """Two threads entering ``database.writer()`` must run one after the other.

    The first thread enters the writer context and parks inside it until the
    test releases it. The second thread waits for that signal and then tries
    to enter its own writer context. The recorded event order shows whether
    the second thread got in before the first one left.
    """
    initialize_database(tmp_path / "single-writer.db")
    events: list[str] = []
    entered_first_writer = threading.Event()
    allow_first_writer_to_exit = threading.Event()

    def first_writer() -> None:
        with database.writer():
            events.append("first-entered")
            entered_first_writer.set()
            # Hold the writer until the main thread has inspected `events`.
            allow_first_writer_to_exit.wait(timeout=1)
            events.append("first-exiting")

    def second_writer() -> None:
        entered_first_writer.wait(timeout=1)
        with database.writer():
            events.append("second-entered")

    # Daemon threads: a deadlocked writer must not keep the test process alive.
    first_thread = threading.Thread(target=first_writer, daemon=True)
    second_thread = threading.Thread(target=second_writer, daemon=True)
    first_thread.start()
    second_thread.start()
    try:
        assert entered_first_writer.wait(timeout=1) is True
        # Give the second thread a moment to (incorrectly) slip into the writer.
        time.sleep(0.05)
        assert events == ["first-entered"]
    finally:
        # Always release the first writer and reap both threads, even when an
        # assertion above failed, so no worker lingers into later tests.
        allow_first_writer_to_exit.set()
        first_thread.join(timeout=1)
        second_thread.join(timeout=1)

    # join(timeout=...) returns silently on timeout; check liveness explicitly.
    assert not first_thread.is_alive()
    assert not second_thread.is_alive()
    assert events == ["first-entered", "first-exiting", "second-entered"]

View file

@ -9,6 +9,7 @@ from repub.model import (
JobExecution,
JobExecutionStatus,
create_source,
database,
initialize_database,
)
@ -31,15 +32,16 @@ def test_load_runs_view_humanizes_completed_execution_summary_bytes(
cron_month="*",
feed_url="https://example.com/completed.xml",
)
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
ended_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
requests_count=14,
items_count=11,
bytes_count=16_410_269,
)
with database.writer():
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
ended_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
requests_count=14,
items_count=11,
bytes_count=16_410_269,
)
view = load_runs_view(
log_dir=tmp_path / "out" / "logs",
@ -67,13 +69,14 @@ def test_load_runs_view_projects_completed_execution_duration(
cron_month="*",
feed_url="https://example.com/completed.xml",
)
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
started_at=datetime(2026, 3, 30, 11, 59, 12, tzinfo=UTC),
ended_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
)
with database.writer():
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
started_at=datetime(2026, 3, 30, 11, 59, 12, tzinfo=UTC),
ended_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
)
view = load_runs_view(
log_dir=tmp_path / "out" / "logs",
@ -101,15 +104,16 @@ def test_load_runs_view_humanizes_running_execution_summary_bytes(
cron_month="*",
feed_url="https://example.com/running.xml",
)
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.RUNNING,
started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
requests_count=14,
items_count=11,
bytes_count=1_536,
)
with database.writer():
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.RUNNING,
started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
requests_count=14,
items_count=11,
bytes_count=1_536,
)
view = load_runs_view(
log_dir=tmp_path / "out" / "logs",
@ -137,12 +141,13 @@ def test_load_runs_view_projects_running_execution_duration(
cron_month="*",
feed_url="https://example.com/running.xml",
)
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.RUNNING,
started_at=datetime(2026, 3, 30, 11, 59, 12, tzinfo=UTC),
)
with database.writer():
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.RUNNING,
started_at=datetime(2026, 3, 30, 11, 59, 12, tzinfo=UTC),
)
view = load_runs_view(
log_dir=tmp_path / "out" / "logs",
@ -184,21 +189,22 @@ def test_load_runs_view_projects_queued_executions_in_fifo_order(
cron_month="*",
feed_url="https://example.com/second.xml",
)
first_job = Job.get(Job.source == first_source)
second_job = Job.get(Job.source == second_source)
reference_time = datetime(2026, 3, 30, 12, 30, tzinfo=UTC)
first_created_at = reference_time - timedelta(minutes=7)
second_created_at = reference_time - timedelta(minutes=3)
first_execution = JobExecution.create(
job=first_job,
created_at=first_created_at,
running_status=JobExecutionStatus.PENDING,
)
second_execution = JobExecution.create(
job=second_job,
created_at=second_created_at,
running_status=JobExecutionStatus.PENDING,
)
with database.writer():
first_job = Job.get(Job.source == first_source)
second_job = Job.get(Job.source == second_source)
first_execution = JobExecution.create(
job=first_job,
created_at=first_created_at,
running_status=JobExecutionStatus.PENDING,
)
second_execution = JobExecution.create(
job=second_job,
created_at=second_created_at,
running_status=JobExecutionStatus.PENDING,
)
view = load_runs_view(
log_dir=tmp_path / "out" / "logs",
@ -258,12 +264,13 @@ def test_load_runs_view_keeps_queued_jobs_in_scheduled_jobs(
cron_month="*",
feed_url="https://example.com/scheduled.xml",
)
queued_job = Job.get(Job.source == queued_source)
Job.get(Job.source == scheduled_source)
JobExecution.create(
job=queued_job,
running_status=JobExecutionStatus.PENDING,
)
with database.writer():
queued_job = Job.get(Job.source == queued_source)
Job.get(Job.source == scheduled_source)
JobExecution.create(
job=queued_job,
running_status=JobExecutionStatus.PENDING,
)
view = load_runs_view(
log_dir=tmp_path / "out" / "logs",
@ -299,17 +306,18 @@ def test_load_runs_view_running_row_targets_queued_follow_up_cancel(
cron_month="*",
feed_url="https://example.com/running.xml",
)
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
running_status=JobExecutionStatus.RUNNING,
)
pending_execution = JobExecution.create(
job=job,
created_at=datetime(2026, 3, 30, 12, 5, tzinfo=UTC),
running_status=JobExecutionStatus.PENDING,
)
with database.writer():
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
running_status=JobExecutionStatus.RUNNING,
)
pending_execution = JobExecution.create(
job=job,
created_at=datetime(2026, 3, 30, 12, 5, tzinfo=UTC),
running_status=JobExecutionStatus.PENDING,
)
view = load_runs_view(
log_dir=tmp_path / "out" / "logs",
@ -341,14 +349,15 @@ def test_load_runs_view_paginates_completed_executions_after_20_rows(
cron_month="*",
feed_url="https://example.com/completed.xml",
)
job = Job.get(Job.source == source)
base_time = datetime(2026, 3, 30, 12, 0, tzinfo=UTC)
for offset in range(21):
JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
ended_at=base_time - timedelta(minutes=offset),
)
with database.writer():
job = Job.get(Job.source == source)
base_time = datetime(2026, 3, 30, 12, 0, tzinfo=UTC)
for offset in range(21):
JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
ended_at=base_time - timedelta(minutes=offset),
)
first_page = load_runs_view(
log_dir=tmp_path / "out" / "logs",

View file

@ -112,8 +112,7 @@ def test_initialize_database_configures_sqlite_pragmas(tmp_path: Path) -> None:
initialize_database(db_path)
database.connect(reuse_if_open=True)
try:
with database.reader_conn():
pragma_values = {
"cache_size": database.execute_sql("PRAGMA cache_size").fetchone()[0],
"page_size": database.execute_sql("PRAGMA page_size").fetchone()[0],
@ -132,8 +131,6 @@ def test_initialize_database_configures_sqlite_pragmas(tmp_path: Path) -> None:
"foreign_keys": 1,
"busy_timeout": 5000,
}
finally:
database.close()
def test_initialize_database_creates_scheduler_and_execution_indexes(
@ -208,34 +205,35 @@ def test_initialize_database_creates_run_queue_indexes(tmp_path: Path) -> None:
def test_job_table_allows_exactly_one_job_per_source(tmp_path: Path) -> None:
initialize_database(tmp_path / "jobs.db")
source = Source.create(
name="Guardian feed mirror",
slug="guardian-feed",
source_type="feed",
)
Job.create(
source=source,
enabled=True,
spider_arguments="",
cron_minute="15",
cron_hour="*",
cron_day_of_month="*",
cron_day_of_week="*",
cron_month="*",
)
with pytest.raises(IntegrityError):
with database.writer():
source = Source.create(
name="Guardian feed mirror",
slug="guardian-feed",
source_type="feed",
)
Job.create(
source=source,
enabled=True,
spider_arguments="language=en",
cron_minute="30",
spider_arguments="",
cron_minute="15",
cron_hour="*",
cron_day_of_month="*",
cron_day_of_week="*",
cron_month="*",
)
with pytest.raises(IntegrityError):
Job.create(
source=source,
enabled=True,
spider_arguments="language=en",
cron_minute="30",
cron_hour="*",
cron_day_of_month="*",
cron_day_of_week="*",
cron_month="*",
)
def test_load_max_concurrent_jobs_defaults_to_one(tmp_path: Path) -> None:
initialize_database(tmp_path / "settings-defaults.db")
@ -248,7 +246,8 @@ def test_save_setting_persists_json_value(tmp_path: Path) -> None:
save_setting("max_concurrent_jobs", 4)
row = AppSetting.get(AppSetting.key == "max_concurrent_jobs")
with database.reader():
row = AppSetting.get(AppSetting.key == "max_concurrent_jobs")
assert row.value == "4"
assert load_max_concurrent_jobs() == 4

View file

@ -19,6 +19,7 @@ from repub.model import (
JobExecutionStatus,
Source,
create_source,
database,
initialize_database,
save_setting,
)
@ -29,6 +30,16 @@ FIXTURE_FEED_PATH = (
).resolve()
def _db_reader(callable_):
    """Call ``callable_`` inside a ``database.reader()`` context and return its result."""
    with database.reader():
        outcome = callable_()
        return outcome
def _db_writer(callable_):
    """Call ``callable_`` inside a ``database.writer()`` context and return its result."""
    with database.writer():
        outcome = callable_()
        return outcome
def initialize_runtime_database(db_path: Path) -> None:
initialize_database(db_path)
save_setting("feed_url", "http://localhost:8080")
@ -64,8 +75,9 @@ def test_job_runtime_syncs_enabled_jobs_into_apscheduler(tmp_path: Path) -> None
cron_month="*",
feed_url="https://example.com/disabled.xml",
)
enabled_job = Job.get(Job.source == enabled_source)
disabled_job = Job.get(Job.source == disabled_source)
with database.reader():
enabled_job = Job.get(Job.source == enabled_source)
disabled_job = Job.get(Job.source == disabled_source)
runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
try:
@ -77,8 +89,10 @@ def test_job_runtime_syncs_enabled_jobs_into_apscheduler(tmp_path: Path) -> None
assert f"job-{enabled_job.id}" in scheduled_ids
assert f"job-{disabled_job.id}" not in scheduled_ids
enabled_job.enabled = False
enabled_job.save()
with database.writer():
enabled_job = Job.get_by_id(enabled_job.id)
enabled_job.enabled = False
enabled_job.save()
runtime.sync_jobs()
scheduled_ids = {job.id for job in runtime.scheduler.get_jobs()}
@ -105,7 +119,8 @@ def test_job_runtime_run_now_writes_log_and_stats_and_marks_success(
cron_month="*",
feed_url=FIXTURE_FEED_PATH.as_uri(),
)
job = Job.get(Job.source == source)
with database.reader():
job = Job.get(Job.source == source)
runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
try:
@ -178,8 +193,9 @@ def test_job_runtime_respects_max_concurrent_jobs_setting(tmp_path: Path) -> Non
cron_month="*",
feed_url=feed_url,
)
first_job = Job.get(Job.source == first_source)
second_job = Job.get(Job.source == second_source)
with database.reader():
first_job = Job.get(Job.source == first_source)
second_job = Job.get(Job.source == second_source)
runtime = JobRuntime(log_dir=log_dir)
try:
@ -197,16 +213,20 @@ def test_job_runtime_respects_max_concurrent_jobs_setting(tmp_path: Path) -> Non
JobExecutionStatus.PENDING,
)
assert (
JobExecution.select()
.where(JobExecution.running_status == JobExecutionStatus.RUNNING)
.count()
_db_reader(
lambda: JobExecution.select()
.where(JobExecution.running_status == JobExecutionStatus.RUNNING)
.count()
)
== 1
)
assert second_execution.started_at is None
assert (
JobExecution.select()
.where(JobExecution.running_status == JobExecutionStatus.PENDING)
.count()
_db_reader(
lambda: JobExecution.select()
.where(JobExecution.running_status == JobExecutionStatus.PENDING)
.count()
)
== 1
)
runtime.request_execution_cancel(first_execution_id)
@ -253,8 +273,9 @@ def test_job_runtime_starts_queued_execution_after_capacity_opens(
cron_month="*",
feed_url=FIXTURE_FEED_PATH.as_uri(),
)
first_job = Job.get(Job.source == first_source)
second_job = Job.get(Job.source == second_source)
with database.reader():
first_job = Job.get(Job.source == first_source)
second_job = Job.get(Job.source == second_source)
runtime = JobRuntime(log_dir=log_dir)
try:
@ -314,8 +335,9 @@ def test_job_runtime_deduplicates_manual_queue_requests(tmp_path: Path) -> None:
cron_month="*",
feed_url="https://example.com/queued.xml",
)
blocking_job = Job.get(Job.source == blocking_source)
queued_job = Job.get(Job.source == queued_source)
with database.reader():
blocking_job = Job.get(Job.source == blocking_source)
queued_job = Job.get(Job.source == queued_source)
runtime = JobRuntime(log_dir=log_dir)
try:
@ -332,12 +354,14 @@ def test_job_runtime_deduplicates_manual_queue_requests(tmp_path: Path) -> None:
assert first_pending_id is not None
assert second_pending_id == first_pending_id
assert (
JobExecution.select()
.where(
(JobExecution.job == queued_job)
& (JobExecution.running_status == JobExecutionStatus.PENDING)
_db_reader(
lambda: JobExecution.select()
.where(
(JobExecution.job == queued_job)
& (JobExecution.running_status == JobExecutionStatus.PENDING)
)
.count()
)
.count()
== 1
)
finally:
@ -367,7 +391,8 @@ def test_job_runtime_allows_one_running_and_one_pending_per_job(
cron_month="*",
feed_url=feed_url,
)
job = Job.get(Job.source == source)
with database.reader():
job = Job.get(Job.source == source)
runtime = JobRuntime(log_dir=log_dir)
try:
@ -383,17 +408,21 @@ def test_job_runtime_allows_one_running_and_one_pending_per_job(
assert pending_execution_id is not None
assert duplicate_pending_id == pending_execution_id
assert (
JobExecution.select()
.where(JobExecution.job == job)
.where(JobExecution.running_status == JobExecutionStatus.RUNNING)
.count()
_db_reader(
lambda: JobExecution.select()
.where(JobExecution.job == job)
.where(JobExecution.running_status == JobExecutionStatus.RUNNING)
.count()
)
== 1
)
assert (
JobExecution.select()
.where(JobExecution.job == job)
.where(JobExecution.running_status == JobExecutionStatus.PENDING)
.count()
_db_reader(
lambda: JobExecution.select()
.where(JobExecution.job == job)
.where(JobExecution.running_status == JobExecutionStatus.PENDING)
.count()
)
== 1
)
finally:
@ -420,11 +449,12 @@ def test_job_runtime_start_drains_pending_rows_created_before_start(
cron_month="*",
feed_url=FIXTURE_FEED_PATH.as_uri(),
)
job = Job.get(Job.source == source)
pending_execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.PENDING,
)
with database.writer():
job = Job.get(Job.source == source)
pending_execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.PENDING,
)
runtime = JobRuntime(log_dir=log_dir)
try:
@ -477,18 +507,23 @@ def test_job_runtime_scheduled_runs_use_the_persistent_queue(
cron_month="*",
feed_url="https://example.com/second-scheduled.xml",
)
first_job = Job.get(Job.source == first_source)
second_job = Job.get(Job.source == second_source)
with database.reader():
first_job = Job.get(Job.source == first_source)
second_job = Job.get(Job.source == second_source)
runtime = JobRuntime(log_dir=log_dir)
try:
runtime.start()
runtime.run_scheduled_job(first_job.id)
first_execution = JobExecution.get(JobExecution.job == first_job)
first_execution = _db_reader(
lambda: JobExecution.get(JobExecution.job == first_job)
)
_wait_for_running_execution(int(first_execution.get_id()))
runtime.run_scheduled_job(second_job.id)
second_execution = JobExecution.get(JobExecution.job == second_job)
second_execution = _db_reader(
lambda: JobExecution.get(JobExecution.job == second_job)
)
assert second_execution.running_status == JobExecutionStatus.PENDING
assert second_execution.started_at is None
@ -519,7 +554,8 @@ def test_job_runtime_cancel_pending_follow_up_keeps_running_worker_alive(
cron_month="*",
feed_url=feed_url,
)
job = Job.get(Job.source == source)
with database.reader():
job = Job.get(Job.source == source)
runtime = JobRuntime(log_dir=log_dir)
try:
@ -533,9 +569,14 @@ def test_job_runtime_cancel_pending_follow_up_keeps_running_worker_alive(
_wait_for_execution_status(pending_execution_id, JobExecutionStatus.PENDING)
assert runtime.cancel_queued_execution(pending_execution_id) is True
assert JobExecution.get_or_none(id=pending_execution_id) is None
assert (
JobExecution.get_by_id(running_execution_id).running_status
_db_reader(lambda: JobExecution.get_or_none(id=pending_execution_id))
is None
)
assert (
_db_reader(
lambda: JobExecution.get_by_id(running_execution_id).running_status
)
== JobExecutionStatus.RUNNING
)
finally:
@ -559,7 +600,8 @@ def test_job_runtime_cancel_marks_execution_canceled(tmp_path: Path) -> None:
cron_month="*",
feed_url=feed_url,
)
job = Job.get(Job.source == source)
with database.reader():
job = Job.get(Job.source == source)
runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
try:
@ -602,12 +644,13 @@ def test_job_runtime_start_reconciles_stale_running_execution(tmp_path: Path) ->
cron_month="*",
feed_url="https://example.com/stale.xml",
)
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
started_at="2026-03-30 12:30:00+00:00",
running_status=JobExecutionStatus.RUNNING,
)
with database.writer():
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
started_at="2026-03-30 12:30:00+00:00",
running_status=JobExecutionStatus.RUNNING,
)
artifacts = JobArtifacts.for_execution(
log_dir=tmp_path / "out" / "logs",
job_id=job.id,
@ -622,7 +665,9 @@ def test_job_runtime_start_reconciles_stale_running_execution(tmp_path: Path) ->
runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
try:
runtime.start()
reconciled_execution = JobExecution.get_by_id(execution.get_id())
reconciled_execution = _db_reader(
lambda: JobExecution.get_by_id(execution.get_id())
)
assert reconciled_execution.running_status == JobExecutionStatus.FAILED
assert reconciled_execution.ended_at is not None
@ -649,12 +694,13 @@ def test_job_runtime_publishes_refresh_while_jobs_are_running(tmp_path: Path) ->
cron_month="*",
feed_url="https://example.com/running.xml",
)
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
running_status=JobExecutionStatus.RUNNING,
)
with database.writer():
job = Job.get(Job.source == source)
JobExecution.create(
job=job,
started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
running_status=JobExecutionStatus.RUNNING,
)
events: list[object] = []
runtime = JobRuntime(
@ -688,12 +734,13 @@ def test_job_runtime_start_reattaches_live_worker_after_app_restart(
cron_month="*",
feed_url=feed_url,
)
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
started_at=datetime.now(UTC),
running_status=JobExecutionStatus.RUNNING,
)
with database.writer():
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
started_at=datetime.now(UTC),
running_status=JobExecutionStatus.RUNNING,
)
artifacts = JobArtifacts.for_execution(
log_dir=log_dir,
job_id=job.id,
@ -728,7 +775,9 @@ def test_job_runtime_start_reattaches_live_worker_after_app_restart(
time.sleep(0.1)
runtime.start()
running_execution = JobExecution.get_by_id(execution.get_id())
running_execution = _db_reader(
lambda: JobExecution.get_by_id(execution.get_id())
)
assert running_execution.running_status == JobExecutionStatus.RUNNING
assert running_execution.ended_at is None
@ -764,13 +813,14 @@ def test_job_runtime_start_restores_live_worker_marked_failed_by_restart_bug(
cron_month="*",
feed_url=feed_url,
)
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
started_at=datetime.now(UTC),
ended_at=datetime.now(UTC),
running_status=JobExecutionStatus.FAILED,
)
with database.writer():
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
started_at=datetime.now(UTC),
ended_at=datetime.now(UTC),
running_status=JobExecutionStatus.FAILED,
)
artifacts = JobArtifacts.for_execution(
log_dir=log_dir,
job_id=job.id,
@ -805,7 +855,9 @@ def test_job_runtime_start_restores_live_worker_marked_failed_by_restart_bug(
time.sleep(0.1)
runtime.start()
restored_execution = JobExecution.get_by_id(execution.get_id())
restored_execution = _db_reader(
lambda: JobExecution.get_by_id(execution.get_id())
)
assert restored_execution.running_status == JobExecutionStatus.RUNNING
assert restored_execution.ended_at is None
@ -895,14 +947,15 @@ def test_load_runs_view_humanizes_completed_execution_end_time(
cron_month="*",
feed_url="https://example.com/completed.xml",
)
job = Job.get(Job.source == source)
reference_time = datetime(2026, 1, 15, 12, 0, tzinfo=UTC)
ended_at = reference_time - timedelta(hours=2)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
ended_at=ended_at,
)
with database.writer():
job = Job.get(Job.source == source)
reference_time = datetime(2026, 1, 15, 12, 0, tzinfo=UTC)
ended_at = reference_time - timedelta(hours=2)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
ended_at=ended_at,
)
view = load_runs_view(log_dir=app.config["REPUB_LOG_DIR"], now=reference_time)
completed = view["completed"][0]
@ -934,14 +987,15 @@ def test_load_runs_view_humanizes_running_execution_start_time(
cron_month="*",
feed_url="https://example.com/running.xml",
)
job = Job.get(Job.source == source)
reference_time = datetime(2026, 1, 15, 12, 0, tzinfo=UTC)
started_at = reference_time - timedelta(hours=2)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.RUNNING,
started_at=started_at,
)
with database.writer():
job = Job.get(Job.source == source)
reference_time = datetime(2026, 1, 15, 12, 0, tzinfo=UTC)
started_at = reference_time - timedelta(hours=2)
JobExecution.create(
job=job,
running_status=JobExecutionStatus.RUNNING,
started_at=started_at,
)
view = load_runs_view(log_dir=app.config["REPUB_LOG_DIR"], now=reference_time)
running = view["running"][0]
@ -974,7 +1028,8 @@ def test_render_runs_uses_database_backed_jobs_and_executions(
cron_month="*",
feed_url=FIXTURE_FEED_PATH.as_uri(),
)
job = Job.get(Job.source == source)
with database.reader():
job = Job.get(Job.source == source)
runtime = get_job_runtime(app)
runtime.start()
try:
@ -1021,11 +1076,12 @@ def test_render_execution_logs_handles_missing_execution_and_missing_log_file(
cron_month="*",
feed_url="https://example.com/log-source.xml",
)
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.FAILED,
)
with database.writer():
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.FAILED,
)
async def run() -> None:
missing_execution = str(
@ -1067,18 +1123,25 @@ def test_delete_job_action_removes_source_job_and_execution_history(
cron_month="*",
feed_url="https://example.com/delete.xml",
)
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
)
with database.writer():
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
)
response = await client.post(f"/actions/jobs/{job.id}/delete")
assert response.status_code == 204
assert Source.get_or_none(Source.slug == "delete-source") is None
assert Job.get_or_none(id=job.id) is None
assert JobExecution.get_or_none(id=int(execution.get_id())) is None
assert (
_db_reader(lambda: Source.get_or_none(Source.slug == "delete-source"))
is None
)
assert _db_reader(lambda: Job.get_or_none(id=job.id)) is None
assert (
_db_reader(lambda: JobExecution.get_or_none(id=int(execution.get_id())))
is None
)
asyncio.run(run())
@ -1107,18 +1170,25 @@ def test_delete_source_action_removes_source_job_and_execution_history(
cron_month="*",
feed_url="https://example.com/delete-source-row.xml",
)
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
)
with database.writer():
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
)
response = await client.post("/actions/sources/delete-source-row/delete")
assert response.status_code == 204
assert Source.get_or_none(Source.slug == "delete-source-row") is None
assert Job.get_or_none(id=job.id) is None
assert JobExecution.get_or_none(id=int(execution.get_id())) is None
assert (
_db_reader(lambda: Source.get_or_none(Source.slug == "delete-source-row"))
is None
)
assert _db_reader(lambda: Job.get_or_none(id=job.id)) is None
assert (
_db_reader(lambda: JobExecution.get_or_none(id=int(execution.get_id())))
is None
)
asyncio.run(run())
@ -1128,7 +1198,7 @@ def _wait_for_running_execution(
) -> JobExecution:
deadline = time.monotonic() + timeout_seconds
while time.monotonic() < deadline:
execution = JobExecution.get_by_id(execution_id)
execution = _db_reader(lambda: JobExecution.get_by_id(execution_id))
if execution.running_status == JobExecutionStatus.RUNNING:
return execution
time.sleep(0.02)
@ -1143,7 +1213,7 @@ def _wait_for_execution_status(
) -> JobExecution:
deadline = time.monotonic() + timeout_seconds
while time.monotonic() < deadline:
execution = JobExecution.get_by_id(execution_id)
execution = _db_reader(lambda: JobExecution.get_by_id(execution_id))
if execution.running_status == status:
return execution
time.sleep(0.02)
@ -1155,7 +1225,7 @@ def _wait_for_terminal_execution(
) -> JobExecution:
deadline = time.monotonic() + timeout_seconds
while time.monotonic() < deadline:
execution = JobExecution.get_by_id(execution_id)
execution = _db_reader(lambda: JobExecution.get_by_id(execution_id))
if execution.running_status in {
JobExecutionStatus.SUCCEEDED,
JobExecutionStatus.FAILED,

View file

@ -21,6 +21,7 @@ from repub.model import (
SourceFeed,
SourcePangea,
create_source,
database,
load_max_concurrent_jobs,
load_settings_form,
save_setting,
@ -43,6 +44,35 @@ from repub.web import (
)
def _db_reader(fn):
    """Call ``fn`` inside a ``database.reader()`` context and return its result."""
    with database.reader():
        outcome = fn()
        return outcome
def _db_writer(fn):
    """Call ``fn`` inside a ``database.writer()`` context and return its result."""
    with database.writer():
        outcome = fn()
        return outcome
def test_web_routes_do_not_access_peewee_models_directly() -> None:
    """Fail if ``repub/web.py`` calls ``.get``/``.select`` on a model class.

    The module is read as plain text from a path relative to the current
    working directory and scanned with one regex per forbidden accessor.
    """
    web_source = Path("repub/web.py").read_text(encoding="utf-8")
    forbidden_patterns = (
        r"\b(Job|Source|JobExecution|SourceFeed|SourcePangea)\.get",
        r"\b(Job|Source|JobExecution|SourceFeed|SourcePangea)\.select",
    )
    for pattern in forbidden_patterns:
        assert re.search(pattern, web_source) is None
def test_status_badge_uses_green_done_tone() -> None:
badge = str(status_badge(label="Succeeded", tone="done"))
@ -790,8 +820,12 @@ def test_load_dashboard_view_lists_source_feed_artifacts(
updated_at = reference_time - timedelta(minutes=32)
updated_at_epoch = updated_at.timestamp()
os.utime(feed_path, (updated_at_epoch, updated_at_epoch))
available_job = Job.get(Job.source == available_source)
missing_job = Job.get(Job.source == missing_source)
available_job, missing_job = _db_reader(
lambda: (
Job.get(Job.source == available_source),
Job.get(Job.source == missing_source),
)
)
source_feeds = cast(
tuple[dict[str, object], ...],
@ -871,16 +905,18 @@ def test_load_dashboard_view_projects_feed_status_from_job_runtime(
feed_url="https://example.com/queued.xml",
)
running_job = Job.get(Job.source == running_source)
queued_job = Job.get(Job.source == queued_source)
JobExecution.create(
job=running_job,
running_status=JobExecutionStatus.RUNNING,
started_at=reference_time - timedelta(minutes=2),
)
JobExecution.create(
job=queued_job,
running_status=JobExecutionStatus.PENDING,
_db_writer(
lambda: (
JobExecution.create(
job=Job.get(Job.source == running_source),
running_status=JobExecutionStatus.RUNNING,
started_at=reference_time - timedelta(minutes=2),
),
JobExecution.create(
job=Job.get(Job.source == queued_source),
running_status=JobExecutionStatus.PENDING,
),
)
)
source_feeds = cast(
@ -938,8 +974,12 @@ def test_render_dashboard_shows_source_feed_links_and_statuses(
published_feed = tmp_path / "out" / "feeds" / "published-source" / "feed.rss"
published_feed.parent.mkdir(parents=True)
published_feed.write_text("<rss/>\n", encoding="utf-8")
published_job = Job.get(Job.source == published_source)
missing_job = Job.get(Job.source == missing_source)
published_job, missing_job = _db_reader(
lambda: (
Job.get(Job.source == published_source),
Job.get(Job.source == missing_source),
)
)
body = str(await render_dashboard(app))
@ -1253,9 +1293,15 @@ def test_create_source_action_creates_pangea_source_and_job_in_database(
assert response.status_code == 200
assert "window.location = '/sources'" in body
source = Source.get(Source.slug == "kenya-health")
pangea = SourcePangea.get(SourcePangea.source == source)
job = Job.get(Job.source == source)
source, pangea, job = _db_reader(
lambda: (
Source.get(Source.slug == "kenya-health"),
SourcePangea.get(
SourcePangea.source == Source.get(Source.slug == "kenya-health")
),
Job.get(Job.source == Source.get(Source.slug == "kenya-health")),
)
)
rendered_sources = str(await render_sources(app))
assert source.name == "Kenya health desk"
@ -1307,9 +1353,15 @@ def test_create_source_action_creates_feed_source_and_job_in_database(
assert response.status_code == 200
assert "window.location = '/sources'" in body
source = Source.get(Source.slug == "nasa-feed")
feed = SourceFeed.get(SourceFeed.source == source)
job = Job.get(Job.source == source)
source, feed, job = _db_reader(
lambda: (
Source.get(Source.slug == "nasa-feed"),
SourceFeed.get(
SourceFeed.source == Source.get(Source.slug == "nasa-feed")
),
Job.get(Job.source == Source.get(Source.slug == "nasa-feed")),
)
)
rendered_sources = str(await render_sources(app))
assert source.source_type == "feed"
@ -1390,9 +1442,15 @@ def test_edit_source_action_updates_existing_source_and_job_in_database(
assert response.status_code == 200
assert "window.location = '/sources'" in body
source = Source.get(Source.slug == "kenya-health")
pangea = SourcePangea.get(SourcePangea.source == source)
job = Job.get(Job.source == source)
source, pangea, job = _db_reader(
lambda: (
Source.get(Source.slug == "kenya-health"),
SourcePangea.get(
SourcePangea.source == Source.get(Source.slug == "kenya-health")
),
Job.get(Job.source == Source.get(Source.slug == "kenya-health")),
)
)
rendered_sources = str(await render_sources(app))
assert source.name == "Kenya health desk nightly"
@ -1477,8 +1535,18 @@ def test_edit_source_action_rejects_slug_changes(monkeypatch, tmp_path: Path) ->
assert response.status_code == 200
assert "Slug is immutable." in body
assert Source.get(Source.slug == "kenya-health").name == "Kenya health desk"
assert Source.select().where(Source.slug == "kenya-health-renamed").count() == 0
assert (
_db_reader(lambda: Source.get(Source.slug == "kenya-health").name)
== "Kenya health desk"
)
assert (
_db_reader(
lambda: Source.select()
.where(Source.slug == "kenya-health-renamed")
.count()
)
== 0
)
asyncio.run(run())
@ -1491,10 +1559,12 @@ def test_create_source_action_validates_duplicate_slug_and_pangea_type(
async def run() -> None:
app = create_app()
Source.create(
name="Guardian feed mirror",
slug="guardian-feed",
source_type="feed",
_db_writer(
lambda: Source.create(
name="Guardian feed mirror",
slug="guardian-feed",
source_type="feed",
)
)
client = app.test_client()
@ -1526,7 +1596,14 @@ def test_create_source_action_validates_duplicate_slug_and_pangea_type(
assert "Content format is invalid." in body
assert "Content type is invalid." in body
assert "Max articles must be an integer." in body
assert Source.select().where(Source.name == "Duplicate guardian").count() == 0
assert (
_db_reader(
lambda: Source.select()
.where(Source.name == "Duplicate guardian")
.count()
)
== 0
)
asyncio.run(run())
@ -1629,10 +1706,14 @@ def test_render_runs_shows_running_scheduled_and_completed_tables(
cron_month="*",
feed_url="https://example.com/runs.xml",
)
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
job, execution = _db_writer(
lambda: (
Job.get(Job.source == source),
JobExecution.create(
job=Job.get(Job.source == source),
running_status=JobExecutionStatus.SUCCEEDED,
),
)
)
body = str(await render_runs(app))
@ -1704,14 +1785,16 @@ def test_runs_pagination_action_updates_only_the_current_tab(
cron_month="*",
feed_url="https://example.com/paged-runs.xml",
)
job = Job.get(Job.source == source)
for minute in range(21):
JobExecution.create(
job=job,
ended_at=datetime(2026, 3, 30, 12, minute, tzinfo=UTC),
running_status=JobExecutionStatus.SUCCEEDED,
_db_writer(
lambda: tuple(
JobExecution.create(
job=Job.get(Job.source == source),
ended_at=datetime(2026, 3, 30, 12, minute, tzinfo=UTC),
running_status=JobExecutionStatus.SUCCEEDED,
)
for minute in range(21)
)
)
async with client.request(
"/runs?u=shim",
@ -1853,10 +1936,14 @@ def test_render_runs_keeps_queued_execution_in_scheduled_jobs_table(
cron_month="*",
feed_url="https://example.com/scheduled.xml",
)
queued_job = Job.get(Job.source == queued_source)
queued_execution = JobExecution.create(
job=queued_job,
running_status=JobExecutionStatus.PENDING,
queued_job, queued_execution = _db_writer(
lambda: (
Job.get(Job.source == queued_source),
JobExecution.create(
job=Job.get(Job.source == queued_source),
running_status=JobExecutionStatus.PENDING,
),
)
)
async def run() -> None:
@ -1899,15 +1986,19 @@ def test_render_runs_shows_cancel_button_for_running_row_with_queued_follow_up(
cron_month="*",
feed_url="https://example.com/busy.xml",
)
job = Job.get(Job.source == source)
running_execution = JobExecution.create(
job=job,
started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
running_status=JobExecutionStatus.RUNNING,
)
pending_execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.PENDING,
job, running_execution, pending_execution = _db_writer(
lambda: (
Job.get(Job.source == source),
JobExecution.create(
job=Job.get(Job.source == source),
started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
running_status=JobExecutionStatus.RUNNING,
),
JobExecution.create(
job=Job.get(Job.source == source),
running_status=JobExecutionStatus.PENDING,
),
)
)
async def run() -> None:
@ -2036,15 +2127,19 @@ def test_cancel_queued_execution_action_deletes_pending_row_without_touching_run
cron_month="*",
feed_url="https://example.com/busy.xml",
)
job = Job.get(Job.source == source)
running_execution = JobExecution.create(
job=job,
started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
running_status=JobExecutionStatus.RUNNING,
)
pending_execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.PENDING,
job, running_execution, pending_execution = _db_writer(
lambda: (
Job.get(Job.source == source),
JobExecution.create(
job=Job.get(Job.source == source),
started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
running_status=JobExecutionStatus.RUNNING,
),
JobExecution.create(
job=Job.get(Job.source == source),
running_status=JobExecutionStatus.PENDING,
),
)
)
response = await client.post(
@ -2052,9 +2147,18 @@ def test_cancel_queued_execution_action_deletes_pending_row_without_touching_run
)
assert response.status_code == 204
assert JobExecution.get_or_none(id=int(pending_execution.get_id())) is None
assert (
JobExecution.get_by_id(int(running_execution.get_id())).running_status
_db_reader(
lambda: JobExecution.get_or_none(id=int(pending_execution.get_id()))
)
is None
)
assert (
_db_reader(
lambda: JobExecution.get_by_id(
int(running_execution.get_id())
).running_status
)
== JobExecutionStatus.RUNNING
)
@ -2087,16 +2191,20 @@ def test_clear_completed_executions_action_removes_history_and_log_artifacts(
cron_month="*",
feed_url="https://example.com/history.xml",
)
job = Job.get(Job.source == source)
completed_execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.SUCCEEDED,
ended_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
)
running_execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.RUNNING,
started_at=datetime(2026, 3, 30, 12, 5, tzinfo=UTC),
job, completed_execution, running_execution = _db_writer(
lambda: (
Job.get(Job.source == source),
JobExecution.create(
job=Job.get(Job.source == source),
running_status=JobExecutionStatus.SUCCEEDED,
ended_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
),
JobExecution.create(
job=Job.get(Job.source == source),
running_status=JobExecutionStatus.RUNNING,
started_at=datetime(2026, 3, 30, 12, 5, tzinfo=UTC),
),
)
)
log_dir.mkdir(parents=True, exist_ok=True)
completed_prefix = (
@ -2112,8 +2220,18 @@ def test_clear_completed_executions_action_removes_history_and_log_artifacts(
response = await client.post("/actions/completed-executions/clear")
assert response.status_code == 204
assert JobExecution.get_or_none(id=int(completed_execution.get_id())) is None
assert JobExecution.get_or_none(id=int(running_execution.get_id())) is not None
assert (
_db_reader(
lambda: JobExecution.get_or_none(id=int(completed_execution.get_id()))
)
is None
)
assert (
_db_reader(
lambda: JobExecution.get_or_none(id=int(running_execution.get_id()))
)
is not None
)
for suffix in (".log", ".jsonl", ".pygea.log"):
assert not completed_prefix.with_suffix(suffix).exists()
assert running_log_path.exists()
@ -2161,17 +2279,21 @@ def test_move_queued_execution_action_reorders_queue(
cron_month="*",
feed_url="https://example.com/second.xml",
)
first_job = Job.get(Job.source == first_source)
second_job = Job.get(Job.source == second_source)
first_execution = JobExecution.create(
job=first_job,
created_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
running_status=JobExecutionStatus.PENDING,
)
second_execution = JobExecution.create(
job=second_job,
created_at=datetime(2026, 3, 30, 12, 5, tzinfo=UTC),
running_status=JobExecutionStatus.PENDING,
first_job, second_job, first_execution, second_execution = _db_writer(
lambda: (
Job.get(Job.source == first_source),
Job.get(Job.source == second_source),
JobExecution.create(
job=Job.get(Job.source == first_source),
created_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
running_status=JobExecutionStatus.PENDING,
),
JobExecution.create(
job=Job.get(Job.source == second_source),
created_at=datetime(2026, 3, 30, 12, 5, tzinfo=UTC),
running_status=JobExecutionStatus.PENDING,
),
)
)
response = await client.post(
@ -2217,17 +2339,26 @@ def test_toggle_job_enabled_action_removes_queued_execution(
cron_month="*",
feed_url="https://example.com/queued.xml",
)
job = Job.get(Job.source == source)
queued_execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.PENDING,
job, queued_execution = _db_writer(
lambda: (
Job.get(Job.source == source),
JobExecution.create(
job=Job.get(Job.source == source),
running_status=JobExecutionStatus.PENDING,
),
)
)
response = await client.post(f"/actions/jobs/{job.id}/toggle-enabled")
assert response.status_code == 204
assert Job.get_by_id(job.id).enabled is False
assert JobExecution.get_or_none(id=int(queued_execution.get_id())) is None
assert _db_reader(lambda: Job.get_by_id(job.id).enabled) is False
assert (
_db_reader(
lambda: JobExecution.get_or_none(id=int(queued_execution.get_id()))
)
is None
)
body = str(await render_runs(app))
assert (
f"/actions/queued-executions/{int(queued_execution.get_id())}/cancel"
@ -2279,10 +2410,14 @@ def test_render_execution_logs_uses_app_route(monkeypatch, tmp_path: Path) -> No
cron_month="*",
feed_url="https://example.com/logs.xml",
)
job = Job.get(Job.source == source)
execution = JobExecution.create(
job=job,
running_status=JobExecutionStatus.RUNNING,
job, execution = _db_writer(
lambda: (
Job.get(Job.source == source),
JobExecution.create(
job=Job.get(Job.source == source),
running_status=JobExecutionStatus.RUNNING,
),
)
)
log_path = log_dir / f"job-{job.id}-execution-{execution.get_id()}.log"
log_path.parent.mkdir(parents=True, exist_ok=True)