# republisher/tests/test_jobs.py
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from pathlib import Path
from repub.jobs import load_runs_view
from repub.model import (
Job,
JobExecution,
JobExecutionStatus,
create_source,
initialize_database,
)
def test_load_runs_view_humanizes_completed_execution_summary_bytes(
    tmp_path: Path,
) -> None:
    """A succeeded run's byte count is shown as a human-readable size."""
    initialize_database(tmp_path / "jobs-completed.db")
    # Disabled feed source; scheduling fields are irrelevant to this test.
    source_fields = {
        "name": "Completed source",
        "slug": "completed-source",
        "source_type": "feed",
        "notes": "",
        "spider_arguments": "",
        "enabled": False,
        "cron_minute": "*/5",
        "cron_hour": "*",
        "cron_day_of_month": "*",
        "cron_day_of_week": "*",
        "cron_month": "*",
        "feed_url": "https://example.com/completed.xml",
    }
    completed_source = create_source(**source_fields)
    owning_job = Job.get(Job.source == completed_source)
    # 16,410,269 bytes is ~15.7 MiB, exercising the MiB branch.
    JobExecution.create(
        job=owning_job,
        running_status=JobExecutionStatus.SUCCEEDED,
        ended_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
        requests_count=14,
        items_count=11,
        bytes_count=16_410_269,
    )
    view = load_runs_view(
        log_dir=tmp_path / "out" / "logs",
        now=datetime(2026, 3, 30, 12, 30, tzinfo=UTC),
    )
    assert view["completed"][0]["stats"] == "14 requests • 11 items • 15.7 MiB"
def test_load_runs_view_humanizes_running_execution_summary_bytes(
    tmp_path: Path,
) -> None:
    """An in-flight run's byte count is shown as a human-readable size."""
    initialize_database(tmp_path / "jobs-running.db")
    # Disabled feed source; scheduling fields are irrelevant to this test.
    source_fields = {
        "name": "Running source",
        "slug": "running-source",
        "source_type": "feed",
        "notes": "",
        "spider_arguments": "",
        "enabled": False,
        "cron_minute": "*/5",
        "cron_hour": "*",
        "cron_day_of_month": "*",
        "cron_day_of_week": "*",
        "cron_month": "*",
        "feed_url": "https://example.com/running.xml",
    }
    running_source = create_source(**source_fields)
    owning_job = Job.get(Job.source == running_source)
    # 1,536 bytes is exactly 1.5 KiB, exercising the KiB branch.
    JobExecution.create(
        job=owning_job,
        running_status=JobExecutionStatus.RUNNING,
        started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
        requests_count=14,
        items_count=11,
        bytes_count=1_536,
    )
    view = load_runs_view(
        log_dir=tmp_path / "out" / "logs",
        now=datetime(2026, 3, 30, 12, 30, tzinfo=UTC),
    )
    assert view["running"][0]["stats"] == "14 requests • 11 items • 1.5 KiB"
def test_load_runs_view_projects_queued_executions_in_fifo_order(
    tmp_path: Path,
) -> None:
    """Queued rows appear oldest-first with correct positions and move paths."""
    initialize_database(tmp_path / "jobs-queued.db")
    reference_time = datetime(2026, 3, 30, 12, 30, tzinfo=UTC)
    # (name, slug, feed URL, minutes before reference_time it was queued);
    # creation order here establishes the expected FIFO order.
    specs = (
        ("First queued source", "first-queued-source", "https://example.com/first.xml", 7),
        ("Second queued source", "second-queued-source", "https://example.com/second.xml", 3),
    )
    executions = []
    for name, slug, feed_url, minutes_ago in specs:
        queued_source = create_source(
            name=name,
            slug=slug,
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        queued_job = Job.get(Job.source == queued_source)
        executions.append(
            JobExecution.create(
                job=queued_job,
                created_at=reference_time - timedelta(minutes=minutes_ago),
                running_status=JobExecutionStatus.PENDING,
            )
        )
    first_id, second_id = (int(execution.get_id()) for execution in executions)
    view = load_runs_view(
        log_dir=tmp_path / "out" / "logs",
        now=reference_time,
    )
    queued_rows = view["queued"]
    assert tuple(row["execution_id"] for row in queued_rows) == (first_id, second_id)
    assert tuple(row["queue_position"] for row in queued_rows) == (1, 2)
    assert tuple(row["queued_at"] for row in queued_rows) == (
        "7 minutes ago",
        "3 minutes ago",
    )
    # Head of the queue cannot move up; tail cannot move down.
    assert queued_rows[0]["move_up_disabled"] is True
    assert (
        queued_rows[0]["move_down_post_path"]
        == f"/actions/queued-executions/{first_id}/move-down"
    )
    assert (
        queued_rows[1]["move_up_post_path"]
        == f"/actions/queued-executions/{second_id}/move-up"
    )
    assert queued_rows[1]["move_down_disabled"] is True
def test_load_runs_view_keeps_queued_jobs_in_scheduled_jobs(
    tmp_path: Path,
) -> None:
    """A queued job still shows in "upcoming", but with its run action disabled."""
    initialize_database(tmp_path / "jobs-queue-separation.db")

    def make_source(name: str, slug: str, feed_url: str):
        # Both sources share every field except identity; all are enabled.
        return create_source(
            name=name,
            slug=slug,
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )

    queued_source = make_source(
        "Queued source", "queued-source", "https://example.com/queued.xml"
    )
    scheduled_source = make_source(
        "Scheduled source", "scheduled-source", "https://example.com/scheduled.xml"
    )
    queued_job = Job.get(Job.source == queued_source)
    Job.get(Job.source == scheduled_source)
    # Only the first source gets a pending execution.
    JobExecution.create(
        job=queued_job,
        running_status=JobExecutionStatus.PENDING,
    )
    view = load_runs_view(
        log_dir=tmp_path / "out" / "logs",
        now=datetime(2026, 3, 30, 12, 30, tzinfo=UTC),
    )
    assert tuple(row["slug"] for row in view["queued"]) == ("queued-source",)
    assert tuple(row["slug"] for row in view["upcoming"]) == (
        "queued-source",
        "scheduled-source",
    )
    upcoming_rows = view["upcoming"]
    assert upcoming_rows[0]["run_reason"] == "Queued"
    assert upcoming_rows[0]["run_disabled"] is True
    assert upcoming_rows[1]["run_reason"] == "Ready"
    assert upcoming_rows[1]["run_disabled"] is False
def test_load_runs_view_running_row_targets_queued_follow_up_cancel(
    tmp_path: Path,
) -> None:
    """The running row's cancel action targets the job's queued follow-up run."""
    initialize_database(tmp_path / "jobs-running-cancel.db")
    running_source = create_source(
        name="Running source",
        slug="running-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=True,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/running.xml",
    )
    owning_job = Job.get(Job.source == running_source)
    # One execution currently running...
    JobExecution.create(
        job=owning_job,
        started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
        running_status=JobExecutionStatus.RUNNING,
    )
    # ...and a later pending one for the same job; cancel should hit this one.
    follow_up = JobExecution.create(
        job=owning_job,
        created_at=datetime(2026, 3, 30, 12, 5, tzinfo=UTC),
        running_status=JobExecutionStatus.PENDING,
    )
    view = load_runs_view(
        log_dir=tmp_path / "out" / "logs",
        now=datetime(2026, 3, 30, 12, 30, tzinfo=UTC),
    )
    running_row = view["running"][0]
    follow_up_id = int(follow_up.get_id())
    assert running_row["cancel_label"] == "Cancel"
    assert running_row["cancel_post_path"] == (
        f"/actions/queued-executions/{follow_up_id}/cancel"
    )
def test_load_runs_view_paginates_completed_executions_after_20_rows(
    tmp_path: Path,
) -> None:
    """Completed runs paginate at 20 per page; the 21st spills to page 2."""
    initialize_database(tmp_path / "jobs-completed-pagination.db")
    completed_source = create_source(
        name="Completed source",
        slug="completed-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/completed.xml",
    )
    owning_job = Job.get(Job.source == completed_source)
    base_time = datetime(2026, 3, 30, 12, 0, tzinfo=UTC)
    # 21 succeeded runs: one more than a full page.
    for minutes_back in range(21):
        JobExecution.create(
            job=owning_job,
            running_status=JobExecutionStatus.SUCCEEDED,
            ended_at=base_time - timedelta(minutes=minutes_back),
        )
    pages = {
        page_number: load_runs_view(
            log_dir=tmp_path / "out" / "logs",
            now=base_time,
            completed_page=page_number,
        )
        for page_number in (1, 2)
    }
    assert len(pages[1]["completed"]) == 20
    assert len(pages[2]["completed"]) == 1
    assert pages[1]["completed_page"] == 1
    assert pages[2]["completed_page"] == 2
    # Both pages report identical totals regardless of which page is viewed.
    assert pages[1]["completed_total_pages"] == 2
    assert pages[2]["completed_total_pages"] == 2
    assert pages[1]["completed_total_count"] == 21
    assert pages[2]["completed_total_count"] == 21