Prune old job executions
All checks were successful
buildbot/nix-eval Build done.
buildbot/nix-build Build done.
buildbot/nix-effects Build done.

This commit is contained in:
Abel Luck 2026-06-02 11:31:39 +02:00
parent 813f19f355
commit 710ac76192
6 changed files with 552 additions and 11 deletions

189
tests/test_job_retention.py Normal file
View file

@@ -0,0 +1,189 @@
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from pathlib import Path
from repub.job_retention import cleanup_job_executions
from repub.jobs import JobArtifacts
from repub.model import (
Job,
JobExecution,
JobExecutionStatus,
create_source,
database,
initialize_database,
)
# Fixed, timezone-aware reference instant. The tests pass it as ``now`` to
# ``cleanup_job_executions`` and derive every ``ended_at`` from it, so the
# age of each execution is deterministic regardless of the wall clock.
NOW = datetime(2026, 6, 2, 12, 0, tzinfo=UTC)
def _create_job(name: str) -> Job:
    """Create a disabled feed source called *name* and return its ``Job`` row.

    The slug and feed URL are both derived from *name* (lower-cased, spaces
    replaced by dashes). The returned job is looked up by its ``source``
    reference; presumably ``create_source`` creates that job row as a side
    effect -- confirm in ``repub.model``.
    """
    slug = name.lower().replace(" ", "-")
    # Cron fields are grouped so the schedule reads as one unit.
    schedule = {
        "cron_minute": "*/5",
        "cron_hour": "*",
        "cron_day_of_month": "*",
        "cron_day_of_week": "*",
        "cron_month": "*",
    }
    source = create_source(
        name=name,
        slug=slug,
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        feed_url=f"https://example.com/{slug}.xml",
        **schedule,
    )
    with database.reader():
        job = Job.get(Job.source == source)
    return job
def _create_execution(
    job: Job,
    *,
    status: JobExecutionStatus,
    ended_at: datetime | None = None,
) -> JobExecution:
    """Insert a ``JobExecution`` row for *job* with the given *status*.

    When *ended_at* is provided the execution is recorded as having started
    five minutes earlier; otherwise both timestamps are stored as ``None``.
    """
    if ended_at:
        began_at = ended_at - timedelta(minutes=5)
    else:
        began_at = None

    with database.writer():
        row = JobExecution.create(
            job=job,
            running_status=status,
            started_at=began_at,
            ended_at=ended_at,
        )
    return row
def _write_artifacts(
    log_dir: Path, job: Job, execution: JobExecution
) -> tuple[Path, ...]:
    """Write placeholder artifact files for *execution* and return their paths.

    Three files are created: the execution's log file, its stats file, and a
    sibling of the log file whose suffix is replaced by ``.pygea.log``. Each
    file contains the text ``artifact <file name>``.
    """
    layout = JobArtifacts.for_execution(
        log_dir=log_dir,
        job_id=int(job.get_id()),
        execution_id=int(execution.get_id()),
    )
    log_file = layout.log_path
    # Only the log file's directory is created; the other files are assumed
    # to live alongside it.
    log_file.parent.mkdir(parents=True, exist_ok=True)

    written = (
        log_file,
        layout.stats_path,
        log_file.with_suffix(".pygea.log"),
    )
    for target in written:
        target.write_text(f"artifact {target.name}", encoding="utf-8")
    return written
def _execution_exists(execution: JobExecution) -> bool:
    """Return ``True`` if a ``JobExecution`` row with this id is still stored."""
    with database.reader():
        found = JobExecution.get_or_none(id=int(execution.get_id()))
        return found is not None
def test_cleanup_job_executions_prunes_old_completed_rows_and_artifacts(
    tmp_path: Path,
) -> None:
    """Old finished executions are pruned; fresh and unfinished ones are kept.

    The fixture ages bracket what appear to be the retention cutoffs (between
    6d23h and 8d for succeeded runs, between 89d and 91d for failed runs);
    the exact values are defined in ``repub.job_retention``.
    """
    initialize_database(tmp_path / "job-retention.db")
    log_dir = tmp_path / "out" / "logs"
    job = _create_job("Retention source")

    # (label, status, age relative to NOW) -- ``None`` age means the
    # execution never ended. Rows are created in exactly this order.
    scenarios = (
        ("old_success", JobExecutionStatus.SUCCEEDED, timedelta(days=8)),
        (
            "fresh_success",
            JobExecutionStatus.SUCCEEDED,
            timedelta(days=6, hours=23),
        ),
        ("old_failed", JobExecutionStatus.FAILED, timedelta(days=91)),
        ("fresh_failed", JobExecutionStatus.FAILED, timedelta(days=89)),
        ("old_canceled", JobExecutionStatus.CANCELED, timedelta(days=91)),
        ("old_running", JobExecutionStatus.RUNNING, None),
        ("old_pending", JobExecutionStatus.PENDING, None),
    )
    executions = {}
    for label, status, age in scenarios:
        executions[label] = _create_execution(
            job,
            status=status,
            ended_at=None if age is None else NOW - age,
        )

    expired_labels = ("old_success", "old_failed", "old_canceled")
    retained_labels = (
        "fresh_success",
        "fresh_failed",
        "old_running",
        "old_pending",
    )

    # Artifacts are written for the expired executions first, then the rest.
    pruned_paths = []
    for label in expired_labels:
        pruned_paths.extend(_write_artifacts(log_dir, job, executions[label]))
    kept_paths = []
    for label in retained_labels:
        kept_paths.extend(_write_artifacts(log_dir, job, executions[label]))

    result = cleanup_job_executions(log_dir=log_dir, now=NOW)

    assert result.matched_executions == 3
    assert result.deleted_executions == 3
    assert result.matched_files == len(pruned_paths)
    assert result.deleted_files == len(pruned_paths)
    assert result.failures == 0

    for label in expired_labels:
        assert not _execution_exists(executions[label]), label
    for label in retained_labels:
        assert _execution_exists(executions[label]), label

    assert not any(path.exists() for path in pruned_paths)
    assert all(path.exists() for path in kept_paths)
def test_cleanup_job_executions_dry_run_leaves_rows_and_artifacts(
    tmp_path: Path,
) -> None:
    """A dry run reports what would be pruned without deleting anything.

    One succeeded execution that ended eight days before ``NOW`` is created
    together with its artifact files. With ``dry_run=True`` the result must
    count the execution and its files as matched, report zero deletions and
    zero failures, and leave both the row and the files in place.
    """
    initialize_database(tmp_path / "job-retention-dry-run.db")
    log_dir = tmp_path / "out" / "logs"
    job = _create_job("Dry run source")
    execution = _create_execution(
        job,
        status=JobExecutionStatus.SUCCEEDED,
        ended_at=NOW - timedelta(days=8),
    )
    paths = _write_artifacts(log_dir, job, execution)

    result = cleanup_job_executions(log_dir=log_dir, now=NOW, dry_run=True)

    assert result.matched_executions == 1
    assert result.deleted_executions == 0
    assert result.matched_files == len(paths)
    assert result.deleted_files == 0
    # The other cleanup tests check the failure counter; a dry run should
    # report no failures either.
    assert result.failures == 0
    assert _execution_exists(execution)
    assert all(path.exists() for path in paths)
def test_cleanup_job_executions_prunes_rows_when_artifacts_are_missing(
    tmp_path: Path,
) -> None:
    """An expired row is still pruned when it has no artifact files on disk.

    No artifacts are written and the log directory is never created, so the
    cleanup must delete the row while matching and deleting zero files and
    reporting no failures.
    """
    initialize_database(tmp_path / "job-retention-missing-artifacts.db")
    job = _create_job("Missing artifacts source")
    orphan = _create_execution(
        job,
        status=JobExecutionStatus.FAILED,
        ended_at=NOW - timedelta(days=91),
    )
    # This directory is intentionally never created by the test.
    absent_log_dir = tmp_path / "out" / "logs"

    result = cleanup_job_executions(log_dir=absent_log_dir, now=NOW)

    counters = (
        result.matched_executions,
        result.deleted_executions,
        result.matched_files,
        result.deleted_files,
        result.failures,
    )
    assert counters == (1, 1, 0, 0, 0)
    assert not _execution_exists(orphan)