"""Tests for ``cleanup_job_executions`` from ``repub.job_retention``.

Each test initialises a fresh database under pytest's ``tmp_path``, creates
job executions whose ``ended_at`` is expressed relative to the fixed ``NOW``
instant, and then checks which rows and artifact files remain after cleanup.
"""

from __future__ import annotations

from datetime import UTC, datetime, timedelta
from pathlib import Path

from repub.job_retention import cleanup_job_executions
from repub.jobs import JobArtifacts
from repub.model import (
    Job,
    JobExecution,
    JobExecutionStatus,
    create_source,
    database,
    initialize_database,
)

# Fixed reference instant handed to cleanup_job_executions as ``now`` so that
# every execution age in this module is deterministic.
NOW = datetime(2026, 6, 2, 12, 0, tzinfo=UTC)


def _create_job(name: str) -> Job:
    """Create a disabled feed source called *name* and return its ``Job`` row.

    The slug (lower-cased name with spaces replaced by dashes) is used both
    as the source slug and as the file stem of the example feed URL.
    """
    slug = name.lower().replace(" ", "-")
    source = create_source(
        name=name,
        slug=slug,
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url=f"https://example.com/{slug}.xml",
    )
    # NOTE(review): presumably create_source also creates the Job row that is
    # looked up here -- confirm against repub.model.
    with database.reader():
        job = Job.get(Job.source == source)
    return job


def _create_execution(
    job: Job,
    *,
    status: JobExecutionStatus,
    ended_at: datetime | None = None,
) -> JobExecution:
    """Insert a ``JobExecution`` for *job* with the given *status*.

    When *ended_at* is provided, ``started_at`` is set five minutes before
    it; otherwise both timestamps are stored as ``None``.
    """
    if ended_at:
        started_at = ended_at - timedelta(minutes=5)
    else:
        started_at = None

    with database.writer():
        execution = JobExecution.create(
            job=job,
            running_status=status,
            started_at=started_at,
            ended_at=ended_at,
        )
    return execution


def _write_artifacts(
    log_dir: Path, job: Job, execution: JobExecution
) -> tuple[Path, ...]:
    """Write placeholder artifact files for *execution* and return their paths.

    Three files are written: the log file, the stats file, and a sibling of
    the log file whose suffix is replaced by ``.pygea.log``. Each file
    contains the text ``artifact <file name>``.
    """
    layout = JobArtifacts.for_execution(
        log_dir=log_dir,
        job_id=int(job.get_id()),
        execution_id=int(execution.get_id()),
    )
    log_file = layout.log_path
    log_file.parent.mkdir(parents=True, exist_ok=True)

    written = (
        log_file,
        layout.stats_path,
        log_file.with_suffix(".pygea.log"),
    )
    for target in written:
        target.write_text(f"artifact {target.name}", encoding="utf-8")
    return written


def _execution_exists(execution: JobExecution) -> bool:
    """Return ``True`` if a row with *execution*'s id is still in the database."""
    execution_id = int(execution.get_id())
    with database.reader():
        row = JobExecution.get_or_none(id=execution_id)
        return row is not None


def test_cleanup_job_executions_prunes_old_completed_rows_and_artifacts(
    tmp_path: Path,
) -> None:
    """Old finished executions and their files go; fresh and unfinished stay."""
    initialize_database(tmp_path / "job-retention.db")
    log_dir = tmp_path / "out" / "logs"
    job = _create_job("Retention source")

    def finished(status: JobExecutionStatus, age: timedelta) -> JobExecution:
        # Execution that ended ``age`` before NOW.
        return _create_execution(job, status=status, ended_at=NOW - age)

    # The ages bracket what look like a 7-day window for succeeded runs and a
    # 90-day window for failed/canceled runs (inferred from the expectations
    # below -- the thresholds themselves live in repub.job_retention).
    old_success = finished(JobExecutionStatus.SUCCEEDED, timedelta(days=8))
    fresh_success = finished(
        JobExecutionStatus.SUCCEEDED, timedelta(days=6, hours=23)
    )
    old_failed = finished(JobExecutionStatus.FAILED, timedelta(days=91))
    fresh_failed = finished(JobExecutionStatus.FAILED, timedelta(days=89))
    old_canceled = finished(JobExecutionStatus.CANCELED, timedelta(days=91))
    # Unfinished executions carry no end timestamp.
    old_running = _create_execution(job, status=JobExecutionStatus.RUNNING)
    old_pending = _create_execution(job, status=JobExecutionStatus.PENDING)

    pruned_paths: list[Path] = []
    for doomed in (old_success, old_failed, old_canceled):
        pruned_paths.extend(_write_artifacts(log_dir, job, doomed))

    kept_paths: list[Path] = []
    for survivor in (fresh_success, fresh_failed, old_running, old_pending):
        kept_paths.extend(_write_artifacts(log_dir, job, survivor))

    result = cleanup_job_executions(log_dir=log_dir, now=NOW)

    expected_file_count = len(pruned_paths)
    assert result.matched_executions == 3
    assert result.deleted_executions == 3
    assert result.matched_files == expected_file_count
    assert result.deleted_files == expected_file_count
    assert result.failures == 0

    # (execution, should the row still exist after cleanup?)
    row_expectations = (
        (old_success, False),
        (fresh_success, True),
        (old_failed, False),
        (fresh_failed, True),
        (old_canceled, False),
        (old_running, True),
        (old_pending, True),
    )
    for execution, should_exist in row_expectations:
        assert _execution_exists(execution) == should_exist

    assert not any(path.exists() for path in pruned_paths)
    assert all(path.exists() for path in kept_paths)


def test_cleanup_job_executions_dry_run_leaves_rows_and_artifacts(
    tmp_path: Path,
) -> None:
    """With ``dry_run=True`` matches are reported but nothing is deleted."""
    initialize_database(tmp_path / "job-retention-dry-run.db")
    log_dir = tmp_path / "out" / "logs"
    job = _create_job("Dry run source")
    ended = NOW - timedelta(days=8)
    execution = _create_execution(
        job, status=JobExecutionStatus.SUCCEEDED, ended_at=ended
    )
    artifact_paths = _write_artifacts(log_dir, job, execution)

    result = cleanup_job_executions(log_dir=log_dir, now=NOW, dry_run=True)

    assert result.matched_executions == 1
    assert result.deleted_executions == 0
    assert result.matched_files == len(artifact_paths)
    assert result.deleted_files == 0
    assert _execution_exists(execution)
    for artifact in artifact_paths:
        assert artifact.exists()


def test_cleanup_job_executions_prunes_rows_when_artifacts_are_missing(
    tmp_path: Path,
) -> None:
    """A matching row is deleted even when no artifact files were written."""
    initialize_database(tmp_path / "job-retention-missing-artifacts.db")
    job = _create_job("Missing artifacts source")
    ended = NOW - timedelta(days=91)
    execution = _create_execution(
        job, status=JobExecutionStatus.FAILED, ended_at=ended
    )
    # The log directory is never created in this test.
    missing_log_dir = tmp_path / "out" / "logs"

    result = cleanup_job_executions(log_dir=missing_log_dir, now=NOW)

    assert result.matched_executions == 1
    assert result.deleted_executions == 1
    assert result.matched_files == 0
    assert result.deleted_files == 0
    assert result.failures == 0
    assert not _execution_exists(execution)