Prune old job executions
This commit is contained in:
parent
813f19f355
commit
710ac76192
6 changed files with 552 additions and 11 deletions
|
|
@ -67,6 +67,7 @@ def test_parse_args_supports_cleanup_media_defaults() -> None:
|
|||
assert command == "cleanup-media"
|
||||
assert args.config is None
|
||||
assert args.feeds_dir is None
|
||||
assert args.log_dir is None
|
||||
assert args.days == 25
|
||||
assert args.dry_run is False
|
||||
|
||||
|
|
@ -84,7 +85,24 @@ def test_entrypoint_runs_cleanup_media(monkeypatch, tmp_path) -> None:
|
|||
recorded["media_dirs"] = media_dirs
|
||||
return FakeResult()
|
||||
|
||||
def fake_cleanup_job_executions(
|
||||
*,
|
||||
log_dir,
|
||||
successful_days,
|
||||
unsuccessful_days,
|
||||
dry_run,
|
||||
):
|
||||
recorded["log_dir"] = log_dir
|
||||
recorded["successful_days"] = successful_days
|
||||
recorded["unsuccessful_days"] = unsuccessful_days
|
||||
recorded["job_dry_run"] = dry_run
|
||||
return FakeResult()
|
||||
|
||||
monkeypatch.setattr("repub.entrypoint.cleanup_media", fake_cleanup_media)
|
||||
monkeypatch.setattr(
|
||||
"repub.entrypoint.cleanup_job_executions",
|
||||
fake_cleanup_job_executions,
|
||||
)
|
||||
|
||||
exit_code = entrypoint(
|
||||
[
|
||||
|
|
@ -103,6 +121,10 @@ def test_entrypoint_runs_cleanup_media(monkeypatch, tmp_path) -> None:
|
|||
"retention_days": 10,
|
||||
"dry_run": True,
|
||||
"media_dirs": ("images", "audio", "video", "files"),
|
||||
"log_dir": tmp_path / "logs",
|
||||
"successful_days": 7,
|
||||
"unsuccessful_days": 90,
|
||||
"job_dry_run": True,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -139,7 +161,24 @@ REPUBLISHER_FILE_DIR = "files-custom"
|
|||
recorded["media_dirs"] = media_dirs
|
||||
return FakeResult()
|
||||
|
||||
def fake_cleanup_job_executions(
|
||||
*,
|
||||
log_dir,
|
||||
successful_days,
|
||||
unsuccessful_days,
|
||||
dry_run,
|
||||
):
|
||||
recorded["log_dir"] = log_dir
|
||||
recorded["successful_days"] = successful_days
|
||||
recorded["unsuccessful_days"] = unsuccessful_days
|
||||
recorded["job_dry_run"] = dry_run
|
||||
return FakeResult()
|
||||
|
||||
monkeypatch.setattr("repub.entrypoint.cleanup_media", fake_cleanup_media)
|
||||
monkeypatch.setattr(
|
||||
"repub.entrypoint.cleanup_job_executions",
|
||||
fake_cleanup_job_executions,
|
||||
)
|
||||
|
||||
exit_code = entrypoint(["cleanup-media", "--config", str(config_path)])
|
||||
|
||||
|
|
@ -154,6 +193,61 @@ REPUBLISHER_FILE_DIR = "files-custom"
|
|||
"videos-custom",
|
||||
"files-custom",
|
||||
),
|
||||
"log_dir": tmp_path / "mirror" / "logs",
|
||||
"successful_days": 7,
|
||||
"unsuccessful_days": 90,
|
||||
"job_dry_run": False,
|
||||
}
|
||||
|
||||
|
||||
def test_entrypoint_cleanup_media_accepts_log_dir_override(
    monkeypatch, tmp_path
) -> None:
    """Check that a ``--log-dir`` option is forwarded to job-execution cleanup.

    Both cleanup callables on ``repub.entrypoint`` are replaced with fakes
    that record the keyword arguments they receive. No retention or dry-run
    options are passed, so the fake is expected to see 7 / 90 days and
    ``dry_run=False``.
    """
    feeds_path = tmp_path / "feeds"
    logs_path = tmp_path / "custom-logs"
    recorded: dict[str, object] = {}

    class FakeResult:
        failures = 0

    def fake_cleanup_media(*, feeds_dir, retention_days, dry_run, media_dirs):
        # Only the feeds directory is of interest to this test.
        recorded["feeds_dir"] = feeds_dir
        return FakeResult()

    def fake_cleanup_job_executions(
        *,
        log_dir,
        successful_days,
        unsuccessful_days,
        dry_run,
    ):
        recorded.update(
            log_dir=log_dir,
            successful_days=successful_days,
            unsuccessful_days=unsuccessful_days,
            dry_run=dry_run,
        )
        return FakeResult()

    replacements = (
        ("repub.entrypoint.cleanup_media", fake_cleanup_media),
        ("repub.entrypoint.cleanup_job_executions", fake_cleanup_job_executions),
    )
    for target, fake in replacements:
        monkeypatch.setattr(target, fake)

    argv = [
        "cleanup-media",
        "--feeds-dir",
        str(feeds_path),
        "--log-dir",
        str(logs_path),
    ]
    exit_code = entrypoint(argv)

    expected = {
        "feeds_dir": feeds_path,
        "log_dir": logs_path,
        "successful_days": 7,
        "unsuccessful_days": 90,
        "dry_run": False,
    }
    assert exit_code == 0
    assert recorded == expected
|
||||
|
||||
|
||||
|
|
|
|||
189
tests/test_job_retention.py
Normal file
189
tests/test_job_retention.py
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from repub.job_retention import cleanup_job_executions
|
||||
from repub.jobs import JobArtifacts
|
||||
from repub.model import (
|
||||
Job,
|
||||
JobExecution,
|
||||
JobExecutionStatus,
|
||||
create_source,
|
||||
database,
|
||||
initialize_database,
|
||||
)
|
||||
|
||||
NOW = datetime(2026, 6, 2, 12, 0, tzinfo=UTC)
|
||||
|
||||
|
||||
def _create_job(name: str) -> Job:
    """Create a disabled feed source named *name* and return the ``Job`` for it.

    The slug and the feed URL are both built from the lower-cased name with
    spaces replaced by hyphens. The job is looked up by source afterwards;
    presumably ``create_source`` creates it -- verify against ``repub.model``.
    """
    slug = name.lower().replace(" ", "-")
    source = create_source(
        name=name,
        slug=slug,
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url=f"https://example.com/{slug}.xml",
    )
    with database.reader():
        job = Job.get(Job.source == source)
    return job
|
||||
|
||||
|
||||
def _create_execution(
    job: Job,
    *,
    status: JobExecutionStatus,
    ended_at: datetime | None = None,
) -> JobExecution:
    """Insert a ``JobExecution`` row for *job* with the given status.

    When *ended_at* is given, the execution is recorded as having started
    five minutes earlier; otherwise both timestamps are left as ``None``.
    """
    if ended_at is None:
        started_at = None
    else:
        started_at = ended_at - timedelta(minutes=5)

    with database.writer():
        execution = JobExecution.create(
            job=job,
            running_status=status,
            started_at=started_at,
            ended_at=ended_at,
        )
    return execution
|
||||
|
||||
|
||||
def _write_artifacts(
    log_dir: Path, job: Job, execution: JobExecution
) -> tuple[Path, ...]:
    """Write placeholder artifact files for *execution* and return their paths.

    Three files are written: the log file, the stats file, and a
    ``.pygea.log`` sibling of the log file. Each one contains the text
    ``artifact <file name>``.
    """
    artifacts = JobArtifacts.for_execution(
        log_dir=log_dir,
        job_id=int(job.get_id()),
        execution_id=int(execution.get_id()),
    )
    log_path = artifacts.log_path
    # Only the log file's directory is created before writing.
    log_path.parent.mkdir(parents=True, exist_ok=True)

    written = (
        log_path,
        artifacts.stats_path,
        log_path.with_suffix(".pygea.log"),
    )
    for target in written:
        target.write_text(f"artifact {target.name}", encoding="utf-8")
    return written
|
||||
|
||||
|
||||
def _execution_exists(execution: JobExecution) -> bool:
    """Return ``True`` if a row with *execution*'s id is still in the database."""
    with database.reader():
        row = JobExecution.get_or_none(id=int(execution.get_id()))
    return row is not None
|
||||
|
||||
|
||||
def test_cleanup_job_executions_prunes_old_completed_rows_and_artifacts(
    tmp_path: Path,
) -> None:
    """Check which executions and artifact files a default cleanup removes.

    Expected to be pruned: a success that ended 8 days before ``NOW``, and a
    failure and a cancellation that each ended 91 days before ``NOW``.
    Expected to survive: a success that ended 6 days 23 hours ago, a failure
    that ended 89 days ago, and running / pending executions with no end time.
    """
    initialize_database(tmp_path / "job-retention.db")
    log_dir = tmp_path / "out" / "logs"
    job = _create_job("Retention source")

    # (label, status, age relative to NOW); an age of None leaves ended_at unset.
    scenarios = (
        ("old_success", JobExecutionStatus.SUCCEEDED, timedelta(days=8)),
        (
            "fresh_success",
            JobExecutionStatus.SUCCEEDED,
            timedelta(days=6, hours=23),
        ),
        ("old_failed", JobExecutionStatus.FAILED, timedelta(days=91)),
        ("fresh_failed", JobExecutionStatus.FAILED, timedelta(days=89)),
        ("old_canceled", JobExecutionStatus.CANCELED, timedelta(days=91)),
        ("old_running", JobExecutionStatus.RUNNING, None),
        ("old_pending", JobExecutionStatus.PENDING, None),
    )
    executions: dict[str, JobExecution] = {}
    for label, status, age in scenarios:
        ended_at = None if age is None else NOW - age
        executions[label] = _create_execution(job, status=status, ended_at=ended_at)

    expired = [
        executions[label] for label in ("old_success", "old_failed", "old_canceled")
    ]
    retained = [
        executions[label]
        for label in ("fresh_success", "fresh_failed", "old_running", "old_pending")
    ]

    # Artifacts for the to-be-pruned executions are written first, then the rest.
    pruned_paths: list[Path] = []
    for execution in expired:
        pruned_paths.extend(_write_artifacts(log_dir, job, execution))
    kept_paths: list[Path] = []
    for execution in retained:
        kept_paths.extend(_write_artifacts(log_dir, job, execution))

    result = cleanup_job_executions(log_dir=log_dir, now=NOW)

    assert result.matched_executions == 3
    assert result.deleted_executions == 3
    assert result.matched_files == len(pruned_paths)
    assert result.deleted_files == len(pruned_paths)
    assert result.failures == 0

    for execution in expired:
        assert not _execution_exists(execution)
    for execution in retained:
        assert _execution_exists(execution)

    assert not any(path.exists() for path in pruned_paths)
    assert all(path.exists() for path in kept_paths)
|
||||
|
||||
|
||||
def test_cleanup_job_executions_dry_run_leaves_rows_and_artifacts(
    tmp_path: Path,
) -> None:
    """Check that ``dry_run=True`` reports matches but deletes nothing.

    A success that ended 8 days before ``NOW`` is created with its artifact
    files; after the dry run both the row and the files must still exist.
    """
    initialize_database(tmp_path / "job-retention-dry-run.db")
    log_dir = tmp_path / "out" / "logs"
    job = _create_job("Dry run source")
    eight_days_ago = NOW - timedelta(days=8)
    execution = _create_execution(
        job, status=JobExecutionStatus.SUCCEEDED, ended_at=eight_days_ago
    )
    paths = _write_artifacts(log_dir, job, execution)

    result = cleanup_job_executions(log_dir=log_dir, now=NOW, dry_run=True)

    # Matches are counted, deletions are not.
    assert result.matched_executions == 1
    assert result.deleted_executions == 0
    assert result.matched_files == len(paths)
    assert result.deleted_files == 0
    # NOTE(review): the sibling tests also assert ``result.failures == 0``;
    # consider adding that here once confirmed against the implementation.
    assert _execution_exists(execution)
    for path in paths:
        assert path.exists()
|
||||
|
||||
|
||||
def test_cleanup_job_executions_prunes_rows_when_artifacts_are_missing(
    tmp_path: Path,
) -> None:
    """Check that an old row is still pruned when it has no artifact files.

    A failure that ended 91 days before ``NOW`` is created without writing
    any artifacts; the test never creates the log directory itself.
    """
    initialize_database(tmp_path / "job-retention-missing-artifacts.db")
    job = _create_job("Missing artifacts source")
    ended_at = NOW - timedelta(days=91)
    execution = _create_execution(
        job, status=JobExecutionStatus.FAILED, ended_at=ended_at
    )
    log_dir = tmp_path / "out" / "logs"

    result = cleanup_job_executions(log_dir=log_dir, now=NOW)

    # The row is matched and deleted; no files are found or removed.
    assert result.matched_executions == 1
    assert result.deleted_executions == 1
    assert result.matched_files == 0
    assert result.deleted_files == 0
    assert result.failures == 0
    assert not _execution_exists(execution)
|
||||
Loading…
Add table
Add a link
Reference in a new issue