Refactor database access through managed connections

This commit is contained in:
Abel Luck 2026-03-31 17:30:07 +02:00
parent f19bab6fa2
commit 3f28e46ff6
10 changed files with 1327 additions and 716 deletions

View file

@@ -1,12 +1,8 @@
from __future__ import annotations
import json
import os
from datetime import UTC, datetime
from enum import IntEnum
from importlib import resources
from importlib.resources.abc import Traversable
from pathlib import Path
from typing import Any
from peewee import (
@@ -16,29 +12,24 @@ from peewee import (
ForeignKeyField,
IntegerField,
Model,
SqliteDatabase,
TextField,
)
from playhouse.migrate import SchemaMigrator, migrate
# NOTE(review): this span is a diff rendering that interleaves the removed
# pre-refactor definitions with the added post-refactor re-exports; the
# names below appear twice (locally defined, then re-exported from repub.db).
DEFAULT_DB_PATH = Path("republisher.db")
# SQLite pragmas applied to every connection (WAL journal, FK enforcement, etc.).
DATABASE_PRAGMAS = {
    "busy_timeout": 5000,
    "cache_size": 15625,
    "foreign_keys": 1,
    "journal_mode": "wal",
    "page_size": 4096,
    "synchronous": "normal",
    "temp_store": "memory",
}
SCHEMA_GLOB = "*.sql"
# Post-refactor: database plumbing now lives in repub.db; these assignments
# keep the old import locations working for existing callers.
from repub import db as db_module
DEFAULT_DB_PATH = db_module.DEFAULT_DB_PATH
DATABASE_PRAGMAS = db_module.DATABASE_PRAGMAS
SCHEMA_GLOB = db_module.SCHEMA_GLOB
database = db_module.database
initialize_database = db_module.initialize_database
resolve_database_path = db_module.resolve_database_path
schema_paths = db_module.schema_paths
# Keys and fallback values for rows in the AppSetting table.
MAX_CONCURRENT_JOBS_SETTING_KEY = "max_concurrent_jobs"
DEFAULT_MAX_CONCURRENT_JOBS = 1
FEED_URL_SETTING_KEY = "feed_url"
DEFAULT_FEED_URL = ""
# NOTE(review): pre-refactor binding — superseded by db_module.database above.
database = SqliteDatabase(None, pragmas=DATABASE_PRAGMAS)
class JobExecutionStatus(IntEnum):
PENDING = 0
@@ -52,101 +43,24 @@ def utc_now() -> datetime:
return datetime.now(UTC)
def resolve_database_path(db_path: str | Path | None = None) -> Path:
raw_value = (
os.environ.get("REPUBLISHER_DB_PATH", DEFAULT_DB_PATH)
if db_path is None
else db_path
)
raw_path = Path(raw_value)
return raw_path.expanduser().resolve()
def schema_paths() -> tuple[Traversable, ...]:
    """Return the packaged SQL schema files, sorted by file name."""
    sql_dir = resources.files("repub").joinpath("sql")
    scripts = [entry for entry in sql_dir.iterdir() if entry.name.endswith(".sql")]
    # Lexicographic order makes schema application deterministic.
    scripts.sort(key=lambda entry: entry.name)
    return tuple(scripts)
def initialize_database(db_path: str | Path | None = None) -> Path:
    """Initialise the global ``database`` at *db_path* and apply the schema.

    Resolves the target path, points the (deferred) peewee database at it,
    executes every packaged schema script, then runs the legacy column
    migrations. The connection is always closed before returning.
    """
    target = resolve_database_path(db_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    # peewee refuses to re-init an open database, so close any stale handle.
    if not database.is_closed():
        database.close()
    database.init(str(target), pragmas=DATABASE_PRAGMAS)
    database.connect(reuse_if_open=True)
    try:
        for script in schema_paths():
            sql_text = script.read_text(encoding="utf-8")
            database.connection().executescript(sql_text)
        _run_legacy_migrations()
    finally:
        database.close()
    return target
def _run_legacy_migrations() -> None:
    """Add boolean columns that post-date the originally shipped schema.

    Older databases lack the ``convert_images``/``convert_video`` columns on
    the ``job`` table. For each missing column, add it with a CHECK
    constraint restricting it to 0/1 and backfill a default of 1 (true).
    All operations run in a single transaction.
    """
    job_columns = {column.name for column in database.get_columns("job")}
    migrator = SchemaMigrator.from_database(database)
    operations = []
    # Both legacy columns have an identical shape; build the operations
    # generically instead of duplicating the block per column.
    for column_name in ("convert_images", "convert_video"):
        if column_name in job_columns:
            continue
        operations.extend(
            (
                migrator.add_column(
                    "job",
                    column_name,
                    BooleanField(
                        default=True,
                        constraints=[Check(f"{column_name} IN (0, 1)")],
                    ),
                ),
                migrator.add_column_default("job", column_name, 1),
            )
        )
    if operations:
        with database.atomic():
            migrate(*operations)
def source_slug_exists(slug: str) -> bool:
    # Return True if a Source row with this slug already exists.
    # NOTE(review): diff rendering stacks the removed connection_context()
    # wrapper above the added reader() wrapper; post-refactor code uses only
    # ``with database.reader():``.
    with database.connection_context():
        with database.reader():
            return Source.select().where(Source.slug == slug).exists()
def save_setting(key: str, value: Any) -> None:
    # Serialise with sorted keys so equal values always produce equal payloads.
    payload = json.dumps(value, sort_keys=True)
    # NOTE(review): the diff shows both function bodies back-to-back — the
    # removed connection_context()/atomic() variant followed by the added
    # writer() variant; only the writer() block survives the refactor.
    with database.connection_context():
        with database.atomic():
            setting = AppSetting.get_or_none(AppSetting.key == key)
            if setting is None:
                # No existing row: insert instead of update.
                AppSetting.create(key=key, value=payload)
                return
            setting.value = payload
            setting.save()
    with database.writer():
        setting = AppSetting.get_or_none(AppSetting.key == key)
        if setting is None:
            # No existing row: insert instead of update.
            AppSetting.create(key=key, value=payload)
            return
        setting.value = payload
        setting.save()
def load_setting(key: str, default: Any) -> Any:
with database.connection_context():
with database.reader():
setting = AppSetting.get_or_none(AppSetting.key == key)
if setting is None:
return default
@@ -177,8 +91,14 @@ def load_settings_form() -> dict[str, object]:
}
def load_job_enabled(job_id: int) -> bool | None:
    """Return the job's ``enabled`` flag, or ``None`` if the job is missing."""
    with database.reader():
        job = Job.get_or_none(id=job_id)
        if job is None:
            return None
        return job.enabled
def load_source_form(slug: str) -> dict[str, object] | None:
with database.connection_context():
with database.reader():
source = Source.get_or_none(Source.slug == slug)
if source is None:
return None
@@ -259,46 +179,45 @@ def create_source(
include_content: bool = True,
content_format: str = "",
) -> Source:
with database.connection_context():
with database.atomic():
source = Source.create(
name=name,
slug=slug,
source_type=source_type,
notes=notes,
)
if source_type == "feed":
SourceFeed.create(
source=source,
feed_url=feed_url,
)
else:
SourcePangea.create(
source=source,
domain=pangea_domain,
category_name=pangea_category,
content_type=content_type,
only_newest=only_newest,
max_articles=max_articles,
oldest_article=oldest_article,
include_authors=include_authors,
exclude_media=exclude_media,
include_content=include_content,
content_format=content_format,
)
Job.create(
with database.writer():
source = Source.create(
name=name,
slug=slug,
source_type=source_type,
notes=notes,
)
if source_type == "feed":
SourceFeed.create(
source=source,
enabled=enabled,
convert_images=convert_images,
convert_video=convert_video,
spider_arguments=spider_arguments,
cron_minute=cron_minute,
cron_hour=cron_hour,
cron_day_of_month=cron_day_of_month,
cron_day_of_week=cron_day_of_week,
cron_month=cron_month,
feed_url=feed_url,
)
return source
else:
SourcePangea.create(
source=source,
domain=pangea_domain,
category_name=pangea_category,
content_type=content_type,
only_newest=only_newest,
max_articles=max_articles,
oldest_article=oldest_article,
include_authors=include_authors,
exclude_media=exclude_media,
include_content=include_content,
content_format=content_format,
)
Job.create(
source=source,
enabled=enabled,
convert_images=convert_images,
convert_video=convert_video,
spider_arguments=spider_arguments,
cron_minute=cron_minute,
cron_hour=cron_hour,
cron_day_of_month=cron_day_of_month,
cron_day_of_week=cron_day_of_week,
cron_month=cron_month,
)
return source
def update_source(
@@ -329,91 +248,88 @@ def update_source(
include_content: bool = True,
content_format: str = "",
) -> Source | None:
with database.connection_context():
with database.atomic():
source = Source.get_or_none(Source.slug == source_slug)
if source is None:
return None
with database.writer():
source = Source.get_or_none(Source.slug == source_slug)
if source is None:
return None
source.name = name
source.notes = notes
source.source_type = source_type
source.save()
source.name = name
source.notes = notes
source.source_type = source_type
source.save()
job = Job.get(Job.source == source)
job.enabled = enabled
job.convert_images = convert_images
job.convert_video = convert_video
job.spider_arguments = spider_arguments
job.cron_minute = cron_minute
job.cron_hour = cron_hour
job.cron_day_of_month = cron_day_of_month
job.cron_day_of_week = cron_day_of_week
job.cron_month = cron_month
job.save()
job = Job.get(Job.source == source)
job.enabled = enabled
job.convert_images = convert_images
job.convert_video = convert_video
job.spider_arguments = spider_arguments
job.cron_minute = cron_minute
job.cron_hour = cron_hour
job.cron_day_of_month = cron_day_of_month
job.cron_day_of_week = cron_day_of_week
job.cron_month = cron_month
job.save()
if source_type == "feed":
SourcePangea.delete().where(SourcePangea.source == source).execute()
feed = SourceFeed.get_or_none(SourceFeed.source == source)
if feed is None:
SourceFeed.create(source=source, feed_url=feed_url)
else:
feed.feed_url = feed_url
feed.save()
if source_type == "feed":
SourcePangea.delete().where(SourcePangea.source == source).execute()
feed = SourceFeed.get_or_none(SourceFeed.source == source)
if feed is None:
SourceFeed.create(source=source, feed_url=feed_url)
else:
SourceFeed.delete().where(SourceFeed.source == source).execute()
pangea = SourcePangea.get_or_none(SourcePangea.source == source)
if pangea is None:
SourcePangea.create(
source=source,
domain=pangea_domain,
category_name=pangea_category,
content_type=content_type,
only_newest=only_newest,
max_articles=max_articles,
oldest_article=oldest_article,
include_authors=include_authors,
exclude_media=exclude_media,
include_content=include_content,
content_format=content_format,
)
else:
pangea.domain = pangea_domain
pangea.category_name = pangea_category
pangea.content_type = content_type
pangea.only_newest = only_newest
pangea.max_articles = max_articles
pangea.oldest_article = oldest_article
pangea.include_authors = include_authors
pangea.exclude_media = exclude_media
pangea.include_content = include_content
pangea.content_format = content_format
pangea.save()
feed.feed_url = feed_url
feed.save()
else:
SourceFeed.delete().where(SourceFeed.source == source).execute()
pangea = SourcePangea.get_or_none(SourcePangea.source == source)
if pangea is None:
SourcePangea.create(
source=source,
domain=pangea_domain,
category_name=pangea_category,
content_type=content_type,
only_newest=only_newest,
max_articles=max_articles,
oldest_article=oldest_article,
include_authors=include_authors,
exclude_media=exclude_media,
include_content=include_content,
content_format=content_format,
)
else:
pangea.domain = pangea_domain
pangea.category_name = pangea_category
pangea.content_type = content_type
pangea.only_newest = only_newest
pangea.max_articles = max_articles
pangea.oldest_article = oldest_article
pangea.include_authors = include_authors
pangea.exclude_media = exclude_media
pangea.include_content = include_content
pangea.content_format = content_format
pangea.save()
return source
return source
def delete_job_source(job_id: int) -> bool:
    # Delete the Source that owns this job; returns False when the job id is
    # unknown. Deleting the source cascades per the schema's FK setup —
    # TODO confirm cascade behaviour against the SQL schema files.
    # NOTE(review): diff rendering stacks the removed
    # connection_context()/atomic() body above the added writer() body;
    # only the writer() version remains after the refactor.
    with database.connection_context():
        with database.atomic():
            job = Job.get_or_none(id=job_id)
            if job is None:
                return False
            source = Source.get_by_id(job.source_id)
            return source.delete_instance() > 0
    with database.writer():
        job = Job.get_or_none(id=job_id)
        if job is None:
            return False
        source = Source.get_by_id(job.source_id)
        return source.delete_instance() > 0
def delete_source(slug: str) -> bool:
    # Delete the source identified by *slug*; returns False when no such slug.
    # NOTE(review): diff rendering — removed connection_context()/atomic()
    # body followed by the added writer() body; only the writer() version
    # survives the refactor.
    with database.connection_context():
        with database.atomic():
            source = Source.get_or_none(Source.slug == slug)
            if source is None:
                return False
            return source.delete_instance() > 0
    with database.writer():
        source = Source.get_or_none(Source.slug == slug)
        if source is None:
            return False
        return source.delete_instance() > 0
def load_sources() -> tuple[dict[str, object], ...]:
with database.connection_context():
with database.reader():
sources = tuple(Source.select().order_by(Source.created_at.desc()))
source_ids = tuple(int(source.get_id()) for source in sources)
if not source_ids: