db backed source creation

This commit is contained in:
Abel Luck 2026-03-30 13:37:25 +02:00
parent b9e288a22d
commit 847aeae772
5 changed files with 312 additions and 99 deletions

View file

@@ -84,6 +84,132 @@ def initialize_database(db_path: str | Path | None = None) -> Path:
return resolved_path
def source_slug_exists(slug: str) -> bool:
    """Report whether a ``Source`` row with the given slug is already stored.

    The lookup is executed inside its own ``database.connection_context()``.
    """
    slug_matches = Source.slug == slug
    with database.connection_context():
        matching_sources = Source.select().where(slug_matches)
        return matching_sources.exists()
def create_source(
    *,
    name: str,
    slug: str,
    source_type: str,
    notes: str,
    spider_arguments: str,
    enabled: bool,
    cron_minute: str,
    cron_hour: str,
    cron_day_of_month: str,
    cron_day_of_week: str,
    cron_month: str,
    feed_url: str = "",
    pangea_domain: str = "",
    pangea_category: str = "",
    content_type: str = "",
    only_newest: bool = True,
    max_articles: int | None = None,
    oldest_article: int | None = None,
    include_authors: bool = True,
    exclude_media: bool = False,
    include_content: bool = True,
    content_format: str = "",
) -> Source:
    """Create a source together with its type-specific config and its job.

    Three rows are written inside a single ``database.atomic()`` block:

    1. the ``Source`` row itself;
    2. a ``SourceFeed`` row when ``source_type == "feed"``, otherwise a
       ``SourcePangea`` row (every non-"feed" type takes this branch);
    3. the ``Job`` row carrying the enabled flag, spider arguments and the
       five cron fields.

    All arguments are keyword-only. ``feed_url`` is only stored for feed
    sources; the ``pangea_*``/content options are only stored otherwise.

    Returns:
        The newly created ``Source`` instance.
    """
    # Pick the config model and its column values up front so the
    # transactional section below is just three inserts.
    if source_type == "feed":
        config_model = SourceFeed
        config_fields = {"feed_url": feed_url}
    else:
        config_model = SourcePangea
        config_fields = {
            "domain": pangea_domain,
            "category_name": pangea_category,
            "content_type": content_type,
            "only_newest": only_newest,
            "max_articles": max_articles,
            "oldest_article": oldest_article,
            "include_authors": include_authors,
            "exclude_media": exclude_media,
            "include_content": include_content,
            "content_format": content_format,
        }

    job_fields = {
        "enabled": enabled,
        "spider_arguments": spider_arguments,
        "cron_minute": cron_minute,
        "cron_hour": cron_hour,
        "cron_day_of_month": cron_day_of_month,
        "cron_day_of_week": cron_day_of_week,
        "cron_month": cron_month,
    }

    with database.connection_context(), database.atomic():
        source = Source.create(
            name=name,
            slug=slug,
            source_type=source_type,
            notes=notes,
        )
        config_model.create(source=source, **config_fields)
        Job.create(source=source, **job_fields)

    return source
def load_sources() -> tuple[dict[str, object], ...]:
    """Load every source as a display dictionary, newest ``created_at`` first.

    The sources are fetched first; their jobs, feed configs and Pangea
    configs are then fetched with one ``IN`` query each and indexed by
    ``source_id`` before each source is handed to ``_project_source``.

    Returns:
        A tuple of projected dictionaries, or ``()`` when no sources exist.
    """
    with database.connection_context():
        rows = tuple(Source.select().order_by(Source.created_at.desc()))
        if not rows:
            return ()

        ids = tuple(int(row.get_id()) for row in rows)

        jobs_by_source = {}
        for job in Job.select().where(Job.source.in_(ids)):
            jobs_by_source[job.source_id] = job

        feeds_by_source = {}
        for feed in SourceFeed.select().where(SourceFeed.source.in_(ids)):
            feeds_by_source[feed.source_id] = feed

        pangeas_by_source = {}
        for pangea in SourcePangea.select().where(SourcePangea.source.in_(ids)):
            pangeas_by_source[pangea.source_id] = pangea

    projected = [
        _project_source(row, jobs_by_source, feeds_by_source, pangeas_by_source)
        for row in rows
    ]
    return tuple(projected)
def _project_source(
    source: "Source",
    jobs: dict[int, "Job"],
    feed_configs: dict[int, "SourceFeed"],
    pangea_configs: dict[int, "SourcePangea"],
) -> dict[str, object]:
    """Flatten one source and its related rows into a display dictionary.

    Args:
        source: The source row to describe.
        jobs: Job rows keyed by source id; must contain this source.
        feed_configs: Feed configs keyed by source id; consulted only when
            ``source.source_type == "feed"``.
        pangea_configs: Pangea configs keyed by source id; consulted for
            every other source type.

    Returns:
        A dict with the keys ``name``, ``slug``, ``source_type``,
        ``upstream``, ``schedule``, ``last_run``, ``state`` and
        ``state_tone``.

    Raises:
        KeyError: If the job or the required config is missing for the
            source id.
    """
    key = int(source.get_id())
    job = jobs[key]

    if source.source_type == "feed":
        upstream_label = feed_configs[key].feed_url
        kind_label = "Feed"
    else:
        config = pangea_configs[key]
        upstream_label = f"{config.domain} / {config.category_name}"
        kind_label = "Pangea"

    # Standard crontab field order: minute, hour, day-of-month, month,
    # day-of-week.
    cron_fields = (
        job.cron_minute,
        job.cron_hour,
        job.cron_day_of_month,
        job.cron_month,
        job.cron_day_of_week,
    )
    schedule_label = "cron: " + " ".join(f"{field}" for field in cron_fields)

    if job.enabled:
        state_label, tone = "Enabled", "scheduled"
    else:
        state_label, tone = "Disabled", "idle"

    projection = {}
    projection["name"] = source.name
    projection["slug"] = source.slug
    projection["source_type"] = kind_label
    projection["upstream"] = upstream_label
    projection["schedule"] = schedule_label
    projection["last_run"] = "Never run"
    projection["state"] = state_label
    projection["state_tone"] = tone
    return projection
class BaseModel(Model):
class Meta:
database = database