# republisher/repub/model.py
from __future__ import annotations
import json
import os
from datetime import UTC, datetime
from enum import IntEnum
from importlib import resources
from importlib.resources.abc import Traversable
from pathlib import Path
from typing import Any
from peewee import (
BooleanField,
Check,
DateTimeField,
ForeignKeyField,
IntegerField,
Model,
SqliteDatabase,
TextField,
)
from playhouse.migrate import SchemaMigrator, migrate
DEFAULT_DB_PATH = Path("republisher.db")
DATABASE_PRAGMAS = {
"busy_timeout": 5000,
"cache_size": 15625,
"foreign_keys": 1,
"journal_mode": "wal",
"page_size": 4096,
"synchronous": "normal",
"temp_store": "memory",
}
SCHEMA_GLOB = "*.sql"
2026-03-30 18:26:02 +02:00
MAX_CONCURRENT_JOBS_SETTING_KEY = "max_concurrent_jobs"
DEFAULT_MAX_CONCURRENT_JOBS = 1
2026-03-31 12:14:47 +02:00
FEED_URL_SETTING_KEY = "feed_url"
DEFAULT_FEED_URL = ""
2026-03-30 13:26:59 +02:00
database = SqliteDatabase(None, pragmas=DATABASE_PRAGMAS)
class JobExecutionStatus(IntEnum):
PENDING = 0
RUNNING = 1
SUCCEEDED = 2
FAILED = 3
CANCELED = 4
def utc_now() -> datetime:
return datetime.now(UTC)
def resolve_database_path(db_path: str | Path | None = None) -> Path:
raw_value = (
os.environ.get("REPUBLISHER_DB_PATH", DEFAULT_DB_PATH)
if db_path is None
else db_path
)
raw_path = Path(raw_value)
return raw_path.expanduser().resolve()
def schema_paths() -> tuple[Traversable, ...]:
schema_dir = resources.files("repub").joinpath("sql")
return tuple(
sorted(
(path for path in schema_dir.iterdir() if path.name.endswith(".sql")),
key=lambda path: path.name,
)
)
def initialize_database(db_path: str | Path | None = None) -> Path:
resolved_path = resolve_database_path(db_path)
resolved_path.parent.mkdir(parents=True, exist_ok=True)
if not database.is_closed():
database.close()
database.init(str(resolved_path), pragmas=DATABASE_PRAGMAS)
database.connect(reuse_if_open=True)
try:
for path in schema_paths():
database.connection().executescript(path.read_text(encoding="utf-8"))
_run_legacy_migrations()
2026-03-30 13:26:59 +02:00
finally:
database.close()
return resolved_path
def _run_legacy_migrations() -> None:
job_columns = {column.name for column in database.get_columns("job")}
operations = []
migrator = SchemaMigrator.from_database(database)
2026-03-30 18:26:02 +02:00
if "convert_images" not in job_columns:
operations.extend(
(
migrator.add_column(
"job",
"convert_images",
BooleanField(
default=True,
constraints=[Check("convert_images IN (0, 1)")],
),
),
migrator.add_column_default("job", "convert_images", 1),
)
2026-03-30 18:26:02 +02:00
)
if "convert_video" not in job_columns:
operations.extend(
(
migrator.add_column(
"job",
"convert_video",
BooleanField(
default=True,
constraints=[Check("convert_video IN (0, 1)")],
),
),
migrator.add_column_default("job", "convert_video", 1),
)
2026-03-30 18:26:02 +02:00
)
if operations:
with database.atomic():
migrate(*operations)
2026-03-30 18:26:02 +02:00
2026-03-30 13:37:25 +02:00
def source_slug_exists(slug: str) -> bool:
with database.connection_context():
return Source.select().where(Source.slug == slug).exists()
2026-03-30 18:26:02 +02:00
def save_setting(key: str, value: Any) -> None:
payload = json.dumps(value, sort_keys=True)
with database.connection_context():
with database.atomic():
setting = AppSetting.get_or_none(AppSetting.key == key)
if setting is None:
AppSetting.create(key=key, value=payload)
return
setting.value = payload
setting.save()
def load_setting(key: str, default: Any) -> Any:
with database.connection_context():
setting = AppSetting.get_or_none(AppSetting.key == key)
if setting is None:
return default
try:
return json.loads(setting.value)
except json.JSONDecodeError:
return default
def load_max_concurrent_jobs() -> int:
value = load_setting(MAX_CONCURRENT_JOBS_SETTING_KEY, DEFAULT_MAX_CONCURRENT_JOBS)
try:
parsed = int(value)
except (TypeError, ValueError):
return DEFAULT_MAX_CONCURRENT_JOBS
return parsed if parsed >= 1 else DEFAULT_MAX_CONCURRENT_JOBS
2026-03-31 12:14:47 +02:00
def load_feed_url() -> str:
value = load_setting(FEED_URL_SETTING_KEY, DEFAULT_FEED_URL)
return value if isinstance(value, str) else DEFAULT_FEED_URL
2026-03-30 18:26:02 +02:00
def load_settings_form() -> dict[str, object]:
2026-03-31 12:14:47 +02:00
return {
"max_concurrent_jobs": load_max_concurrent_jobs(),
"feed_url": load_feed_url(),
}
2026-03-30 18:26:02 +02:00
2026-03-30 13:49:00 +02:00
def load_source_form(slug: str) -> dict[str, object] | None:
with database.connection_context():
source = Source.get_or_none(Source.slug == slug)
if source is None:
return None
job = Job.get(Job.source == source)
form_data: dict[str, object] = {
"name": source.name,
"slug": source.slug,
"source_type": source.source_type,
"notes": source.notes,
"spider_arguments": job.spider_arguments,
"enabled": job.enabled,
2026-03-30 18:26:02 +02:00
"convert_images": job.convert_images,
"convert_video": job.convert_video,
2026-03-30 13:49:00 +02:00
"cron_minute": job.cron_minute,
"cron_hour": job.cron_hour,
"cron_day_of_month": job.cron_day_of_month,
"cron_day_of_week": job.cron_day_of_week,
"cron_month": job.cron_month,
"feed_url": "",
"pangea_domain": "",
"pangea_category": "",
"content_format": "MOBILE_3",
"content_type": "articles",
"max_articles": "10",
"oldest_article": "3",
"only_newest": True,
"include_authors": True,
"exclude_media": False,
"include_content": True,
}
if source.source_type == "feed":
feed = SourceFeed.get(SourceFeed.source == source)
form_data["feed_url"] = feed.feed_url
else:
pangea = SourcePangea.get(SourcePangea.source == source)
form_data.update(
{
"pangea_domain": pangea.domain,
"pangea_category": pangea.category_name,
"content_format": pangea.content_format,
"content_type": pangea.content_type,
"max_articles": str(pangea.max_articles),
"oldest_article": str(pangea.oldest_article),
"only_newest": pangea.only_newest,
"include_authors": pangea.include_authors,
"exclude_media": pangea.exclude_media,
"include_content": pangea.include_content,
}
)
return form_data
2026-03-30 13:37:25 +02:00
def create_source(
*,
name: str,
slug: str,
source_type: str,
notes: str,
spider_arguments: str,
enabled: bool,
cron_minute: str,
cron_hour: str,
cron_day_of_month: str,
cron_day_of_week: str,
cron_month: str,
2026-03-30 18:26:02 +02:00
convert_images: bool = True,
convert_video: bool = True,
2026-03-30 13:37:25 +02:00
feed_url: str = "",
pangea_domain: str = "",
pangea_category: str = "",
content_type: str = "",
only_newest: bool = True,
max_articles: int | None = None,
oldest_article: int | None = None,
include_authors: bool = True,
exclude_media: bool = False,
include_content: bool = True,
content_format: str = "",
) -> Source:
with database.connection_context():
with database.atomic():
source = Source.create(
name=name,
slug=slug,
source_type=source_type,
notes=notes,
)
if source_type == "feed":
SourceFeed.create(
source=source,
feed_url=feed_url,
)
else:
SourcePangea.create(
source=source,
domain=pangea_domain,
category_name=pangea_category,
content_type=content_type,
only_newest=only_newest,
max_articles=max_articles,
oldest_article=oldest_article,
include_authors=include_authors,
exclude_media=exclude_media,
include_content=include_content,
content_format=content_format,
)
Job.create(
source=source,
enabled=enabled,
2026-03-30 18:26:02 +02:00
convert_images=convert_images,
convert_video=convert_video,
2026-03-30 13:37:25 +02:00
spider_arguments=spider_arguments,
cron_minute=cron_minute,
cron_hour=cron_hour,
cron_day_of_month=cron_day_of_month,
cron_day_of_week=cron_day_of_week,
cron_month=cron_month,
)
return source
2026-03-30 13:49:00 +02:00
def update_source(
source_slug: str,
*,
name: str,
slug: str,
source_type: str,
notes: str,
spider_arguments: str,
enabled: bool,
cron_minute: str,
cron_hour: str,
cron_day_of_month: str,
cron_day_of_week: str,
cron_month: str,
2026-03-30 18:26:02 +02:00
convert_images: bool = True,
convert_video: bool = True,
2026-03-30 13:49:00 +02:00
feed_url: str = "",
pangea_domain: str = "",
pangea_category: str = "",
content_type: str = "",
only_newest: bool = True,
max_articles: int | None = None,
oldest_article: int | None = None,
include_authors: bool = True,
exclude_media: bool = False,
include_content: bool = True,
content_format: str = "",
) -> Source | None:
with database.connection_context():
with database.atomic():
source = Source.get_or_none(Source.slug == source_slug)
if source is None:
return None
source.name = name
source.notes = notes
source.source_type = source_type
source.save()
job = Job.get(Job.source == source)
job.enabled = enabled
2026-03-30 18:26:02 +02:00
job.convert_images = convert_images
job.convert_video = convert_video
2026-03-30 13:49:00 +02:00
job.spider_arguments = spider_arguments
job.cron_minute = cron_minute
job.cron_hour = cron_hour
job.cron_day_of_month = cron_day_of_month
job.cron_day_of_week = cron_day_of_week
job.cron_month = cron_month
job.save()
if source_type == "feed":
SourcePangea.delete().where(SourcePangea.source == source).execute()
feed = SourceFeed.get_or_none(SourceFeed.source == source)
if feed is None:
SourceFeed.create(source=source, feed_url=feed_url)
else:
feed.feed_url = feed_url
feed.save()
else:
SourceFeed.delete().where(SourceFeed.source == source).execute()
pangea = SourcePangea.get_or_none(SourcePangea.source == source)
if pangea is None:
SourcePangea.create(
source=source,
domain=pangea_domain,
category_name=pangea_category,
content_type=content_type,
only_newest=only_newest,
max_articles=max_articles,
oldest_article=oldest_article,
include_authors=include_authors,
exclude_media=exclude_media,
include_content=include_content,
content_format=content_format,
)
else:
pangea.domain = pangea_domain
pangea.category_name = pangea_category
pangea.content_type = content_type
pangea.only_newest = only_newest
pangea.max_articles = max_articles
pangea.oldest_article = oldest_article
pangea.include_authors = include_authors
pangea.exclude_media = exclude_media
pangea.include_content = include_content
pangea.content_format = content_format
pangea.save()
return source
2026-03-30 14:02:39 +02:00
def delete_job_source(job_id: int) -> bool:
with database.connection_context():
with database.atomic():
job = Job.get_or_none(id=job_id)
if job is None:
return False
source = Source.get_by_id(job.source_id)
return source.delete_instance() > 0
def delete_source(slug: str) -> bool:
with database.connection_context():
with database.atomic():
source = Source.get_or_none(Source.slug == slug)
if source is None:
return False
return source.delete_instance() > 0
2026-03-30 13:37:25 +02:00
def load_sources() -> tuple[dict[str, object], ...]:
with database.connection_context():
sources = tuple(Source.select().order_by(Source.created_at.desc()))
source_ids = tuple(int(source.get_id()) for source in sources)
if not source_ids:
return ()
jobs = {
job.source_id: job for job in Job.select().where(Job.source.in_(source_ids))
}
feed_configs = {
config.source_id: config
for config in SourceFeed.select().where(SourceFeed.source.in_(source_ids))
}
pangea_configs = {
config.source_id: config
for config in SourcePangea.select().where(
SourcePangea.source.in_(source_ids)
)
}
return tuple(
_project_source(source, jobs, feed_configs, pangea_configs)
for source in sources
)
def _project_source(
source: "Source",
jobs: dict[int, "Job"],
feed_configs: dict[int, "SourceFeed"],
pangea_configs: dict[int, "SourcePangea"],
) -> dict[str, object]:
source_id = int(source.get_id())
job = jobs[source_id]
if source.source_type == "feed":
upstream = feed_configs[source_id].feed_url
source_type = "Feed"
else:
pangea = pangea_configs[source_id]
upstream = f"{pangea.domain} / {pangea.category_name}"
source_type = "Pangea"
return {
"name": source.name,
"slug": source.slug,
"source_type": source_type,
"upstream": upstream,
"schedule": (
f"cron: {job.cron_minute} {job.cron_hour} {job.cron_day_of_month} "
f"{job.cron_month} {job.cron_day_of_week}"
),
"last_run": "Never run",
"state": "Enabled" if job.enabled else "Disabled",
"state_tone": "scheduled" if job.enabled else "idle",
}
2026-03-30 13:26:59 +02:00
class BaseModel(Model):
class Meta:
database = database
2026-03-30 18:26:02 +02:00
class AppSetting(BaseModel):
key = TextField(primary_key=True)
value = TextField()
class Meta:
table_name = "app_setting"
2026-03-30 13:26:59 +02:00
class Source(BaseModel):
created_at = DateTimeField(default=utc_now)
updated_at = DateTimeField(default=utc_now)
name = TextField()
slug = TextField(unique=True)
source_type = TextField(constraints=[Check("source_type IN ('feed', 'pangea')")])
notes = TextField(default="")
class Meta:
table_name = "source"
class SourceFeed(BaseModel):
source = ForeignKeyField(Source, primary_key=True, backref="feed_config")
feed_url = TextField()
etag = TextField(null=True)
last_modified = TextField(null=True)
class Meta:
table_name = "source_feed"
class SourcePangea(BaseModel):
source = ForeignKeyField(Source, primary_key=True, backref="pangea_config")
domain = TextField()
category_name = TextField()
content_type = TextField()
only_newest = BooleanField()
max_articles = IntegerField()
oldest_article = IntegerField()
include_authors = BooleanField()
exclude_media = BooleanField()
include_content = BooleanField()
content_format = TextField()
class Meta:
table_name = "source_pangea"
class Job(BaseModel):
source = ForeignKeyField(Source, unique=True, backref="job")
created_at = DateTimeField(default=utc_now)
updated_at = DateTimeField(default=utc_now)
enabled = BooleanField()
2026-03-30 18:26:02 +02:00
convert_images = BooleanField(default=True)
convert_video = BooleanField(default=True)
2026-03-30 13:26:59 +02:00
spider_arguments = TextField(default="")
cron_minute = TextField()
cron_hour = TextField()
cron_day_of_month = TextField()
cron_day_of_week = TextField()
cron_month = TextField()
class Meta:
table_name = "job"
class JobExecution(BaseModel):
job = ForeignKeyField(Job, backref="executions")
created_at = DateTimeField(default=utc_now)
started_at = DateTimeField(null=True)
ended_at = DateTimeField(null=True)
2026-03-30 14:02:39 +02:00
stop_requested_at = DateTimeField(null=True)
2026-03-30 13:26:59 +02:00
running_status = IntegerField(
default=JobExecutionStatus.PENDING,
constraints=[Check("running_status BETWEEN 0 AND 4")],
)
requests_count = IntegerField(default=0)
items_count = IntegerField(default=0)
warnings_count = IntegerField(default=0)
errors_count = IntegerField(default=0)
bytes_count = IntegerField(default=0)
retries_count = IntegerField(default=0)
exceptions_count = IntegerField(default=0)
cache_size_count = IntegerField(default=0)
cache_object_count = IntegerField(default=0)
raw_stats = TextField(default="{}")
class Meta:
table_name = "job_execution"