basic feed rebuilding

This commit is contained in:
Abel Luck 2024-04-18 11:57:24 +02:00
parent 4ab05c9000
commit 6add19c288
17 changed files with 772 additions and 69 deletions

49
repub/exporters.py Normal file
View file

@ -0,0 +1,49 @@
from scrapy.exporters import BaseItemExporter
from .items import ChannelElementItem
from .exceptions import *
from typing import Any
from io import BytesIO
from repub import rss
class RssExporter(BaseItemExporter):
    """Item exporter that assembles scraped items into one RSS document.

    Items are expected to expose an ``el`` attribute holding an XML element.
    A ``ChannelElementItem`` supplies the channel element; every other item's
    element is appended to that channel. Items that arrive before the channel
    are buffered and attached once the channel shows up. The document is
    serialized and written to ``file`` in ``finish_exporting``.
    """

    def __init__(self, file: BytesIO, **kwargs: Any):
        """Set up the exporter.

        Args:
            file: Binary file-like object that receives the serialized feed.
            **kwargs: Forwarded unchanged to ``BaseItemExporter.__init__``.
        """
        super().__init__(**kwargs)
        if not self.encoding:
            self.encoding = "utf-8"
        self.file: BytesIO = file
        # Root element of the document being built.
        self.rss = rss.rss()
        # Channel element; None until a ChannelElementItem has been exported.
        self.channel = None
        # Items received before the channel was known.
        self.item_buffer: list = []

    def start_exporting(self) -> None:
        """No-op: nothing is written until ``finish_exporting``."""
        pass

    def export_item(self, item: Any):
        """Add ``item`` to the feed, or buffer it until the channel is known.

        Args:
            item: A ``ChannelElementItem`` (sets the channel) or any other
                item exposing an ``el`` element to append to the channel.
        """
        if isinstance(item, ChannelElementItem):
            self.channel = item.el
            self.rss.append(item.el)
            # Anything that arrived early can now be attached.
            self.flush_buffer()
            return
        # Compare against None explicitly rather than relying on truthiness.
        # NOTE(review): ``el`` looks like an ElementTree/lxml element, whose
        # truth value reflects its number of children, so ``not channel``
        # would misreport a childless channel as "missing" and buffer items
        # forever.
        if self.channel is None:
            self.item_buffer.append(item)
            return
        self.export_rss_item(item)

    def flush_buffer(self):
        """Append every buffered item to the channel and empty the buffer."""
        for item in self.item_buffer:
            self.export_rss_item(item)
        self.item_buffer = []

    def export_rss_item(self, item: Any):
        """Append ``item.el`` to the channel element.

        The channel must already be set; callers in this class guarantee it.
        """
        # Internal invariant, not input validation.
        assert self.channel is not None
        self.channel.append(item.el)

    def finish_exporting(self) -> None:
        """Serialize the accumulated document and write it to ``file``.

        Items still sitting in the buffer (no channel was ever exported) are
        not part of the document and therefore are not written.
        """
        xml_bytes = rss.serialize(self.rss)
        self.file.write(xml_bytes)