import logging
from io import BytesIO
from typing import Any

from repub import rss
from scrapy.exporters import BaseItemExporter

from .exceptions import *
from .items import ChannelElementItem

logger = logging.getLogger(__name__)


class RssExporter(BaseItemExporter):
    """Item exporter that collects items into one RSS document.

    A ``ChannelElementItem`` supplies the channel element; every other item
    has its ``el`` attribute appended to that channel.  Items that arrive
    before the channel are buffered and appended once the channel is
    exported.  The document is serialized and written to ``file`` in
    :meth:`finish_exporting`.
    """

    def __init__(self, file: BytesIO, **kwargs: Any):
        """Set up an empty RSS root and an empty pre-channel buffer.

        Args:
            file: Binary stream the serialized document is written to.
            **kwargs: Passed through unchanged to ``BaseItemExporter``.
        """
        super().__init__(**kwargs)
        # Fall back to UTF-8 when the base class left the encoding unset.
        if not self.encoding:
            self.encoding = "utf-8"
        self.file: BytesIO = file
        self.rss = rss.rss()
        # Channel element; stays None until a ChannelElementItem is exported.
        self.channel: Any = None
        # Items received before the channel element was known.
        self.item_buffer: list[Any] = []

    def start_exporting(self) -> None:
        """Do nothing; all output is produced in :meth:`finish_exporting`."""

    def export_item(self, item: Any) -> None:
        """Route ``item`` to the channel slot, the buffer, or the channel.

        A ``ChannelElementItem`` becomes the current channel, is appended to
        the RSS root, and triggers a flush of any buffered items.  Any other
        item is buffered while no channel is set, otherwise appended to the
        channel immediately.
        """
        if isinstance(item, ChannelElementItem):
            self.channel = item.el
            self.rss.append(item.el)
            self.flush_buffer()
            return

        if self.channel is None:
            self.item_buffer.append(item)
        else:
            self.export_rss_item(item)

    def flush_buffer(self) -> None:
        """Append every buffered item to the channel, then empty the buffer."""
        for item in self.item_buffer:
            self.export_rss_item(item)
        self.item_buffer = []

    def export_rss_item(self, item: Any) -> None:
        """Append ``item.el`` to the current channel element.

        Raises:
            AssertionError: If no channel element has been set yet.
        """
        # Explicit check rather than ``assert`` so the guard is still active
        # under ``python -O``; the exception type is kept for compatibility.
        if self.channel is None:
            raise AssertionError(
                "export_rss_item called before a channel element was set"
            )
        self.channel.append(item.el)

    def finish_exporting(self) -> None:
        """Serialize the RSS document and write it to the output file.

        Items still buffered at this point never received a channel and are
        not part of the output; a warning is logged so the loss is visible.
        """
        if self.item_buffer:
            logger.warning(
                "Dropping %d buffered item(s): no ChannelElementItem was exported",
                len(self.item_buffer),
            )
        xml_bytes = rss.serialize(self.rss)
        self.file.write(xml_bytes)