republisher/repub/entrypoint.py
2024-04-18 11:57:24 +02:00

32 lines
679 B
Python

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from .spiders.rss_spider import RssFeedSpider
from .postprocessing import SortRssItems
from . import colorlog
# Settings resolved by Scrapy for the current project.
base_settings = get_project_settings()

# Copy the project settings into a plain dict, then override FEEDS so the
# crawl exports a single "out/feed.rss" feed using the "rss" format.
# NOTE(review): SortRssItems is imported by this module but the
# "postprocessing" list is empty -- confirm whether it belongs here.
settings = dict(base_settings)
settings["FEEDS"] = {
    "out/feed.rss": {
        "format": "rss",
        "postprocessing": [],
    },
}

# Runs at import time; presumably configures coloured log output -- see the
# local colorlog module to confirm.
colorlog.load_colorlog()

# Feed URLs handed to RssFeedSpider by entrypoint().
urls = ["https://www.nasa.gov/rss/dyn/breaking_news.rss"]
def entrypoint():
    """Crawl the module-level ``urls`` with RssFeedSpider and wait for it to end.

    Builds a CrawlerProcess from the module-level ``settings``, schedules a
    single RssFeedSpider crawl, and starts the process.
    """
    crawler_process = CrawlerProcess(settings)
    crawler_process.crawl(RssFeedSpider, urls=urls)
    # start() blocks here until the crawling is finished.
    crawler_process.start()