basic feed rebuilding

This commit is contained in:
Abel Luck 2024-04-18 11:57:24 +02:00
parent 4ab05c9000
commit 6add19c288
17 changed files with 772 additions and 69 deletions

32
repub/entrypoint.py Normal file
View file

@ -0,0 +1,32 @@
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from .spiders.rss_spider import RssFeedSpider
from .postprocessing import SortRssItems
from . import colorlog
# Settings resolved for the current Scrapy project.
base_settings = get_project_settings()

# Plain-dict copy of the project settings with the FEEDS entry replaced:
# a single export target, "out/feed.rss", written with the "rss" format
# and an empty postprocessing list.
settings = dict(base_settings)
settings["FEEDS"] = {
    "out/feed.rss": {
        "format": "rss",
        "postprocessing": [],
    },
}

# NOTE(review): presumably installs coloured log output -- confirm in
# the package's colorlog module. Runs at import time.
colorlog.load_colorlog()

# Feed URLs passed to the spider by entrypoint().
urls = ["https://www.nasa.gov/rss/dyn/breaking_news.rss"]
def entrypoint():
    """Crawl the configured feed URLs with ``RssFeedSpider``.

    Creates a ``CrawlerProcess`` from the module-level ``settings``,
    schedules ``RssFeedSpider`` with the module-level ``urls`` and starts
    the process. The call does not return until crawling has finished.
    """
    process = CrawlerProcess(settings)
    process.crawl(RssFeedSpider, urls=urls)
    process.start()  # the script will block here until the crawling is finished