basic feed rebuilding
This commit is contained in:
parent
4ab05c9000
commit
6add19c288
17 changed files with 772 additions and 69 deletions
32
repub/entrypoint.py
Normal file
32
repub/entrypoint.py
Normal file
|
|
@@ -0,0 +1,32 @@
|
|||
from scrapy.crawler import CrawlerProcess
|
||||
from scrapy.utils.project import get_project_settings
|
||||
|
||||
from .spiders.rss_spider import RssFeedSpider
|
||||
|
||||
from .postprocessing import SortRssItems
|
||||
|
||||
from . import colorlog
|
||||
|
||||
# Start from the project's Scrapy settings, then override the FEEDS entry so
# the export targets out/feed.rss with no post-processing plugins configured.
base_settings = get_project_settings()

settings = dict(base_settings)
settings["FEEDS"] = {
    "out/feed.rss": {
        "format": "rss",
        "postprocessing": [],
    },
}

# Runs at import time; presumably installs colored log output (see the
# colorlog module in this package to confirm).
colorlog.load_colorlog()


# Feed URLs passed to the spider by entrypoint().
urls = ["https://www.nasa.gov/rss/dyn/breaking_news.rss"]
|
||||
|
||||
|
||||
def entrypoint():
    """Crawl the module-level ``urls`` with ``RssFeedSpider``.

    Builds a ``CrawlerProcess`` from the module-level ``settings``, schedules
    the spider, and blocks until the crawl has finished.
    """
    crawler_process = CrawlerProcess(settings)
    crawler_process.crawl(RssFeedSpider, urls=urls)
    # start() does not return until the crawling is finished.
    crawler_process.start()
|
||||
Loading…
Add table
Add a link
Reference in a new issue