Fix Scrapy media pipeline initialization
This commit is contained in:
parent
34d26f7def
commit
20b9759193
2 changed files with 71 additions and 19 deletions
|
|
@@ -4,9 +4,9 @@ from io import BytesIO
|
|||
from os import PathLike
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline
|
||||
from scrapy.pipelines.images import ImagesPipeline as BaseImagesPipeline
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.utils.misc import md5sum
|
||||
|
||||
import repub.utils
|
||||
|
|
@@ -24,12 +24,9 @@ class ImagePipeline(BaseImagesPipeline):
|
|||
|
||||
|
||||
class FilePipeline(BaseFilesPipeline):
|
||||
def __init__(self, store_uri, **kwargs):
|
||||
settings = kwargs["settings"]
|
||||
if isinstance(settings, dict) or settings is None:
|
||||
settings = Settings(settings)
|
||||
self.settings = settings
|
||||
super().__init__(store_uri, **kwargs)
|
||||
def __init__(self, store_uri: Union[str, PathLike], *, crawler: Crawler):
|
||||
self.settings = crawler.settings
|
||||
super().__init__(store_uri, crawler=crawler)
|
||||
|
||||
def file_path(self, request, response=None, info=None, *, item=None):
|
||||
return repub.utils.local_file_path(request.url)
|
||||
|
|
@@ -52,14 +49,12 @@ class TranscodePipeline(BaseFilesPipeline):
|
|||
self,
|
||||
media_type: repub.utils.FileType,
|
||||
store_uri: Union[str, PathLike],
|
||||
**kwargs,
|
||||
*,
|
||||
crawler: Crawler,
|
||||
):
|
||||
settings = kwargs["settings"]
|
||||
self.media_type = media_type
|
||||
if isinstance(settings, dict) or settings is None:
|
||||
settings = Settings(settings)
|
||||
self.settings = settings
|
||||
super().__init__(store_uri, **kwargs)
|
||||
self.settings = crawler.settings
|
||||
super().__init__(store_uri, crawler=crawler)
|
||||
|
||||
def file_downloaded(self, response, request, info, *, item=None):
|
||||
return self.media_downloaded(response, request, info, item=item)
|
||||
|
|
@@ -139,9 +134,13 @@ class AudioPipeline(TranscodePipeline):
|
|||
DEFAULT_FILES_URLS_FIELD = "audio_urls"
|
||||
DEFAULT_FILES_RESULT_FIELD = "audios"
|
||||
|
||||
def __init__(self, store_uri: Union[str, PathLike], **kwargs):
|
||||
store_uri = kwargs["settings"]["AUDIO_STORE"]
|
||||
super().__init__(repub.utils.FileType.AUDIO, store_uri, **kwargs)
|
||||
@classmethod
|
||||
def from_crawler(cls, crawler: Crawler):
|
||||
cls._update_stores(crawler.settings)
|
||||
return cls(crawler.settings["AUDIO_STORE"], crawler=crawler)
|
||||
|
||||
def __init__(self, store_uri: Union[str, PathLike], *, crawler: Crawler):
|
||||
super().__init__(repub.utils.FileType.AUDIO, store_uri, crawler=crawler)
|
||||
|
||||
def file_path(self, request, response=None, info=None, *, item=None):
|
||||
return repub.utils.local_audio_path(request.url)
|
||||
|
|
@@ -164,9 +163,13 @@ class VideoPipeline(TranscodePipeline):
|
|||
DEFAULT_FILES_URLS_FIELD = "video_urls"
|
||||
DEFAULT_FILES_RESULT_FIELD = "videos"
|
||||
|
||||
def __init__(self, store_uri: Union[str, PathLike], **kwargs):
|
||||
store_uri = kwargs["settings"]["VIDEO_STORE"]
|
||||
super().__init__(repub.utils.FileType.VIDEO, store_uri, **kwargs)
|
||||
@classmethod
|
||||
def from_crawler(cls, crawler: Crawler):
|
||||
cls._update_stores(crawler.settings)
|
||||
return cls(crawler.settings["VIDEO_STORE"], crawler=crawler)
|
||||
|
||||
def __init__(self, store_uri: Union[str, PathLike], *, crawler: Crawler):
|
||||
super().__init__(repub.utils.FileType.VIDEO, store_uri, crawler=crawler)
|
||||
|
||||
def file_path(self, request, response=None, info=None, *, item=None):
|
||||
return repub.utils.local_video_path(request.url)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue