implement media pipelines and url rewriting
This commit is contained in:
parent
0c3a7fe7fe
commit
dc4e79c130
14 changed files with 1079 additions and 124 deletions
74
repub/utils.py
Normal file
74
repub/utils.py
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
import hashlib
|
||||
import mimetypes
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from scrapy.utils.python import to_bytes
|
||||
|
||||
|
||||
class FileType(Enum):
    """Categories of media the republisher knows how to process.

    Each member's value is the lowercase name of its category.
    """

    # Time-based visual media.
    VIDEO = "video"
    # Still pictures.
    IMAGE = "image"
    # Sound-only media.
    AUDIO = "audio"
    # Generic bucket for everything else.
    FILE = "file"
|
||||
|
||||
|
||||
def local_image_path(name: str) -> str:
    """Return the relative storage path for the image identified by ``name``.

    The path has the form ``full/<sha1 hex digest of name>.jpg``; the
    ``.jpg`` extension is used regardless of what ``name`` looks like.
    """
    # SHA-1 only derives a stable file name here; the ``nosec`` marker
    # silences the security-linter warning about the weak hash.
    digest = hashlib.sha1(to_bytes(name))  # nosec
    hex_name = digest.hexdigest()
    return f"full/{hex_name}.jpg"
|
||||
|
||||
|
||||
def local_file_path(s: str) -> str:
    """Return the storage file name for the media at path/URL ``s``.

    The name is the SHA-1 hex digest of ``s`` followed by an extension,
    chosen as follows:

    * the suffix of ``s`` is kept as-is when it is a key of
      ``mimetypes.types_map``;
    * otherwise (empty or unknown suffix) the MIME type is guessed from
      ``s`` and mapped back to an extension;
    * when neither step yields an extension, none is appended.

    Args:
        s: Path or URL of the media file.

    Returns:
        ``"<sha1 hex digest><extension>"``; the extension may be empty.
    """
    # SHA-1 only derives a stable file name here; the ``nosec`` marker
    # silences the security-linter warning about the weak hash.
    media_guid = hashlib.sha1(to_bytes(s)).hexdigest()  # nosec
    media_ext = Path(s).suffix
    # Handles empty and wild extensions by trying to guess the
    # mime type then extension or default to empty string otherwise
    if media_ext not in mimetypes.types_map:
        media_ext = ""
        media_type = mimetypes.guess_type(s)[0]
        if media_type:
            # guess_extension() returns None when it knows no extension for
            # the type (e.g. an unregistered type carried by a ``data:``
            # URL). Fall back to "" so the literal text "None" never ends
            # up in the file name.
            media_ext = mimetypes.guess_extension(media_type) or ""
    return f"{media_guid}{media_ext}"
|
||||
|
||||
|
||||
def local_video_path(s: str) -> str:
    """Return the storage file name for the video at path/URL ``s``.

    Videos have no naming scheme of their own: the result is whatever
    ``local_file_path`` returns for the same input.
    """
    video_path = local_file_path(s)
    return video_path
|
||||
|
||||
|
||||
def local_audio_path(s: str) -> str:
    """Return the storage file name for the audio at path/URL ``s``.

    Audio files have no naming scheme of their own: the result is whatever
    ``local_file_path`` returns for the same input.
    """
    audio_path = local_file_path(s)
    return audio_path
|
||||
|
||||
|
||||
def determine_file_type(
    url: str, medium: Optional[str] = None, mimetype: Optional[str] = None
) -> "FileType":
    """Classify a file from a path/URL using every hint available.

    Hints are consulted in order of precedence:

    1. ``medium`` decides the result when it names a known medium;
       an unrecognised medium is ignored.
    2. ``mimetype`` is used as given, or guessed from ``url`` when it is
       missing; its top-level type selects image, audio or video.
    3. Anything else is classified as a generic file.

    Both hints are matched case-insensitively and with surrounding
    whitespace ignored, since media types are not case sensitive.

    Args:
        url: Path or URL of the file; only used to guess the MIME type
            when ``mimetype`` is not supplied.
        medium: Optional medium name ("video", "audio", "image",
            "document" or "executable").
        mimetype: Optional MIME type of the file.

    Returns:
        The matching ``FileType`` member, ``FileType.FILE`` by default.
    """
    # NOTE(review): these names look like the Media RSS ``medium``
    # attribute values - confirm against the feed parser that calls this.
    medium_types = {
        "video": FileType.VIDEO,
        "audio": FileType.AUDIO,
        "image": FileType.IMAGE,
        "document": FileType.FILE,
        "executable": FileType.FILE,
    }
    if medium:
        by_medium = medium_types.get(medium.strip().lower())
        if by_medium is not None:
            return by_medium

    if not mimetype:
        mimetype = mimetypes.guess_type(url)[0]

    if mimetype:
        # Normalise before matching so "Video/MP4" is treated like
        # "video/mp4".
        normalized = mimetype.strip().lower()
        if normalized.startswith("image"):
            return FileType.IMAGE
        if normalized.startswith("audio"):
            return FileType.AUDIO
        if normalized.startswith("video"):
            return FileType.VIDEO

    return FileType.FILE
|
||||
Loading…
Add table
Add a link
Reference in a new issue