98 lines
2.9 KiB
Python
98 lines
2.9 KiB
Python
import hashlib
|
|
import mimetypes
|
|
from enum import Enum
|
|
from pathlib import Path
|
|
from typing import Any, Mapping, Optional, Sequence
|
|
|
|
from scrapy.utils.python import to_bytes
|
|
|
|
|
|
class FileType(Enum):
    """Kinds of file the republisher is able to handle."""

    # Time-based media.
    VIDEO = "video"
    # Still pictures.
    IMAGE = "image"
    # Sound-only media.
    AUDIO = "audio"
    # Generic file kind, distinct from the three media kinds above.
    FILE = "file"
|
|
|
|
|
|
def local_image_path(name: str) -> str:
    """Return the relative path ``full/<sha1 hex digest of name>.jpg``.

    Args:
        name: Identifier of the image; it is hashed to build the file name.

    Returns:
        The path string, always under ``full/`` and always ending in ``.jpg``.
    """
    # SHA-1 is only used to derive a stable file name, not for security.
    digest = hashlib.sha1(to_bytes(name))  # nosec
    image_guid = digest.hexdigest()
    return f"full/{image_guid}.jpg"
|
|
|
|
|
|
def local_file_path(s: str) -> str:
    """Build a file name for the media located at ``s``.

    The name is the SHA-1 hex digest of ``s`` followed by a file extension.
    The extension is the suffix of ``s`` when ``mimetypes.types_map`` knows
    it; otherwise it is guessed from the MIME type of ``s`` and left empty
    when nothing can be guessed.

    Args:
        s: Path or URL of the source media.

    Returns:
        ``"<sha1 hex digest><extension>"``; the extension may be empty.
    """
    # SHA-1 is only used to derive a stable file name, not for security.
    media_guid = hashlib.sha1(to_bytes(s)).hexdigest()  # nosec
    media_ext = Path(s).suffix
    # Handles empty and wild extensions by trying to guess the
    # mime type then extension or default to empty string otherwise
    if media_ext not in mimetypes.types_map:
        media_ext = ""
        media_type = mimetypes.guess_type(s)[0]
        if media_type:
            # guess_extension() returns None when it has no extension for
            # the MIME type (e.g. an unregistered type taken from a
            # ``data:`` URL). Fall back to "" so the resulting name never
            # ends in the literal text "None".
            media_ext = mimetypes.guess_extension(media_type) or ""
    return f"{media_guid}{media_ext}"
|
|
|
|
|
|
def local_video_path(s: str) -> str:
    """Return the file name for the video at ``s``.

    Delegates to ``local_file_path`` and returns its result unchanged.
    """
    video_path = local_file_path(s)
    return video_path
|
|
|
|
|
|
def local_audio_path(s: str) -> str:
    """Return the file name for the audio at ``s``.

    Delegates to ``local_file_path`` and returns its result unchanged.
    """
    audio_path = local_file_path(s)
    return audio_path
|
|
|
|
|
|
def variant_media_path(base_path: str, profile: Mapping[str, Any]) -> str:
    """Return the path of one variant of a media file.

    Args:
        base_path: Path the variant name is appended to.
        profile: Mapping that must provide the ``"name"`` and ``"extension"``
            keys.

    Returns:
        ``"<base_path>-<name>.<extension>"``.

    Raises:
        KeyError: If ``profile`` lacks ``"name"`` or ``"extension"``.
    """
    variant_name = profile["name"]
    variant_ext = profile["extension"]
    return f"{base_path}-{variant_name}.{variant_ext}"
|
|
|
|
|
|
def published_media_path(
    file_type: FileType, source_url: str, profile: Mapping[str, Any]
) -> str:
    """Return the variant path of an audio or video source for ``profile``.

    Args:
        file_type: Kind of media; only ``FileType.AUDIO`` and
            ``FileType.VIDEO`` are supported.
        source_url: URL of the source media, used to derive the base path.
        profile: Profile mapping passed on to ``variant_media_path``.

    Returns:
        The variant path built from the media's local path and ``profile``.

    Raises:
        ValueError: If ``file_type`` is neither audio nor video.
    """
    if file_type == FileType.AUDIO:
        base_path = local_audio_path(source_url)
    elif file_type == FileType.VIDEO:
        base_path = local_video_path(source_url)
    else:
        raise ValueError(f"Unsupported file type for published media path: {file_type}")
    return variant_media_path(base_path, profile)
|
|
|
|
|
|
def canonical_published_media_path(
    file_type: FileType, source_url: str, profiles: Sequence[Mapping[str, Any]]
) -> str:
    """Return the published path of a media source for its first profile.

    Args:
        file_type: Kind of media, forwarded to ``published_media_path``.
        source_url: URL of the source media.
        profiles: Configured profiles; must not be empty.

    Returns:
        The result of ``published_media_path`` for ``profiles[0]``.

    Raises:
        ValueError: If ``profiles`` is empty, or as raised by
            ``published_media_path``.
    """
    if profiles:
        # The public URL contract is whichever profile is configured first,
        # so reordering the profiles changes the published URLs of media
        # that has already been mirrored.
        canonical_profile = profiles[0]
        return published_media_path(file_type, source_url, canonical_profile)
    raise ValueError(f"Missing transcode profiles for {file_type.value}")
|
|
|
|
|
|
def determine_file_type(
    url: str, medium: Optional[str] = None, mimetype: Optional[str] = None
):
    """
    Work out the FileType of a path/url from whatever hints are available.

    A recognised ``medium`` label wins. Otherwise the MIME type decides,
    guessed from ``url`` when ``mimetype`` is not given. Anything that is
    not recognised as image, audio or video is reported as FileType.FILE.
    """
    if medium:
        # Explicit medium labels, checked in order with plain equality.
        known_media = (
            ("video", FileType.VIDEO),
            ("audio", FileType.AUDIO),
            ("image", FileType.IMAGE),
            ("document", FileType.FILE),
            ("executable", FileType.FILE),
        )
        for label, labelled_type in known_media:
            if medium == label:
                return labelled_type

    # Unknown or missing medium: fall back to the MIME type.
    resolved_mimetype = mimetype if mimetype else mimetypes.guess_type(url)[0]

    if resolved_mimetype:
        mime_prefixes = (
            ("image", FileType.IMAGE),
            ("audio", FileType.AUDIO),
            ("video", FileType.VIDEO),
        )
        for prefix, prefixed_type in mime_prefixes:
            if resolved_mimetype.startswith(prefix):
                return prefixed_type

    return FileType.FILE
|