republisher/repub/utils.py

99 lines
2.9 KiB
Python
Raw Normal View History

import hashlib
import mimetypes
from enum import Enum
from pathlib import Path
from typing import Any, Mapping, Optional, Sequence
from scrapy.utils.python import to_bytes
class FileType(Enum):
    """Categories of files the republisher can handle.

    Each member's value is the lowercase name of the category.
    """

    VIDEO = "video"
    IMAGE = "image"
    AUDIO = "audio"
    # Generic category; determine_file_type() returns it when nothing more
    # specific matches.
    FILE = "file"
def local_image_path(name: str) -> str:
    """Return the storage path for an image identified by ``name``.

    Args:
        name: Identifier of the image (passed through ``to_bytes`` and hashed).

    Returns:
        ``"full/<sha1 hex digest of name>.jpg"``.
    """
    digest = hashlib.sha1(to_bytes(name))  # nosec
    return "full/" + digest.hexdigest() + ".jpg"
def local_file_path(s: str) -> str:
    """Build a storage filename for a media file from its source path or URL.

    The name is the SHA-1 hex digest of ``s`` followed by a file extension.
    The suffix of ``s`` is kept when it is a key of ``mimetypes.types_map``;
    otherwise the extension is derived from the MIME type guessed for ``s``,
    and is left empty when nothing can be guessed.

    Args:
        s: Source path or URL of the media file.

    Returns:
        ``"<sha1 hex digest><extension>"``; the extension may be empty.
    """
    media_guid = hashlib.sha1(to_bytes(s)).hexdigest()  # nosec
    media_ext = Path(s).suffix
    # Handles empty and wild extensions by trying to guess the
    # mime type then extension or default to empty string otherwise
    if media_ext not in mimetypes.types_map:
        media_ext = ""
        media_type = mimetypes.guess_type(s)[0]
        if media_type:
            # guess_extension() may return None when no extension is known
            # for the type; without the fallback the f-string below would
            # append the literal text "None" to the filename.
            media_ext = mimetypes.guess_extension(media_type) or ""
    return f"{media_guid}{media_ext}"
def local_video_path(s: str) -> str:
    """Return the storage filename for a video.

    Delegates to ``local_file_path``.

    Args:
        s: Source path or URL of the video.
    """
    return local_file_path(s)
def local_audio_path(s: str) -> str:
    """Return the storage filename for an audio file.

    Delegates to ``local_file_path``.

    Args:
        s: Source path or URL of the audio file.
    """
    return local_file_path(s)
def variant_media_path(base_path: str, profile: Mapping[str, Any]) -> str:
    """Return the path of one transcoded variant of a media file.

    Args:
        base_path: Path of the source media; used unchanged as the prefix.
        profile: Transcode profile; its ``"name"`` and ``"extension"``
            entries are read.

    Returns:
        ``"<base_path>-<name>.<extension>"``.

    Raises:
        KeyError: If ``profile`` lacks ``"name"`` or ``"extension"``.
    """
    profile_name = profile["name"]
    extension = profile["extension"]
    return f"{base_path}-{profile_name}.{extension}"
def published_media_path(
    file_type: FileType, source_url: str, profile: Mapping[str, Any]
) -> str:
    """Return the published path of a transcoded audio or video file.

    Args:
        file_type: Kind of media; only ``FileType.AUDIO`` and
            ``FileType.VIDEO`` are supported.
        source_url: URL of the original media, used to derive the base path.
        profile: Transcode profile passed on to ``variant_media_path``.

    Returns:
        The variant path built from the media's local path and ``profile``.

    Raises:
        ValueError: If ``file_type`` is neither audio nor video.
    """
    if file_type == FileType.AUDIO:
        base_path = local_audio_path(source_url)
    elif file_type == FileType.VIDEO:
        base_path = local_video_path(source_url)
    else:
        raise ValueError(
            f"Unsupported file type for published media path: {file_type}"
        )
    return variant_media_path(base_path, profile)
def canonical_published_media_path(
    file_type: FileType, source_url: str, profiles: Sequence[Mapping[str, Any]]
) -> str:
    """Return the canonical published path for a media file.

    Args:
        file_type: Kind of media being published.
        source_url: URL of the original media.
        profiles: Configured transcode profiles, in configuration order.

    Returns:
        The published path built with the first profile in ``profiles``.

    Raises:
        ValueError: If ``profiles`` is empty.
    """
    if profiles:
        # The first configured profile is the public URL contract. Reordering
        # profiles changes published URLs for already-mirrored media.
        primary_profile = profiles[0]
        return published_media_path(file_type, source_url, primary_profile)
    raise ValueError(f"Missing transcode profiles for {file_type.value}")
def determine_file_type(
    url: str, medium: Optional[str] = None, mimetype: Optional[str] = None
):
    """
    Determine the FileType of a file from its path/url and any known hints.

    A recognised ``medium`` takes precedence. Otherwise the decision is made
    from ``mimetype``, which is guessed from ``url`` when not supplied.
    Anything that cannot be classified is reported as ``FileType.FILE``.
    """
    medium_types = {
        "video": FileType.VIDEO,
        "audio": FileType.AUDIO,
        "image": FileType.IMAGE,
        "document": FileType.FILE,
        "executable": FileType.FILE,
    }
    if medium:
        from_medium = medium_types.get(medium)
        if from_medium is not None:
            return from_medium
        # An unrecognised medium is ignored; fall through to the MIME type.

    effective_mimetype = mimetype or mimetypes.guess_type(url)[0]
    if effective_mimetype:
        prefix_types = (
            ("image", FileType.IMAGE),
            ("audio", FileType.AUDIO),
            ("video", FileType.VIDEO),
        )
        for prefix, file_type in prefix_types:
            if effective_mimetype.startswith(prefix):
                return file_type
    return FileType.FILE