import hashlib
import mimetypes
from enum import Enum
from pathlib import Path
from typing import Any, List, Optional

from scrapy.utils.python import to_bytes


class FileType(Enum):
    """File types that the republisher can handle"""

    VIDEO = "video"
    IMAGE = "image"
    AUDIO = "audio"
    FILE = "file"


# Declared ``medium`` values that map directly onto a file type. Any other
# medium is ignored and the mimetype / URL is consulted instead.
_MEDIUM_TO_FILE_TYPE = {
    "video": FileType.VIDEO,
    "audio": FileType.AUDIO,
    "image": FileType.IMAGE,
    "document": FileType.FILE,
    "executable": FileType.FILE,
}

# Top-level MIME type prefixes and the file type each one implies, checked
# in this order.
_MIMETYPE_PREFIX_TO_FILE_TYPE = (
    ("image", FileType.IMAGE),
    ("audio", FileType.AUDIO),
    ("video", FileType.VIDEO),
)


def local_image_path(name: str) -> str:
    """
    Returns the local storage path for an image.

    The path is ``full/<sha1 of name>.jpg``; the ``.jpg`` extension is used
    regardless of any extension present in ``name``.
    """
    image_guid = hashlib.sha1(to_bytes(name)).hexdigest()  # nosec
    return f"full/{image_guid}.jpg"


def local_file_path(s: str) -> str:
    """
    Returns the local storage name for a file: ``<sha1 of s><extension>``.

    The extension is the suffix of ``s`` when that suffix is a key of
    ``mimetypes.types_map``. Otherwise it is guessed from the mime type of
    ``s``, and is the empty string when nothing can be guessed.
    """
    media_guid = hashlib.sha1(to_bytes(s)).hexdigest()  # nosec
    media_ext = Path(s).suffix
    # Handles empty and wild extensions by trying to guess the
    # mime type then extension or default to empty string otherwise
    if media_ext not in mimetypes.types_map:
        media_ext = ""
        media_type = mimetypes.guess_type(s)[0]
        if media_type:
            # guess_extension() returns None for types it has no extension
            # for (e.g. an unregistered type read from a ``data:`` URL);
            # without the fallback the name would end in the text "None".
            media_ext = mimetypes.guess_extension(media_type) or ""
    return f"{media_guid}{media_ext}"


def local_video_path(s: str) -> str:
    """Returns the local storage name for a video (see ``local_file_path``)."""
    return local_file_path(s)


def local_audio_path(s: str) -> str:
    """Returns the local storage name for an audio file (see ``local_file_path``)."""
    return local_file_path(s)


def determine_file_type(
    url: str, medium: Optional[str] = None, mimetype: Optional[str] = None
) -> FileType:
    """
    Uses all available information to determine the type of a file from a path/url

    Precedence:

    1. ``medium``, when it is one of "video", "audio", "image", "document"
       or "executable" (the last two map to ``FileType.FILE``).
    2. ``mimetype``, or the mime type guessed from ``url`` when no mimetype
       is given. Types starting with "image", "audio" or "video" map to the
       matching file type; the comparison ignores case.
    3. ``FileType.FILE`` when nothing above matches.
    """
    if medium:
        declared_type = _MEDIUM_TO_FILE_TYPE.get(medium)
        if declared_type is not None:
            return declared_type

    if not mimetype:
        mimetype = mimetypes.guess_type(url)[0]

    if mimetype:
        # MIME type names are case-insensitive, so "Image/PNG" must be
        # treated the same as "image/png".
        normalized = mimetype.lower()
        for prefix, file_type in _MIMETYPE_PREFIX_TO_FILE_TYPE:
            if normalized.startswith(prefix):
                return file_type

    return FileType.FILE