Implement audio file compression

This commit is contained in:
Abel Luck 2024-04-18 17:28:09 +02:00
parent 2f0adc8308
commit ca17e44687
6 changed files with 183 additions and 23 deletions

View file

@ -1,12 +1,20 @@
import logging
import tempfile
from io import BytesIO
from os import PathLike
from pathlib import PurePosixPath
from typing import IO, DefaultDict, Optional, Set, Union
from typing import IO, DefaultDict, Dict, Optional, Set, Tuple, Union
from urllib.parse import urlparse
import repub.utils
from repub import media
from repub.exporters import RssExporter
from scrapy.pipelines.images import FilesPipeline as BaseFilesPipeline
from scrapy.pipelines.files import FileException
from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline
from scrapy.pipelines.images import ImagesPipeline as BaseImagesPipeline
from scrapy.utils.misc import md5sum
logger = logging.getLogger(__name__)
class ImagePipeline(BaseImagesPipeline):
@ -32,6 +40,49 @@ class AudioPipeline(BaseFilesPipeline):
def file_path(self, request, response=None, info=None, *, item=None):
return repub.utils.local_audio_path(request.url)
def file_downloaded(self, response, request, info, *, item=None):
return self.audio_downloaded(response, request, info, item=item)
def audio_downloaded(self, response, request, info, *, item=None):
checksum = None
for path, buf in self.get_audio(response, request, info, item=item):
if checksum is None:
buf.seek(0)
checksum = md5sum(buf)
# width, height = image.size
self.store.persist_file(
path,
buf,
info,
# meta={"width": width, "height": height},
headers={"Content-Type": "audio/mp3"},
)
return checksum
def get_audio(self, response, request, info, *, item=None):
path = self.file_path(request, response=response, info=info, item=item)
buf = BytesIO(response.body)
with tempfile.TemporaryDirectory() as tmpdir:
tmp_file = f"{tmpdir}/file"
converted_file_base = f"{tmpdir}/converted"
with open(tmp_file, "wb") as f:
f.write(buf.read())
s = media.compression_settings(tmp_file, {})
if s is not None:
converted_file = media.compress_audio(tmp_file, converted_file_base, s)
buf_converted = BytesIO()
with open(converted_file, "rb") as f:
buf_converted.write(f.read())
buf_converted.seek(0)
yield path, buf_converted
else:
logger.info(
f"Skipping audio compression for {path}, it meets requirements"
)
buf.seek(0)
yield path, buf
class VideoPipeline(BaseFilesPipeline):
def __init__(self, store_uri: Union[str, PathLike], **kwargs):