Implement audio file compression
This commit is contained in:
parent
2f0adc8308
commit
ca17e44687
6 changed files with 183 additions and 23 deletions
67
repub/media.py
Normal file
67
repub/media.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
import logging
|
||||
import math
|
||||
|
||||
import ffmpeg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def media_info(file_path):
    """Probe a media file with ffmpeg and return the probe result.

    Args:
        file_path: Location of the media file, passed unchanged to
            ``ffmpeg.probe``.

    Returns:
        Whatever ``ffmpeg.probe`` returns for ``file_path``.
    """
    probe_result = ffmpeg.probe(file_path)
    return probe_result
|
||||
|
||||
|
||||
def bitrate(info) -> float:
    """Return the bit rate stored in a probe result.

    Args:
        info: Probe result; the value is read from
            ``info["format"]["bit_rate"]``.

    Returns:
        The bit rate converted with ``int``, or ``math.inf`` when the entry
        is missing or cannot be converted.
    """
    try:
        return int(info["format"]["bit_rate"])
    except (KeyError, TypeError, ValueError):
        # The exception classes must be given as a tuple: the expression
        # ``KeyError | ValueError`` is not something ``except`` can match and
        # raises TypeError instead of reaching this fallback. TypeError is
        # included so a ``None`` or non-mapping entry also falls back.
        return math.inf
|
||||
|
||||
|
||||
def format(info):
    """Return the format name stored in a probe result.

    Args:
        info: Probe result; the value is read from
            ``info["format"]["format_name"]``.

    Returns:
        The stored format name, or ``None`` when the entry is missing.
    """
    # NOTE: the function name shadows the ``format`` builtin inside this
    # module; it is kept because callers refer to it by this name.
    try:
        return info["format"]["format_name"]
    except (KeyError, TypeError, ValueError):
        # The exception classes must be given as a tuple: the expression
        # ``KeyError | ValueError`` is not something ``except`` can match and
        # raises TypeError instead of reaching this fallback. TypeError is
        # included so a ``None`` or non-mapping entry also falls back.
        return None
|
||||
|
||||
|
||||
def compression_settings(input_file, settings):
    """Decide whether an audio file needs re-encoding and with which target.

    The file is probed and its bit rate and format name are compared with
    the limits found in ``settings``.

    Args:
        input_file: Path of the audio file to inspect.
        settings: Mapping consulted for ``REPUBLISHER_AUDIO_BITRATE``
            (default ``96000``) and ``REPUBLISHER_AUDIO_FORMAT``
            (default ``"mp3"``).

    Returns:
        ``None`` when the file already has the wanted format and a bit rate
        no higher than the limit; otherwise a dict with the keys
        ``"bitrate"`` and ``"ext"`` describing the re-encode target.
    """
    probed = media_info(input_file)
    max_bitrate = settings.get("REPUBLISHER_AUDIO_BITRATE", 96000)
    wanted_format = settings.get("REPUBLISHER_AUDIO_FORMAT", "mp3")

    bitrate_ok = bitrate(probed) <= max_bitrate
    format_ok = format(probed) == wanted_format

    if bitrate_ok and format_ok:
        return None

    # Keep the source bit rate when it is already within the limit, so a
    # format-only conversion does not raise it.
    target_bitrate = bitrate(probed) if bitrate_ok else max_bitrate

    # NOTE(review): the extension is fixed to "mp3" even when
    # REPUBLISHER_AUDIO_FORMAT asks for something else - confirm intent.
    return {"bitrate": target_bitrate, "ext": "mp3"}
|
||||
|
||||
|
||||
def compress_audio(input_file, output_file_base, settings):
    """Re-encode an audio file with ffmpeg at the requested bit rate.

    Args:
        input_file: Path of the source media file.
        output_file_base: Output path without extension; ``"."`` plus
            ``settings["ext"]`` is appended to build the output file name.
        settings: Mapping providing ``"ext"`` (output extension) and
            ``"bitrate"`` (value passed to ffmpeg as ``b:a``).

    Returns:
        The path of the file written by ffmpeg.

    Raises:
        RuntimeError: If ffmpeg exits with an error; the message carries
            ffmpeg's captured stderr output when there is any.
    """
    ext = settings["ext"]
    br = settings["bitrate"]
    output_file = f"{output_file_base}.{ext}"
    logger.info(
        "Compressing audio %s to %s target_br=%s", input_file, output_file, br
    )
    try:
        (
            ffmpeg.input(input_file)
            .output(
                output_file,
                **{"b:a": f"{br}", "map": "0:a:0"},
                # "error" rather than "quiet": ffmpeg must still emit its
                # error text, otherwise there is nothing to report below.
                loglevel="error",
            )
            # stderr is only attached to ffmpeg.Error when it is captured;
            # without this ``e.stderr`` is None and decoding it would fail.
            .run(capture_stderr=True)
        )
    except ffmpeg.Error as e:
        detail = e.stderr.decode(errors="replace") if e.stderr else ""
        raise RuntimeError(f"Failed to compress audio: {detail}") from e
    return output_file
|
||||
|
|
@ -1,12 +1,20 @@
|
|||
import logging
|
||||
import tempfile
|
||||
from io import BytesIO
|
||||
from os import PathLike
|
||||
from pathlib import PurePosixPath
|
||||
from typing import IO, DefaultDict, Optional, Set, Union
|
||||
from typing import IO, DefaultDict, Dict, Optional, Set, Tuple, Union
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import repub.utils
|
||||
from repub import media
|
||||
from repub.exporters import RssExporter
|
||||
from scrapy.pipelines.images import FilesPipeline as BaseFilesPipeline
|
||||
from scrapy.pipelines.files import FileException
|
||||
from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline
|
||||
from scrapy.pipelines.images import ImagesPipeline as BaseImagesPipeline
|
||||
from scrapy.utils.misc import md5sum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ImagePipeline(BaseImagesPipeline):
|
||||
|
|
@ -32,6 +40,49 @@ class AudioPipeline(BaseFilesPipeline):
|
|||
def file_path(self, request, response=None, info=None, *, item=None):
    """Return the storage path for the audio file fetched by ``request``.

    The path is derived from the request URL by
    ``repub.utils.local_audio_path``; ``response``, ``info`` and ``item``
    are accepted but not used here.
    """
    source_url = request.url
    return repub.utils.local_audio_path(source_url)
|
||||
|
||||
def file_downloaded(self, response, request, info, *, item=None):
    """Handle a downloaded file by delegating to ``audio_downloaded``.

    All arguments are forwarded unchanged and the delegate's return value
    is returned as is.
    """
    result = self.audio_downloaded(response, request, info, item=item)
    return result
|
||||
|
||||
def audio_downloaded(self, response, request, info, *, item=None):
    """Persist the audio produced for a response and return its checksum.

    Every ``(path, buffer)`` pair yielded by ``get_audio`` is handed to
    ``self.store.persist_file`` with an ``audio/mp3`` content type.

    Returns:
        The ``md5sum`` of the first buffer yielded, or ``None`` when
        ``get_audio`` yields nothing.
    """
    first_checksum = None
    produced = self.get_audio(response, request, info, item=item)
    for target_path, audio_buf in produced:
        if first_checksum is None:
            # Rewind so the checksum covers the buffer from its start.
            audio_buf.seek(0)
            first_checksum = md5sum(audio_buf)
        self.store.persist_file(
            target_path,
            audio_buf,
            info,
            headers={"Content-Type": "audio/mp3"},
        )
    return first_checksum
|
||||
|
||||
def get_audio(self, response, request, info, *, item=None):
    """Yield the ``(path, buffer)`` pair to store for a downloaded audio file.

    The response body is written to a temporary directory so it can be
    inspected by ``media.compression_settings``. When that returns settings,
    the file is re-encoded with ``media.compress_audio`` and the converted
    bytes are yielded; otherwise the original bytes are yielded unchanged.
    Either buffer is positioned at its start.
    """
    path = self.file_path(request, response=response, info=info, item=item)
    buf = BytesIO(response.body)
    with tempfile.TemporaryDirectory() as workdir:
        source_file = f"{workdir}/file"
        converted_base = f"{workdir}/converted"
        with open(source_file, "wb") as sink:
            sink.write(buf.read())

        # NOTE(review): an empty settings dict is passed, so only the
        # defaults inside media.compression_settings apply - confirm intent.
        plan = media.compression_settings(source_file, {})
        if plan is None:
            logger.info(
                f"Skipping audio compression for {path}, it meets requirements"
            )
            buf.seek(0)
            yield path, buf
            return

        converted_file = media.compress_audio(source_file, converted_base, plan)
        # Load the result into memory before the temporary directory goes away.
        with open(converted_file, "rb") as src:
            converted = BytesIO(src.read())
        yield path, converted
|
||||
|
||||
|
||||
class VideoPipeline(BaseFilesPipeline):
|
||||
def __init__(self, store_uri: Union[str, PathLike], **kwargs):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue