Implement audio file compression

This commit is contained in:
Abel Luck 2024-04-18 17:28:09 +02:00
parent 2f0adc8308
commit ca17e44687
6 changed files with 183 additions and 23 deletions

View file

@ -11,7 +11,10 @@ poetry run repub
- [x] Offlines RSS feed xml
- [x] Downloads media and enclosures
- [x] Rewrites media urls
- [ ] Media compression
- [x] Image normalization (JPG, RGB)
- [x] Audio compression
- [ ] Image compression
- [ ] Video compression
- [ ] Download and rewrite media embedded in content/CDATA fields
- [ ] Config file to drive the program
- [ ] Daemonize the program

30
poetry.lock generated
View file

@ -411,6 +411,23 @@ files = [
[package.dependencies]
sgmllib3k = "*"
[[package]]
name = "ffmpeg-python"
version = "0.2.0"
description = "Python bindings for FFmpeg - with complex filtering support"
optional = false
python-versions = "*"
files = [
{file = "ffmpeg-python-0.2.0.tar.gz", hash = "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127"},
{file = "ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5"},
]
[package.dependencies]
future = "*"
[package.extras]
dev = ["Sphinx (==2.1.0)", "future (==0.17.1)", "numpy (==1.16.4)", "pytest (==4.6.1)", "pytest-mock (==1.10.4)", "tox (==3.12.1)"]
[[package]]
name = "filelock"
version = "3.13.4"
@ -461,6 +478,17 @@ flake8 = ">=3"
[package.extras]
develop = ["build", "twine"]
[[package]]
name = "future"
version = "1.0.0"
description = "Clean single-source support for Python 3 and 2"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
files = [
{file = "future-1.0.0-py3-none-any.whl", hash = "sha256:929292d34f5872e70396626ef385ec22355a1fae8ad29e1a734c3e43f9fbc216"},
{file = "future-1.0.0.tar.gz", hash = "sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05"},
]
[[package]]
name = "hyperlink"
version = "21.0.0"
@ -1600,4 +1628,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "8b12b19145242fe86f09024453bca29792f6e22b4e63cfc72e2c6e480f38f043"
content-hash = "4c6b23f66fa6309a313c7a054e640ec31ce9207e5b3dd9301e06ae9b9fb44f79"

View file

@ -17,6 +17,7 @@ colorlog = "^6.8.2"
feedparser = "^6.0.11"
lxml = "^5.2.1"
pillow = "^10.3.0"
ffmpeg-python = "^0.2.0"
[build-system]

67
repub/media.py Normal file
View file

@ -0,0 +1,67 @@
import logging
import math
import ffmpeg
logger = logging.getLogger(__name__)
def media_info(file_path):
    """Probe ``file_path`` with ``ffmpeg.probe`` and return its result unchanged."""
    probe_result = ffmpeg.probe(file_path)
    return probe_result
def bitrate(info) -> float:
    """Return the bit rate stored in probe data.

    Args:
        info: Mapping from which ``info["format"]["bit_rate"]`` is read.

    Returns:
        The bit rate converted with ``int``, or ``math.inf`` when the entry
        is missing or cannot be converted, so an unknown bit rate compares
        as larger than any numeric limit.
    """
    try:
        return int(info["format"]["bit_rate"])
    except (KeyError, TypeError, ValueError):
        # A tuple is required here: ``KeyError | ValueError`` evaluates to a
        # ``types.UnionType``, which makes the ``except`` clause itself raise
        # TypeError. TypeError is included for a ``None`` value.
        return math.inf
def format(info):
    """Return the format name stored in probe data.

    Note: this function shadows the ``format`` builtin inside this module;
    the name is kept because callers refer to it.

    Args:
        info: Mapping from which ``info["format"]["format_name"]`` is read.

    Returns:
        The stored format name, or ``None`` when it is not available.
    """
    try:
        return info["format"]["format_name"]
    except (KeyError, TypeError, ValueError):
        # A tuple is required here: ``KeyError | ValueError`` evaluates to a
        # ``types.UnionType``, which makes the ``except`` clause itself raise
        # TypeError. TypeError covers a non-subscriptable "format" entry.
        return None
def compression_settings(input_file, settings):
    """Decide whether ``input_file`` must be re-encoded, and with which parameters.

    Args:
        input_file: Path of the audio file to probe.
        settings: Object with a ``get(key, default)`` method. Reads
            ``REPUBLISHER_AUDIO_BITRATE`` (default ``96000``) as the maximum
            bit rate and ``REPUBLISHER_AUDIO_FORMAT`` (default ``"mp3"``) as
            the required format.

    Returns:
        ``None`` when the file is already in the required format with a bit
        rate at or below the maximum. Otherwise a dict with ``"bitrate"``
        (the file's own bit rate when it is already low enough, else the
        maximum) and ``"ext"`` (the required format, used as the output file
        extension).
    """
    info = media_info(input_file)
    max_br = settings.get("REPUBLISHER_AUDIO_BITRATE", 96000)
    fmt = settings.get("REPUBLISHER_AUDIO_FORMAT", "mp3")

    current_br = bitrate(info)
    br_ok = current_br <= max_br
    # ffprobe can report several comma-separated short names for one
    # container (e.g. "mov,mp4,m4a,3gp,3g2,mj2"), so test membership instead
    # of equality. A missing format name never matches.
    fmt_ok = fmt in (format(info) or "").split(",")

    if br_ok and fmt_ok:
        return None

    # Never raise the bit rate of a file that is already below the limit.
    target_br = current_br if br_ok else max_br
    # Use the configured format rather than a hard-coded "mp3"; otherwise a
    # non-default REPUBLISHER_AUDIO_FORMAT could never be satisfied.
    return {"bitrate": target_br, "ext": fmt}
def compress_audio(input_file, output_file_base, settings):
    """Re-encode the first audio stream of ``input_file`` with ffmpeg.

    Args:
        input_file: Path of the source file.
        output_file_base: Output path without extension.
        settings: Mapping with ``"ext"`` (output file extension) and
            ``"bitrate"`` (passed to ffmpeg as ``b:a``).

    Returns:
        The path of the written file, ``f"{output_file_base}.{ext}"``.

    Raises:
        RuntimeError: If ffmpeg fails; the message carries ffmpeg's stderr
            output and the original ``ffmpeg.Error`` is chained as the cause.
    """
    ext = settings["ext"]
    br = settings["bitrate"]
    output_file = f"{output_file_base}.{ext}"
    logger.info(f"Compressing audio {input_file} to {output_file} target_br={br}")
    try:
        (
            ffmpeg.input(input_file)
            .output(
                output_file,
                **{"b:a": f"{br}", "map": "0:a:0"},
                # "error" rather than "quiet" so that a failure leaves a
                # diagnostic on stderr for the message below.
                loglevel="error",
            )
            # Capture the streams: without capture_stderr, ``e.stderr`` is
            # None and decoding it would raise inside the handler.
            .run(capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        stderr = e.stderr.decode(errors="replace") if e.stderr else ""
        raise RuntimeError(f"Failed to compress audio: {stderr}") from e
    return output_file

View file

@ -1,12 +1,20 @@
import logging
import tempfile
from io import BytesIO
from os import PathLike
from pathlib import PurePosixPath
from typing import IO, DefaultDict, Optional, Set, Union
from typing import IO, DefaultDict, Dict, Optional, Set, Tuple, Union
from urllib.parse import urlparse
import repub.utils
from repub import media
from repub.exporters import RssExporter
from scrapy.pipelines.images import FilesPipeline as BaseFilesPipeline
from scrapy.pipelines.files import FileException
from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline
from scrapy.pipelines.images import ImagesPipeline as BaseImagesPipeline
from scrapy.utils.misc import md5sum
logger = logging.getLogger(__name__)
class ImagePipeline(BaseImagesPipeline):
@ -32,6 +40,49 @@ class AudioPipeline(BaseFilesPipeline):
def file_path(self, request, response=None, info=None, *, item=None):
    """Return the path that ``repub.utils.local_audio_path`` computes for the request URL."""
    url = request.url
    return repub.utils.local_audio_path(url)
def file_downloaded(self, response, request, info, *, item=None):
    """Hand the downloaded response to ``audio_downloaded`` and return its result."""
    checksum = self.audio_downloaded(response, request, info, item=item)
    return checksum
def audio_downloaded(self, response, request, info, *, item=None):
    """Persist the audio yielded by ``get_audio`` and return its MD5 checksum.

    Args:
        response: Downloaded response, forwarded to ``get_audio``.
        request: Request that produced the response, forwarded to ``get_audio``.
        info: Forwarded to ``get_audio`` and to ``self.store.persist_file``.
        item: Optional item, forwarded to ``get_audio``.

    Returns:
        The ``md5sum`` of the first buffer yielded by ``get_audio``, or
        ``None`` when nothing was yielded.
    """
    checksum = None
    for path, buf in self.get_audio(response, request, info, item=item):
        if checksum is None:
            buf.seek(0)
            checksum = md5sum(buf)
            # Hashing consumed the buffer; rewind so the store sees it from
            # the start regardless of how it reads.
            buf.seek(0)
        self.store.persist_file(
            path,
            buf,
            info,
            # "audio/mpeg" is the registered media type for MP3 audio
            # (RFC 3003); "audio/mp3" is not registered.
            headers={"Content-Type": "audio/mpeg"},
        )
    return checksum
def get_audio(self, response, request, info, *, item=None):
    """Yield one ``(path, buffer)`` pair for the downloaded audio.

    The response body is written to a temporary directory and inspected with
    ``media.compression_settings``. When that returns settings, the file is
    re-encoded with ``media.compress_audio`` and the converted bytes are
    yielded; otherwise the original bytes are yielded. Either way the buffer
    is positioned at offset 0.
    """
    path = self.file_path(request, response=response, info=info, item=item)
    original = BytesIO(response.body)
    with tempfile.TemporaryDirectory() as workdir:
        source_file = f"{workdir}/file"
        target_base = f"{workdir}/converted"
        with open(source_file, "wb") as fh:
            fh.write(original.read())

        # NOTE(review): an empty dict is passed, so compression_settings
        # always falls back to its built-in defaults.
        plan = media.compression_settings(source_file, {})
        if plan is None:
            logger.info(
                f"Skipping audio compression for {path}, it meets requirements"
            )
            original.seek(0)
            yield path, original
            return

        converted_file = media.compress_audio(source_file, target_base, plan)
        # Load the result into memory before the temporary directory goes away.
        with open(converted_file, "rb") as fh:
            compressed = BytesIO(fh.read())
        yield path, compressed
class VideoPipeline(BaseFilesPipeline):
def __init__(self, store_uri: Union[str, PathLike], **kwargs):

View file

@ -1,16 +1,26 @@
{ system ? "x86_64-linux", pkgs ? import <nixpkgs> { inherit system; }, dev ? true, }:
{
system ? "x86_64-linux",
pkgs ? import <nixpkgs> { inherit system; },
dev ? true,
}:
let
pyCurrent = pkgs.python311;
poetryExtras = if dev then ["dev"] else [];
poetryExtras = if dev then [ "dev" ] else [ ];
poetryInstallExtras = (
if poetryExtras == [] then ""
else pkgs.lib.concatStrings [ " --with=" (pkgs.lib.concatStringsSep "," poetryExtras) ]
if poetryExtras == [ ] then
""
else
pkgs.lib.concatStrings [
" --with="
(pkgs.lib.concatStringsSep "," poetryExtras)
]
);
packages = [
pyCurrent
pkgs.ffmpeg_5-headless
#(pyCurrent (ps: with ps; [ ffmpeg-python ]))
pkgs.zsh
(pkgs.poetry.withPlugins(ps: with ps; [poetry-plugin-up]))
(pkgs.poetry.withPlugins (ps: with ps; [ poetry-plugin-up ]))
];
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [
@ -26,16 +36,16 @@ poetryExtras = if dev then ["dev"] else [];
# Use python from path, so you can use a different version to the one bundled with poetry
POETRY_VIRTUALENVS_PREFER_ACTIVE_PYTHON = "true";
in
pkgs.mkShell {
buildInputs = packages;
shellHook = ''
export SHELL=${pkgs.zsh}
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}"
export POETRY_VIRTUALENVS_IN_PROJECT="${POETRY_VIRTUALENVS_IN_PROJECT}"
export POETRY_VIRTUALENVS_PATH="${POETRY_VIRTUALENVS_PATH}"
export POETRY_VIRTUALENVS_PREFER_ACTIVE_PYTHON="${POETRY_VIRTUALENVS_PREFER_ACTIVE_PYTHON}"
export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring
poetry env use "${pyCurrent}/bin/python"
poetry install -vv --sync${poetryInstallExtras}
'';
}
pkgs.mkShell {
buildInputs = packages;
shellHook = ''
export SHELL=${pkgs.zsh}
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}"
export POETRY_VIRTUALENVS_IN_PROJECT="${POETRY_VIRTUALENVS_IN_PROJECT}"
export POETRY_VIRTUALENVS_PATH="${POETRY_VIRTUALENVS_PATH}"
export POETRY_VIRTUALENVS_PREFER_ACTIVE_PYTHON="${POETRY_VIRTUALENVS_PREFER_ACTIVE_PYTHON}"
export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring
poetry env use "${pyCurrent}/bin/python"
poetry install -vv --sync${poetryInstallExtras}
'';
}