Implement audio file compression
This commit is contained in:
parent
2f0adc8308
commit
ca17e44687
6 changed files with 183 additions and 23 deletions
|
|
@ -11,7 +11,10 @@ poetry run repub
|
||||||
- [x] Offlines RSS feed xml
|
- [x] Offlines RSS feed xml
|
||||||
- [x] Downloads media and enclosures
|
- [x] Downloads media and enclosures
|
||||||
- [x] Rewrites media urls
|
- [x] Rewrites media urls
|
||||||
- [ ] Media compression
|
- [x] Image normalization (JPG, RGB)
|
||||||
|
- [x] Audio compression
|
||||||
|
- [ ] Image compression
|
||||||
|
- [ ] Video compression
|
||||||
- [ ] Download and rewrite media embedded in content/CDATA fields
|
- [ ] Download and rewrite media embedded in content/CDATA fields
|
||||||
- [ ] Config file to drive the program
|
- [ ] Config file to drive the program
|
||||||
- [ ] Daemonize the program
|
- [ ] Daemonize the program
|
||||||
|
|
|
||||||
30
poetry.lock
generated
30
poetry.lock
generated
|
|
@ -411,6 +411,23 @@ files = [
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
sgmllib3k = "*"
|
sgmllib3k = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ffmpeg-python"
|
||||||
|
version = "0.2.0"
|
||||||
|
description = "Python bindings for FFmpeg - with complex filtering support"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "ffmpeg-python-0.2.0.tar.gz", hash = "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127"},
|
||||||
|
{file = "ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
future = "*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["Sphinx (==2.1.0)", "future (==0.17.1)", "numpy (==1.16.4)", "pytest (==4.6.1)", "pytest-mock (==1.10.4)", "tox (==3.12.1)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "filelock"
|
name = "filelock"
|
||||||
version = "3.13.4"
|
version = "3.13.4"
|
||||||
|
|
@ -461,6 +478,17 @@ flake8 = ">=3"
|
||||||
[package.extras]
|
[package.extras]
|
||||||
develop = ["build", "twine"]
|
develop = ["build", "twine"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "future"
|
||||||
|
version = "1.0.0"
|
||||||
|
description = "Clean single-source support for Python 3 and 2"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||||
|
files = [
|
||||||
|
{file = "future-1.0.0-py3-none-any.whl", hash = "sha256:929292d34f5872e70396626ef385ec22355a1fae8ad29e1a734c3e43f9fbc216"},
|
||||||
|
{file = "future-1.0.0.tar.gz", hash = "sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hyperlink"
|
name = "hyperlink"
|
||||||
version = "21.0.0"
|
version = "21.0.0"
|
||||||
|
|
@ -1600,4 +1628,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.11"
|
python-versions = "^3.11"
|
||||||
content-hash = "8b12b19145242fe86f09024453bca29792f6e22b4e63cfc72e2c6e480f38f043"
|
content-hash = "4c6b23f66fa6309a313c7a054e640ec31ce9207e5b3dd9301e06ae9b9fb44f79"
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ colorlog = "^6.8.2"
|
||||||
feedparser = "^6.0.11"
|
feedparser = "^6.0.11"
|
||||||
lxml = "^5.2.1"
|
lxml = "^5.2.1"
|
||||||
pillow = "^10.3.0"
|
pillow = "^10.3.0"
|
||||||
|
ffmpeg-python = "^0.2.0"
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|
|
||||||
67
repub/media.py
Normal file
67
repub/media.py
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
|
||||||
|
import ffmpeg
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def media_info(file_path):
    """Probe a media file and return the result of ``ffmpeg.probe``.

    The returned object is passed through untouched; the other helpers in
    this module read its ``"format"`` entry.
    """
    probe_result = ffmpeg.probe(file_path)
    return probe_result
|
||||||
|
|
||||||
|
|
||||||
|
def bitrate(info) -> float:
    """Return the bit rate stored in a probe result.

    Reads ``info["format"]["bit_rate"]`` and converts it with ``int``.

    Returns:
        The bit rate as an integer, or ``math.inf`` when the entry is
        missing or cannot be converted. Infinity compares greater than any
        finite limit, so an unknown bit rate never counts as "low enough".
    """
    try:
        return int(info["format"]["bit_rate"])
    except (KeyError, ValueError, TypeError):
        # The handler must name a tuple of exception classes:
        # ``KeyError | ValueError`` evaluates to a ``types.UnionType``, which
        # an ``except`` clause rejects with TypeError as soon as an exception
        # has to be matched. TypeError is included so a ``None`` value (or a
        # non-subscriptable "format" entry) is treated as "unknown" as well.
        return math.inf
|
||||||
|
|
||||||
|
|
||||||
|
def format(info):
    """Return the container format name stored in a probe result.

    Reads ``info["format"]["format_name"]``.

    Returns:
        The stored format name, or ``None`` when the entry is missing.

    Note: the function name shadows the ``format`` builtin inside this
    module; it is kept because callers refer to it by this name.
    """
    try:
        return info["format"]["format_name"]
    except (KeyError, TypeError):
        # A tuple is required here: ``KeyError | ValueError`` is a
        # ``types.UnionType`` and makes the ``except`` clause itself raise
        # TypeError when a lookup fails. TypeError covers a "format" entry
        # that is not subscriptable (for example ``None``).
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def compression_settings(input_file, settings):
    """Decide whether an audio file has to be re-encoded, and how.

    The file is probed and compared against two entries of ``settings``
    (anything offering ``.get(key, default)``):

    * ``REPUBLISHER_AUDIO_BITRATE`` - highest accepted bit rate, default 96000
    * ``REPUBLISHER_AUDIO_FORMAT`` - required format name, default ``"mp3"``

    Returns:
        ``None`` when the file's bit rate is at or below the limit and its
        format name equals the required one. Otherwise a dict with the keys
        ``"bitrate"`` (target bit rate) and ``"ext"`` (output extension).
    """
    info = media_info(input_file)
    max_br = settings.get("REPUBLISHER_AUDIO_BITRATE", 96000)
    fmt = settings.get("REPUBLISHER_AUDIO_FORMAT", "mp3")

    # Read the bit rate once; it is needed both for the check and the target.
    current_br = bitrate(info)
    is_br = current_br <= max_br
    is_fmt = format(info) == fmt

    if is_br and is_fmt:
        return None

    # Never raise the bit rate: keep the source rate when it is already under
    # the limit (only the format is wrong), otherwise clamp to the limit.
    target_br = current_br if is_br else max_br
    # The extension follows the configured format instead of a hard-coded
    # "mp3", so a non-default REPUBLISHER_AUDIO_FORMAT is actually honoured.
    return {"bitrate": target_br, "ext": fmt}
|
||||||
|
|
||||||
|
|
||||||
|
def compress_audio(input_file, output_file_base, settings):
    """Re-encode the first audio stream of ``input_file`` with ffmpeg.

    Args:
        input_file: path of the file to read.
        output_file_base: output path without extension; the extension from
            ``settings["ext"]`` is appended after a dot.
        settings: dict with the keys ``"ext"`` and ``"bitrate"``, as built by
            ``compression_settings``.

    Returns:
        The path of the written output file.

    Raises:
        RuntimeError: when ffmpeg fails; the message carries whatever ffmpeg
            wrote to stderr and the original ``ffmpeg.Error`` is chained.
    """
    ext = settings["ext"]
    br = settings["bitrate"]
    output_file = f"{output_file_base}.{ext}"
    logger.info(f"Compressing audio {input_file} to {output_file} target_br={br}")

    stream = ffmpeg.input(input_file).output(
        output_file,
        **{"b:a": f"{br}", "map": "0:a:0"},
        # "error" rather than "quiet": keep ffmpeg silent on success but let
        # it explain itself when the conversion fails.
        loglevel="error",
    )
    try:
        # stderr has to be captured, otherwise ``ffmpeg.Error.stderr`` is
        # None and building the error message below would itself fail.
        stream.run(capture_stderr=True)
    except ffmpeg.Error as e:
        detail = (
            e.stderr.decode(errors="replace") if e.stderr else "no ffmpeg output"
        )
        raise RuntimeError(
            f"Failed to compress audio {input_file}: {detail}"
        ) from e
    return output_file
|
||||||
|
|
@ -1,12 +1,20 @@
|
||||||
|
import logging
|
||||||
|
import tempfile
|
||||||
|
from io import BytesIO
|
||||||
from os import PathLike
|
from os import PathLike
|
||||||
from pathlib import PurePosixPath
|
from pathlib import PurePosixPath
|
||||||
from typing import IO, DefaultDict, Optional, Set, Union
|
from typing import IO, DefaultDict, Dict, Optional, Set, Tuple, Union
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import repub.utils
|
import repub.utils
|
||||||
|
from repub import media
|
||||||
from repub.exporters import RssExporter
|
from repub.exporters import RssExporter
|
||||||
from scrapy.pipelines.images import FilesPipeline as BaseFilesPipeline
|
from scrapy.pipelines.files import FileException
|
||||||
|
from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline
|
||||||
from scrapy.pipelines.images import ImagesPipeline as BaseImagesPipeline
|
from scrapy.pipelines.images import ImagesPipeline as BaseImagesPipeline
|
||||||
|
from scrapy.utils.misc import md5sum
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ImagePipeline(BaseImagesPipeline):
|
class ImagePipeline(BaseImagesPipeline):
|
||||||
|
|
@ -32,6 +40,49 @@ class AudioPipeline(BaseFilesPipeline):
|
||||||
def file_path(self, request, response=None, info=None, *, item=None):
|
def file_path(self, request, response=None, info=None, *, item=None):
|
||||||
return repub.utils.local_audio_path(request.url)
|
return repub.utils.local_audio_path(request.url)
|
||||||
|
|
||||||
|
def file_downloaded(self, response, request, info, *, item=None):
    """Handle a finished download by delegating to ``audio_downloaded``.

    All arguments are forwarded unchanged and the delegate's return value
    is returned as-is.
    """
    result = self.audio_downloaded(response, request, info, item=item)
    return result
|
||||||
|
|
||||||
|
def audio_downloaded(self, response, request, info, *, item=None):
    """Persist every buffer produced by ``get_audio`` and return a checksum.

    Each ``(path, buffer)`` pair is handed to ``self.store.persist_file``
    with an ``audio/mp3`` content type. The returned checksum is the
    ``md5sum`` of the first buffer, or ``None`` if ``get_audio`` produced
    nothing.
    """
    first_checksum = None
    produced = self.get_audio(response, request, info, item=item)
    for path, buf in produced:
        # Only the first buffer contributes to the checksum.
        if first_checksum is None:
            buf.seek(0)
            first_checksum = md5sum(buf)
        self.store.persist_file(
            path,
            buf,
            info,
            headers={"Content-Type": "audio/mp3"},
        )
    return first_checksum
|
||||||
|
|
||||||
|
def get_audio(self, response, request, info, *, item=None):
    """Yield one ``(path, buffer)`` pair for the downloaded audio.

    The response body is written to a file inside a temporary directory so
    that ``media.compression_settings`` can inspect it. If that returns
    settings, the file is converted with ``media.compress_audio`` and the
    converted bytes are yielded; otherwise the original bytes are yielded.
    The buffer is positioned at offset 0 in both cases.
    """
    path = self.file_path(request, response=response, info=info, item=item)
    with tempfile.TemporaryDirectory() as workdir:
        source_file = f"{workdir}/file"
        with open(source_file, "wb") as sink:
            sink.write(response.body)

        # An empty dict is passed as settings, so the defaults of
        # compression_settings apply.
        plan = media.compression_settings(source_file, {})
        if plan is None:
            logger.info(
                f"Skipping audio compression for {path}, it meets requirements"
            )
            yield path, BytesIO(response.body)
        else:
            encoded_file = media.compress_audio(
                source_file, f"{workdir}/converted", plan
            )
            # Load the result into memory before the temporary directory
            # (and the file in it) is removed.
            with open(encoded_file, "rb") as source:
                encoded = BytesIO(source.read())
            yield path, encoded
|
||||||
|
|
||||||
|
|
||||||
class VideoPipeline(BaseFilesPipeline):
|
class VideoPipeline(BaseFilesPipeline):
|
||||||
def __init__(self, store_uri: Union[str, PathLike], **kwargs):
|
def __init__(self, store_uri: Union[str, PathLike], **kwargs):
|
||||||
|
|
|
||||||
18
shell.nix
18
shell.nix
|
|
@ -1,14 +1,24 @@
|
||||||
{ system ? "x86_64-linux", pkgs ? import <nixpkgs> { inherit system; }, dev ? true, }:
|
{
|
||||||
|
system ? "x86_64-linux",
|
||||||
|
pkgs ? import <nixpkgs> { inherit system; },
|
||||||
|
dev ? true,
|
||||||
|
}:
|
||||||
|
|
||||||
let
|
let
|
||||||
pyCurrent = pkgs.python311;
|
pyCurrent = pkgs.python311;
|
||||||
poetryExtras = if dev then [ "dev" ] else [ ];
|
poetryExtras = if dev then [ "dev" ] else [ ];
|
||||||
poetryInstallExtras = (
|
poetryInstallExtras = (
|
||||||
if poetryExtras == [] then ""
|
if poetryExtras == [ ] then
|
||||||
else pkgs.lib.concatStrings [ " --with=" (pkgs.lib.concatStringsSep "," poetryExtras) ]
|
""
|
||||||
|
else
|
||||||
|
pkgs.lib.concatStrings [
|
||||||
|
" --with="
|
||||||
|
(pkgs.lib.concatStringsSep "," poetryExtras)
|
||||||
|
]
|
||||||
);
|
);
|
||||||
packages = [
|
packages = [
|
||||||
pyCurrent
|
pkgs.ffmpeg_5-headless
|
||||||
|
#(pyCurrent (ps: with ps; [ ffmpeg-python ]))
|
||||||
pkgs.zsh
|
pkgs.zsh
|
||||||
(pkgs.poetry.withPlugins (ps: with ps; [ poetry-plugin-up ]))
|
(pkgs.poetry.withPlugins (ps: with ps; [ poetry-plugin-up ]))
|
||||||
];
|
];
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue