contentpack-sample-podcast/build.sh

190 lines
6.1 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
set -euo pipefail

# Exactly one positional argument is accepted: the URL prefix that is put in
# front of every mp3 enclosure in the feed. Anything else prints usage to
# stderr and exits with status 1.
if (( $# != 1 )); then
  {
    echo "Usage: $0 <baseURL>"
    echo " <baseURL>: absolute URL prefix for mp3 enclosures"
    echo " e.g. https://example.com/podcast"
  } >&2
  exit 1
fi

# One trailing slash is dropped so the prefix can be joined with "/posts/...".
BASE_URL="${1%/}"

# Input directory and generated output files (relative to the working dir).
POSTS_DIR="posts"
HTML_OUT="index.html"
XML_OUT="podcast.xml"

# Channel-level metadata shared by the RSS feed and the HTML page.
PODCAST_TITLE="Guardian Project Podcast"
PODCAST_AUTHOR="Guardian Project"
PODCAST_LINK="index.html"
PODCAST_DESC="A small sample podcast from Guardian Project covering decentralized web, privacy-preserving measurement, and content provenance for human rights."
#######################################
# Extract a single frontmatter field from a markdown file.
#
# Only lines between the first and second "---" delimiter lines are
# inspected. Each line is split at its first ":" into key and value, both
# trimmed of surrounding whitespace. The first line whose key equals the
# requested one wins; one pair of matching surrounding quotes (double or
# single) is removed from its value.
#
# Arguments: $1 - path to the markdown file
#            $2 - frontmatter key to look up
# Outputs:   the value followed by a newline on stdout; nothing if the key
#            is not present in the frontmatter
#######################################
fm_get() {
  # The single-quote character is handed to awk through -v because the awk
  # program itself lives inside a single-quoted shell string and hex string
  # escapes are not portable across awk implementations.
  awk -v key="$2" -v sq="'" '
    # Delimiter line: the first one opens the frontmatter, the second ends it.
    /^---[[:space:]]*$/ { fm++; if (fm == 2) exit; next }
    fm == 1 {
      i = index($0, ":")
      if (i == 0) next
      k = substr($0, 1, i - 1)
      v = substr($0, i + 1)
      sub(/^[[:space:]]+/, "", k); sub(/[[:space:]]+$/, "", k)
      sub(/^[[:space:]]+/, "", v); sub(/[[:space:]]+$/, "", v)
      if (k != key) next
      n = length(v)
      # Unquote only when there really are two quote characters; a value
      # consisting of one lone quote is printed unchanged.
      if (n >= 2) {
        q = substr(v, 1, 1)
        if ((q == "\"" || q == sq) && substr(v, n) == q) {
          v = substr(v, 2, n - 2)
        }
      }
      print v
      exit
    }
  ' "$1"
}
# Escape text for use in HTML/XML element content and double-quoted
# attributes: & < > " become &amp; &lt; &gt; &quot;.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout, without a trailing newline
esc() {
  local text=$1
  # "&" has to be handled first, otherwise the ampersands introduced by the
  # later replacements would be escaped a second time.
  local -a from=('&' '<' '>' '"')
  local -a to=('&amp;' '&lt;' '&gt;' '&quot;')
  local i
  for i in "${!from[@]}"; do
    # The replacement is quoted so that "&" in it stays literal even when
    # bash 5.2+ treats an unquoted "&" as "the matched text".
    text=${text//"${from[i]}"/"${to[i]}"}
  done
  printf '%s' "$text"
}
# Collect the markdown posts directly inside $POSTS_DIR, newest first.
# File names begin with YYYY-MM-DD, so a reverse sort of the paths puts the
# most recent post at the top.
mapfile -t POSTS < <(find "$POSTS_DIR" -maxdepth 1 -name '*.md' | sort -r)

# Nothing to publish: report on stderr and stop.
if (( ${#POSTS[@]} == 0 )); then
  printf '%s\n' "No markdown files found in $POSTS_DIR/" >&2
  exit 1
fi
# --- podcast.xml header ---
# Start (truncate) the feed file with the XML declaration and the
# channel-level elements; the per-post items and the closing tags are
# appended to it afterwards. Every value goes through esc.
cat > "$XML_OUT" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
<channel>
<title>$(esc "$PODCAST_TITLE")</title>
<link>$(esc "$PODCAST_LINK")</link>
<language>en-us</language>
<description>$(esc "$PODCAST_DESC")</description>
<itunes:author>$(esc "$PODCAST_AUTHOR")</itunes:author>
<itunes:summary>$(esc "$PODCAST_DESC")</itunes:summary>
<itunes:explicit>no</itunes:explicit>
EOF
# --- index.html header ---
# Start (truncate) the HTML page with the document head, the inline
# stylesheet, the page heading and the intro paragraph linking to the feed.
# One <article> per post and the closing tags are appended afterwards.
cat > "$HTML_OUT" <<EOF
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>$(esc "$PODCAST_TITLE")</title>
<style>
body { font-family: system-ui, sans-serif; max-width: 720px; margin: 2em auto; padding: 0 1em; line-height: 1.5; color: #222; }
h1 { border-bottom: 2px solid #333; padding-bottom: .3em; }
.episode { border: 1px solid #ddd; border-radius: 6px; padding: 1em 1.2em; margin: 1.2em 0; background: #fafafa; }
.episode h2 { margin: 0 0 .3em 0; font-size: 1.15em; }
.meta { color: #666; font-size: .85em; margin-bottom: .6em; }
audio { width: 100%; margin-top: .5em; }
.feed-link { font-size: .9em; }
</style>
</head>
<body>
<h1>$(esc "$PODCAST_TITLE")</h1>
<p>$(esc "$PODCAST_DESC") <span class="feed-link"><a href="podcast.xml">RSS feed</a></span></p>
EOF
# --- per-post items ---
# For every post (newest first) append one <item> to the RSS feed and one
# <article> to the HTML page. A post whose frontmatter has no "file:" entry,
# or whose mp3 does not exist under $POSTS_DIR, is skipped with a warning on
# stderr. Every value that originates from frontmatter or from the command
# line is passed through esc before it is written, including values that end
# up inside attributes.
for post in "${POSTS[@]}"; do
  title=$(fm_get "$post" title)
  date_raw=$(fm_get "$post" date)

  # "description" is preferred; "summary" is the fallback.
  desc=$(fm_get "$post" description)
  if [ -z "$desc" ]; then
    desc=$(fm_get "$post" summary)
  fi

  file=$(fm_get "$post" file)
  voices=$(fm_get "$post" voices)
  keywords=$(fm_get "$post" keywords)
  explicit=$(fm_get "$post" explicit)
  explicit="${explicit:-no}"

  # Duration string is whichever of duration/length looks like HH:MM[:SS].
  # The two fields are used inconsistently across existing posts.
  dur_field=$(fm_get "$post" duration)
  len_field=$(fm_get "$post" length)
  if [[ "$dur_field" =~ ^[0-9]+:[0-9]+(:[0-9]+)?$ ]]; then
    duration="$dur_field"
  elif [[ "$len_field" =~ ^[0-9]+:[0-9]+(:[0-9]+)?$ ]]; then
    duration="$len_field"
  else
    duration=""
  fi

  if [ -z "$file" ]; then
    echo "Warning: no file: field in $post, skipping" >&2
    continue
  fi
  mp3_path="$POSTS_DIR/$file"
  if [ ! -f "$mp3_path" ]; then
    echo "Warning: missing mp3 $mp3_path, skipping $post" >&2
    continue
  fi

  # Size in bytes for the enclosure "length" attribute.
  filesize=$(stat -c %s "$mp3_path")

  # First whitespace-separated token of the date field (the calendar date).
  date_short=$(printf '%s' "$date_raw" | awk '{print $1}')
  # Strip trailing numeric timezone (some posts use "-500" which date(1) rejects)
  date_norm=$(printf '%s' "$date_raw" | sed -E 's/[[:space:]]+[+-]?[0-9]{3,4}$//')
  # Try the full timestamp first, then the bare date. If neither parses the
  # run still stops, but the offending post is named on stderr.
  if ! pubdate=$(date -R -d "$date_norm" 2>/dev/null); then
    if ! pubdate=$(date -R -d "$date_short"); then
      echo "Error: cannot parse date '$date_raw' in $post" >&2
      exit 1
    fi
  fi

  guid="${file%.mp3}"

  # podcast.xml item — enclosure is absolute, everything else is plain text.
  # The URL parts are escaped too: an "&" or a double quote in the base URL
  # or file name would otherwise produce a malformed attribute.
  {
    printf '\n <item>\n'
    printf ' <title>%s</title>\n' "$(esc "$title")"
    printf ' <description>%s</description>\n' "$(esc "$desc")"
    printf ' <pubDate>%s</pubDate>\n' "$pubdate"
    printf ' <enclosure url="%s/posts/%s" length="%s" type="audio/mpeg"/>\n' \
      "$(esc "$BASE_URL")" "$(esc "$file")" "$filesize"
    printf ' <guid isPermaLink="false">%s</guid>\n' "$(esc "$guid")"
    if [ -n "$duration" ]; then
      printf ' <itunes:duration>%s</itunes:duration>\n' "$duration"
    fi
    if [ -n "$voices" ]; then
      printf ' <itunes:author>%s</itunes:author>\n' "$(esc "$voices")"
    fi
    printf ' <itunes:explicit>%s</itunes:explicit>\n' "$(esc "$explicit")"
    if [ -n "$keywords" ]; then
      printf ' <itunes:keywords>%s</itunes:keywords>\n' "$(esc "$keywords")"
    fi
    printf ' </item>\n'
  } >> "$XML_OUT"

  # index.html article — audio src is a relative path into posts/.
  # The meta line is assembled as HTML, so each piece is escaped on its own;
  # $duration needs no escaping because it already matched the digits-and-
  # colons pattern above.
  meta=$(esc "$date_short")
  if [ -n "$duration" ]; then
    meta="$meta &middot; $duration"
  fi
  if [ -n "$voices" ]; then
    meta="$meta &middot; Voices: $(esc "$voices")"
  fi
  {
    printf '<article class="episode">\n'
    printf ' <h2>%s</h2>\n' "$(esc "$title")"
    printf ' <div class="meta">%s</div>\n' "$meta"
    printf ' <p>%s</p>\n' "$(esc "$desc")"
    printf ' <audio controls preload="none" src="%s"></audio>\n' \
      "$(esc "$POSTS_DIR/$file")"
    printf '</article>\n\n'
  } >> "$HTML_OUT"
done
# --- close files ---
# Append the closing tags to both outputs, then report what was written.
printf '%s\n' '</channel>' '</rss>' >> "$XML_OUT"
printf '%s\n' '</body>' '</html>' >> "$HTML_OUT"

printf 'Wrote %s and %s (baseURL: %s)\n' "$XML_OUT" "$HTML_OUT" "$BASE_URL"