#!/usr/bin/env bash
#
# Build a static podcast site from the markdown posts in posts/.
#
# Usage: <script> <baseURL>
#   <baseURL>  absolute URL prefix for mp3 enclosures,
#              e.g. https://example.com/podcast (a trailing "/" is dropped)
#
# Outputs, written to the current directory:
#   podcast.xml  RSS feed; each enclosure URL is "<baseURL>/<file>"
#   index.html   episode list; each audio source is "posts/<file>"
#
# Each post is posts/*.md with a "---"-delimited frontmatter block. Fields
# read: title, date, description (or summary), file, voices, keywords,
# explicit, duration / length.
#
# Requires bash 4+ (mapfile) and GNU date (-R -d).
#
# NOTE(review): the XML/HTML element names emitted below were reconstructed
# from the surrounding logic (RSS 2.0 + iTunes podcast tags, a minimal HTML
# page). Confirm them against the intended markup before deploying.

set -euo pipefail

readonly POSTS_DIR="posts"
readonly HTML_OUT="index.html"
readonly XML_OUT="podcast.xml"

readonly PODCAST_TITLE="Guardian Project Podcast"
readonly PODCAST_DESC="A small sample podcast from Guardian Project covering decentralized web, privacy-preserving measurement, and content provenance for human rights."
readonly PODCAST_AUTHOR="Guardian Project"
readonly PODCAST_LINK="index.html"

#######################################
# Extract a single frontmatter field from a markdown file.
# Arguments: $1 - markdown file, $2 - field name (text before the first ":")
# Outputs:   the first matching value on stdout, with surrounding whitespace
#            and one pair of matching single/double quotes removed; prints
#            nothing when the field is absent.
#######################################
fm_get() {
  # The single quote is passed in via -v because "\x27" inside an awk
  # string is a gawk extension.
  awk -v key="$2" -v sq="'" '
    # Frontmatter is the text between the first two "---" lines.
    /^---[[:space:]]*$/ { fm++; if (fm == 2) exit; next }
    fm == 1 {
      i = index($0, ":")
      if (i == 0) next
      k = substr($0, 1, i - 1)
      v = substr($0, i + 1)
      gsub(/^[[:space:]]+|[[:space:]]+$/, "", k)
      gsub(/^[[:space:]]+|[[:space:]]+$/, "", v)
      if (k != key) next
      n = length(v)
      if (n >= 2) {
        first = substr(v, 1, 1)
        last = substr(v, n, 1)
        if (first == last && (first == "\"" || first == sq)) {
          v = substr(v, 2, n - 2)
        }
      }
      print v
      exit
    }
  ' "$1"
}

#######################################
# Escape text for safe use in HTML/XML element content and double-quoted
# attribute values.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
esc() {
  local s="${1-}"
  # From bash 5.2 an unquoted "&" in the replacement means "the matched
  # text". Quoting the replacement keeps it literal there and is also
  # correct on older versions.
  local amp='&amp;' lt='&lt;' gt='&gt;' quot='&quot;'
  s=${s//&/"$amp"} # must run first so later entities are not re-escaped
  s=${s//</"$lt"}
  s=${s//>/"$gt"}
  s=${s//\"/"$quot"}
  printf '%s' "$s"
}

# Start podcast.xml (truncating any previous file) with the channel header.
write_xml_header() {
  {
    printf '<?xml version="1.0" encoding="UTF-8"?>\n'
    printf '<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">\n'
    printf '<channel>\n'
    printf '  <title>%s</title>\n' "$(esc "$PODCAST_TITLE")"
    printf '  <link>%s</link>\n' "$(esc "$PODCAST_LINK")"
    printf '  <language>en-us</language>\n'
    printf '  <description>%s</description>\n' "$(esc "$PODCAST_DESC")"
    printf '  <itunes:author>%s</itunes:author>\n' "$(esc "$PODCAST_AUTHOR")"
    printf '  <itunes:summary>%s</itunes:summary>\n' "$(esc "$PODCAST_DESC")"
    printf '  <itunes:explicit>no</itunes:explicit>\n'
  } > "$XML_OUT"
}

# Start index.html (truncating any previous file) with the page header.
write_html_header() {
  {
    printf '<!DOCTYPE html>\n'
    printf '<html lang="en">\n'
    printf '<head>\n'
    printf '  <meta charset="utf-8">\n'
    printf '  <title>%s</title>\n' "$(esc "$PODCAST_TITLE")"
    printf '</head>\n'
    printf '<body>\n'
    printf '<h1>%s</h1>\n' "$(esc "$PODCAST_TITLE")"
    printf '<p>%s <a href="%s">RSS feed</a></p>\n\n' \
      "$(esc "$PODCAST_DESC")" "$(esc "$XML_OUT")"
  } > "$HTML_OUT"
}

#######################################
# Append one episode to both output files.
# Arguments: $1 - post markdown file, $2 - base URL for enclosures
# Outputs:   warnings on stderr when a post is skipped
# Returns:   0, including when the post is skipped
#######################################
write_post() {
  local post="$1" base_url="$2"
  local title date_raw desc file voices keywords explicit
  local dur_field len_field duration=""
  local mp3_path filesize date_short rest date_norm pubdate guid meta
  local -r dur_re='^[0-9]+:[0-9]+(:[0-9]+)?$'

  title=$(fm_get "$post" title)
  date_raw=$(fm_get "$post" date)
  desc=$(fm_get "$post" description)
  if [[ -z "$desc" ]]; then
    desc=$(fm_get "$post" summary)
  fi
  file=$(fm_get "$post" file)
  voices=$(fm_get "$post" voices)
  keywords=$(fm_get "$post" keywords)
  explicit=$(fm_get "$post" explicit)
  if [[ -z "$explicit" ]]; then
    explicit="no"
  fi

  # Duration string is whichever of duration/length looks like HH:MM[:SS].
  # The two fields are used inconsistently across existing posts.
  dur_field=$(fm_get "$post" duration)
  len_field=$(fm_get "$post" length)
  if [[ "$dur_field" =~ $dur_re ]]; then
    duration="$dur_field"
  elif [[ "$len_field" =~ $dur_re ]]; then
    duration="$len_field"
  fi

  if [[ -z "$file" ]]; then
    echo "Warning: no file: field in $post, skipping" >&2
    return 0
  fi
  mp3_path="$POSTS_DIR/$file"
  if [[ ! -f "$mp3_path" ]]; then
    echo "Warning: missing mp3 $mp3_path, skipping $post" >&2
    return 0
  fi

  # Byte size for the enclosure; some wc implementations pad with spaces.
  filesize=$(wc -c < "$mp3_path")
  filesize=${filesize//[[:space:]]/}

  # First whitespace-separated word of the date field.
  read -r date_short rest <<< "$date_raw" || true

  # Strip trailing numeric timezone (some posts use "-500" which date(1)
  # rejects), then fall back to the first word alone.
  date_norm=$(sed -E 's/[[:space:]]+[+-]?[0-9]{3,4}$//' <<< "$date_raw")
  if ! pubdate=$(date -R -d "$date_norm" 2> /dev/null) \
    && ! pubdate=$(date -R -d "$date_short" 2> /dev/null); then
    # Skip rather than abort under set -e and leave truncated outputs.
    echo "Warning: unparseable date '$date_raw' in $post, skipping" >&2
    return 0
  fi

  guid="${file%.mp3}"

  # podcast.xml item: enclosure is absolute, everything else is plain text.
  {
    printf '\n  <item>\n'
    printf '    <title>%s</title>\n' "$(esc "$title")"
    printf '    <description>%s</description>\n' "$(esc "$desc")"
    printf '    <pubDate>%s</pubDate>\n' "$pubdate"
    printf '    <enclosure url="%s/%s" length="%s" type="audio/mpeg"/>\n' \
      "$(esc "$base_url")" "$(esc "$file")" "$filesize"
    # The guid is a bare name, not a URL, so it must not be a permalink.
    printf '    <guid isPermaLink="false">%s</guid>\n' "$(esc "$guid")"
    if [[ -n "$duration" ]]; then
      printf '    <itunes:duration>%s</itunes:duration>\n' "$duration"
    fi
    if [[ -n "$voices" ]]; then
      printf '    <itunes:author>%s</itunes:author>\n' "$(esc "$voices")"
    fi
    printf '    <itunes:explicit>%s</itunes:explicit>\n' "$(esc "$explicit")"
    if [[ -n "$keywords" ]]; then
      printf '    <itunes:keywords>%s</itunes:keywords>\n' "$(esc "$keywords")"
    fi
    printf '  </item>\n'
  } >> "$XML_OUT"

  # index.html article: audio src is a relative path into posts/.
  # meta carries markup, so every piece is escaped as it is appended.
  meta=$(esc "$date_short")
  if [[ -n "$duration" ]]; then
    meta="$meta · $duration"
  fi
  if [[ -n "$voices" ]]; then
    meta="$meta · <br>Voices: $(esc "$voices")"
  fi
  {
    printf '<article>\n'
    printf '  <h2>%s</h2>\n' "$(esc "$title")"
    printf '  <p class="meta">%s</p>\n' "$meta"
    printf '  <p>%s</p>\n' "$(esc "$desc")"
    printf '  <audio controls preload="none" src="%s/%s"></audio>\n' \
      "$(esc "$POSTS_DIR")" "$(esc "$file")"
    printf '</article>\n\n'
  } >> "$HTML_OUT"
}

# Close the elements opened by the two header writers.
write_footers() {
  printf '\n</channel>\n</rss>\n' >> "$XML_OUT"
  printf '</body>\n</html>\n' >> "$HTML_OUT"
}

main() {
  if (($# != 1)); then
    {
      printf 'Usage: %s <baseURL>\n' "$0"
      printf '  <baseURL>: absolute URL prefix for mp3 enclosures\n'
      printf '             e.g. https://example.com/podcast\n'
    } >&2
    exit 1
  fi
  local base_url="${1%/}"

  # Newest-first list of posts (filenames start with YYYY-MM-DD so sort works).
  local -a posts=()
  mapfile -t posts < <(find "$POSTS_DIR" -maxdepth 1 -name '*.md' | sort -r)
  if ((${#posts[@]} == 0)); then
    echo "No markdown files found in $POSTS_DIR/" >&2
    exit 1
  fi

  write_xml_header
  write_html_header

  local post
  for post in "${posts[@]}"; do
    write_post "$post" "$base_url"
  done

  write_footers

  echo "Wrote $XML_OUT and $HTML_OUT (baseURL: $base_url)"
}

main "$@"