From c767848065cc37fd16ad6520a69d8e594d9e020d Mon Sep 17 00:00:00 2001 From: irl Date: Sun, 4 May 2025 16:12:08 +0100 Subject: [PATCH 01/10] feat: boost size of the shared cache up to 30MB --- src/default.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/default.conf b/src/default.conf index 969c7be..0ff8cb8 100644 --- a/src/default.conf +++ b/src/default.conf @@ -1,6 +1,6 @@ error_log /dev/stdout; -lua_shared_dict jasima_cache 20m; +lua_shared_dict jasima_cache 30m; lua_package_path "/opt/sitelen-tu/?.lua;;"; lua_shared_dict auto_ssl 1m; From d6e7ce13b04af003af3ea738ed98cf03af51a136 Mon Sep 17 00:00:00 2001 From: irl Date: Sun, 4 May 2025 17:01:08 +0100 Subject: [PATCH 02/10] doc: remove case_insensitive configuration option all replacements are now case-insensitive --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 997dbf6..33fa861 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,6 @@ The following keys are meaningful in this object: | `host_header` | Override the hostname that is used as the `Host` header when connecting to the origin. | The canonical host is used. | `"news.example.org"` | | `host_ssl` | Override the hostname that is used for TLS SNI and for verification of the upstream's TLS certificate. | The canonical host is used. | `"news.example.org"` | | `matomo_site_id` | The site ID to use for Matomo tracking. | No tracking code is added to HTML pages. | `20` | -| `rewrite_case_insensitive` | Rewrite references to other domain names using a case-insensitive match. This requires the use of the PCRE engine which is approximately 10% of the speed of the Lua pattern matching engine, so only enable if required. | Domains are matched based on the case in the pool mapping. | `true` | | `rewrite_disable` | Disable content rewriting for this host. 
This disables both URL rewriting, and the addition of Matomo tracking code, but not the replacement of absolute links to the canonical hostname with relative links. | Rewriting is enabled. | `true` | Note: the default case applies when the key is omitted. From 041891dc9acdfb94cfc08298f9490fef6b540535 Mon Sep 17 00:00:00 2001 From: irl Date: Mon, 5 May 2025 11:20:20 +0100 Subject: [PATCH 03/10] fix: always rewrite redirects regardless of content-type --- src/lua/header_filter.lua | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lua/header_filter.lua b/src/lua/header_filter.lua index 7c91bcc..5c27431 100644 --- a/src/lua/header_filter.lua +++ b/src/lua/header_filter.lua @@ -15,13 +15,12 @@ if ngx.header["Content-Type"] then content_type:find("application/vnd.mpegurl") or content_type:find("application/x-mpegurl") then ngx.ctx.rewriting = true - ngx.header["Content-Length"] = nil + ngx.header["Content-Length"] = nil -- We're rewriting the body so this has the wrong value if set end end if ngx.header["Location"] then - local pool_map = ngx.ctx.jasima_pool_map - if ngx.ctx.rewriting and pool_map then + if ngx.ctx.jasima_pool_map then local location = ngx.header["Location"] ngx.header["Location"] = location:gsub("//([a-zA-Z0-9%.%-]+%.[a-zA-Z0-9]+)/", utils.get_mirror) end From 713393d36d5d0b594c19ac0efd2fe14fa4c1ed2f Mon Sep 17 00:00:00 2001 From: irl Date: Wed, 7 May 2025 18:15:07 +0100 Subject: [PATCH 04/10] fix: don't double up on forward slashes with sub_filter --- src/default.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/default.conf b/src/default.conf index 0ff8cb8..c72671c 100644 --- a/src/default.conf +++ b/src/default.conf @@ -62,9 +62,9 @@ server { sub_filter_once off; sub_filter_types text/html text/css text/xml application/javascript application/rss+xml application/atom+xml application/vnd.mpegurl application/x-mpegurl; - sub_filter 'http://$jasima_host' '/'; - sub_filter 'https://$jasima_host' '/'; 
- sub_filter '//$jasima_host' '/'; + sub_filter 'http://$jasima_host' ''; + sub_filter 'https://$jasima_host' ''; + sub_filter '//$jasima_host' ''; sub_filter 'REWRITE_JASIMA_HOST_PLACEHOLDER' $jasima_host; header_filter_by_lua_file /opt/sitelen-tu/header_filter.lua; From d1e40fa27fb4207c304fd8439f892a3b9e9370a7 Mon Sep 17 00:00:00 2001 From: irl Date: Wed, 7 May 2025 18:23:29 +0100 Subject: [PATCH 05/10] fix: use relative URL for self --- src/lua/utils.lua | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lua/utils.lua b/src/lua/utils.lua index 827315c..2889ad2 100644 --- a/src/lua/utils.lua +++ b/src/lua/utils.lua @@ -75,6 +75,9 @@ end function _M.get_mirror(host) ngx.log(ngx.DEBUG, "Looking up mirror for " .. host) local host_lower = host:lower() + if host_lower == ngx.ctx.jasima_host then + return "//" .. host_lower .. "/" -- This will get made relative by sub_filter later + end local mirror = ngx.ctx.jasima_pool_map[host_lower] return "//" .. (mirror or host) .. "/" end From f6c22a2c96274950ea97314fd3f5b8266057f4eb Mon Sep 17 00:00:00 2001 From: irl Date: Wed, 7 May 2025 18:23:54 +0100 Subject: [PATCH 06/10] feat: match on lowercase content-type --- src/lua/header_filter.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lua/header_filter.lua b/src/lua/header_filter.lua index 5c27431..4733baf 100644 --- a/src/lua/header_filter.lua +++ b/src/lua/header_filter.lua @@ -5,7 +5,7 @@ if ngx.ctx.jasima_config.rewrite_disable then end if ngx.header["Content-Type"] then - local content_type = ngx.header["Content-Type"] + local content_type = ngx.header["Content-Type"]:lower() if content_type:find("text/html") or content_type:find("text/css") or content_type:find("text/xml") or From f7b2b6d8dd1520eef40b6fd9d89b0c02600b3582 Mon Sep 17 00:00:00 2001 From: irl Date: Wed, 7 May 2025 18:24:43 +0100 Subject: [PATCH 07/10] feat: also rewrite json documents --- src/default.conf | 8 ++++---- src/lua/header_filter.lua | 1 + 2 files changed, 5 
insertions(+), 4 deletions(-) diff --git a/src/default.conf b/src/default.conf index c72671c..ae1607a 100644 --- a/src/default.conf +++ b/src/default.conf @@ -60,15 +60,15 @@ server { proxy_set_header Accept-Encoding ""; proxy_set_header Host $jasima_host_header; + header_filter_by_lua_file /opt/sitelen-tu/header_filter.lua; + body_filter_by_lua_file /opt/sitelen-tu/body_filter.lua; + sub_filter_once off; - sub_filter_types text/html text/css text/xml application/javascript application/rss+xml application/atom+xml application/vnd.mpegurl application/x-mpegurl; + sub_filter_types text/html text/css text/xml application/javascript application/json application/rss+xml application/atom+xml application/vnd.mpegurl application/x-mpegurl; sub_filter 'http://$jasima_host' ''; sub_filter 'https://$jasima_host' ''; sub_filter '//$jasima_host' ''; sub_filter 'REWRITE_JASIMA_HOST_PLACEHOLDER' $jasima_host; - - header_filter_by_lua_file /opt/sitelen-tu/header_filter.lua; - body_filter_by_lua_file /opt/sitelen-tu/body_filter.lua; } } diff --git a/src/lua/header_filter.lua b/src/lua/header_filter.lua index 4733baf..a370bc7 100644 --- a/src/lua/header_filter.lua +++ b/src/lua/header_filter.lua @@ -10,6 +10,7 @@ if ngx.header["Content-Type"] then content_type:find("text/css") or content_type:find("text/xml") or content_type:find("application/javascript") or + content_type:find("application/json") or content_type:find("application/rss+xml") or content_type:find("application/atom+xml") or content_type:find("application/vnd.mpegurl") or From 0214a278da773d452ddd69a21dd51a88d7923a02 Mon Sep 17 00:00:00 2001 From: irl Date: Wed, 7 May 2025 18:25:55 +0100 Subject: [PATCH 08/10] fix: replace lua patterns for rewrite finding --- src/lua/body_filter.lua | 8 ++++---- src/lua/header_filter.lua | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lua/body_filter.lua b/src/lua/body_filter.lua index 6563aba..3acd201 100644 --- a/src/lua/body_filter.lua +++ 
b/src/lua/body_filter.lua @@ -27,7 +27,7 @@ local function rewrite_body(body) -- Rewrite links for assets and outbound links to other mirrored sites local pool_map = ngx.ctx.jasima_pool_map if pool_map then - body = body:gsub("//([a-zA-Z0-9%.%-]+%.[a-zA-Z0-9]+)/", utils.get_mirror) + body = body:gsub("//([%a%d%.-]+%.[%a%d-]+)/+", utils.get_mirror) end -- Add Matomo tracking code if ngx.ctx.jasima_config.matomo_site_id then @@ -37,8 +37,8 @@ local function rewrite_body(body) -- Handle first party Tealium installations if ngx.ctx.jasima_host_tealium then local escaped_host = ngx.ctx.jasima_host_tealium:gsub("%.", "%%.") - body = body:gsub("(https:)??//" .. escaped_host .. "/", "/utag/" .. ngx.ctx.jasima_config.first_party_tealium.account .. "/") - body = body:gsub("//tags.tiqcdn.com/", "/") + body = body:gsub("https://" .. escaped_host .. "/+", "/utag/" .. ngx.ctx.jasima_config.first_party_tealium.account .. "/") + body = body:gsub("//tags.tiqcdn.com/+", "/") if ngx.var.jasima_host_connect == "tags.tiqcdn.com" and ngx.ctx.jasima_host_adobe then body, count = body:gsub([[return"http"%+%(a%.ssl%?"s":""%)%+"://"%+b%+"/b/ss/]], [[return"/b/ss/]]) ngx.log(ngx.DEBUG, "Performing rewrite for Adobe Analytics in Tealium tag " .. count) @@ -47,7 +47,7 @@ local function rewrite_body(body) -- Handle first party Adobe Analytics if ngx.ctx.jasima_host_adobe then local escaped_host = ngx.ctx.jasima_host_adobe:gsub("%.", "%%.") - body = body:gsub("(https:)??//" .. escaped_host .. "/", "/") + body = body:gsub("https://" .. escaped_host .. 
"/+", "/") end return body end diff --git a/src/lua/header_filter.lua b/src/lua/header_filter.lua index a370bc7..295d395 100644 --- a/src/lua/header_filter.lua +++ b/src/lua/header_filter.lua @@ -23,6 +23,6 @@ end if ngx.header["Location"] then if ngx.ctx.jasima_pool_map then local location = ngx.header["Location"] - ngx.header["Location"] = location:gsub("//([a-zA-Z0-9%.%-]+%.[a-zA-Z0-9]+)/", utils.get_mirror) + ngx.header["Location"] = location:gsub("//([%a%d%.-]+%.[%a%d-]+)/+", utils.get_mirror) end end From b7cf5a5084c98071e73dcbf6e0891cb98345af7e Mon Sep 17 00:00:00 2001 From: irl Date: Wed, 7 May 2025 18:33:29 +0100 Subject: [PATCH 09/10] fix: replace lua patterns for content type check --- src/lua/header_filter.lua | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/lua/header_filter.lua b/src/lua/header_filter.lua index 295d395..d7c3d17 100644 --- a/src/lua/header_filter.lua +++ b/src/lua/header_filter.lua @@ -11,10 +11,11 @@ if ngx.header["Content-Type"] then content_type:find("text/xml") or content_type:find("application/javascript") or content_type:find("application/json") or - content_type:find("application/rss+xml") or - content_type:find("application/atom+xml") or - content_type:find("application/vnd.mpegurl") or - content_type:find("application/x-mpegurl") then + content_type:find("application/rss%+xml") or + content_type:find("application/atom%+xml") or + content_type:find("application/vnd%.mpegurl") or + content_type:find("application/x%-mpegurl") then + ngx.log(ngx.DEBUG, "Enabling rewrite due to content type " .. 
content_type) ngx.ctx.rewriting = true ngx.header["Content-Length"] = nil -- We're rewriting the body so this has the wrong value if set end end From 112a91967022182b2c7db89c287d025e12777be1 Mon Sep 17 00:00:00 2001 From: irl Date: Wed, 14 May 2025 16:22:53 +0100 Subject: [PATCH 10/10] feat: lookup from Host header for default host --- src/lua/config.lua | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/lua/config.lua b/src/lua/config.lua index 9421484..bdf4831 100644 --- a/src/lua/config.lua +++ b/src/lua/config.lua @@ -4,8 +4,44 @@ local redis = require "resty.redis" local _M = {}
+--- Look up the default jasima host for a request Host header.
+-- Consults the jasima_cache shared dict first and falls back to Redis,
+-- caching a Redis hit for 60 seconds.
+-- @tparam string host value of the request's Host header
+-- @treturn string|nil the default host, or nil when none could be resolved
+-- @treturn string|nil error message when the first value is nil
+local function get_default_host(host)
+    local cache = ngx.shared.jasima_cache
+    local cache_key = "default:" .. host
+    local cached = cache:get(cache_key)
+    if cached then return cached end
+
+    local red = redis:new()
+    red:set_timeout(1000)
+    local ok, connect_err = red:connect("redis", 6379)
+    if not ok then return nil, "Redis connect failed: " .. connect_err end
+
+    local key = "jasima:default:" .. host
+    local res, get_err = red:get(key)
+    -- A failed command is reported with its real cause instead of being
+    -- mistaken for a missing key.
+    if not res then return nil, "Redis get failed: " .. tostring(get_err) end
+
+    -- The command succeeded, so return the connection to the keepalive pool
+    -- before the miss check; a missing key must not skip pooling.
+    red:set_keepalive(10000, 100)
+
+    if res == ngx.null then return nil, "No default in Redis" end
+
+    cache:set(cache_key, res, 60)
+    return res
+end
+
 function _M.get_jasima_host() local headers = ngx.req.get_headers() + if headers["Cf-Ray"] and headers["Host"] then + return get_default_host(headers["Host"]) + end if headers["Jasima-Host"] then return headers["Jasima-Host"] end