diff --git a/README.md b/README.md index 997dbf6..33fa861 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,6 @@ The following keys are meaningful in this object: | `host_header` | Override the hostname that is used as the `Host` header when connecting to the origin. | The canonical host is used. | `"news.example.org"` | | `host_ssl` | Override the hostname that is used for TLS SNI and for verification of the upstream's TLS certificate. | The canonical host is used. | `"news.example.org"` | | `matomo_site_id` | The site ID to use for Matomo tracking. | No tracking code is added to HTML pages. | `20` | -| `rewrite_case_insensitive` | Rewrite references to other domain names using a case-insensitive match. This requires the use of the PCRE engine which is approximately 10% of the speed of the Lua pattern matching engine, so only enable if required. | Domains are matched based on the case in the pool mapping. | `true` | | `rewrite_disable` | Disable content rewriting for this host. This disables both URL rewriting, and the addition of Matomo tracking code, but not the replacement of absolute links to the canonical hostname with relative links. | Rewriting is enabled. | `true` | Note: the default case applies when the key is omitted. diff --git a/src/default.conf b/src/default.conf index 969c7be..ae1607a 100644 --- a/src/default.conf +++ b/src/default.conf @@ -1,6 +1,6 @@ error_log /dev/stdout; -lua_shared_dict jasima_cache 20m; +lua_shared_dict jasima_cache 30m; lua_package_path "/opt/sitelen-tu/?.lua;;"; lua_shared_dict auto_ssl 1m; @@ -60,15 +60,15 @@ server { proxy_set_header Accept-Encoding ""; proxy_set_header Host $jasima_host_header; - sub_filter_once off; - sub_filter_types text/html text/css text/xml application/javascript application/rss+xml application/atom+xml application/vnd.mpegurl application/x-mpegurl; - sub_filter 'http://$jasima_host' '/'; - sub_filter 'https://$jasima_host' '/'; - sub_filter '//$jasima_host' '/'; - sub_filter 'REWRITE_JASIMA_HOST_PLACEHOLDER' $jasima_host; - header_filter_by_lua_file /opt/sitelen-tu/header_filter.lua; body_filter_by_lua_file /opt/sitelen-tu/body_filter.lua; + + sub_filter_once off; + sub_filter_types text/html text/css text/xml application/javascript application/json application/rss+xml application/atom+xml application/vnd.mpegurl application/x-mpegurl; + sub_filter 'http://$jasima_host' ''; + sub_filter 'https://$jasima_host' ''; + sub_filter '//$jasima_host' ''; + sub_filter 'REWRITE_JASIMA_HOST_PLACEHOLDER' $jasima_host; } } diff --git a/src/lua/body_filter.lua b/src/lua/body_filter.lua index 6563aba..3acd201 100644 --- a/src/lua/body_filter.lua +++ b/src/lua/body_filter.lua @@ -27,7 +27,7 @@ local function rewrite_body(body) -- Rewrite links for assets and outbound links to other mirrored sites local pool_map = ngx.ctx.jasima_pool_map if pool_map then - body = body:gsub("//([a-zA-Z0-9%.%-]+%.[a-zA-Z0-9]+)/", utils.get_mirror) + body = body:gsub("//([%a%d%.-]+%.[%a%d-]+)/+", utils.get_mirror) end -- Add Matomo tracking code if ngx.ctx.jasima_config.matomo_site_id then @@ -37,8 +37,8 @@ local function rewrite_body(body) -- Handle first party Tealium installations if ngx.ctx.jasima_host_tealium then local escaped_host = ngx.ctx.jasima_host_tealium:gsub("%.", "%%.") - body = body:gsub("(https:)??//" .. escaped_host .. "/", "/utag/" .. ngx.ctx.jasima_config.first_party_tealium.account .. "/") - body = body:gsub("//tags.tiqcdn.com/", "/") + body = body:gsub("https://" .. escaped_host .. "/+", "/utag/" .. ngx.ctx.jasima_config.first_party_tealium.account .. "/") + body = body:gsub("//tags.tiqcdn.com/+", "/") if ngx.var.jasima_host_connect == "tags.tiqcdn.com" and ngx.ctx.jasima_host_adobe then body, count = body:gsub([[return"http"%+%(a%.ssl%?"s":""%)%+"://"%+b%+"/b/ss/]], [[return"/b/ss/]]) ngx.log(ngx.DEBUG, "Performing rewrite for Adobe Analytics in Tealium tag " .. count) @@ -47,7 +47,7 @@ local function rewrite_body(body) -- Handle first party Adobe Analytics if ngx.ctx.jasima_host_adobe then local escaped_host = ngx.ctx.jasima_host_adobe:gsub("%.", "%%.") - body = body:gsub("(https:)??//" .. escaped_host .. "/", "/") + body = body:gsub("https://" .. escaped_host .. "/+", "/") end return body end diff --git a/src/lua/config.lua b/src/lua/config.lua index 9421484..bdf4831 100644 --- a/src/lua/config.lua +++ b/src/lua/config.lua @@ -4,8 +4,32 @@ local redis = require "resty.redis" local _M = {} +local function get_default_host(host) + local cache = ngx.shared.jasima_cache + local cache_key = "default:" .. host + local cached = cache:get(cache_key) + if cached then return cached end + + local red = redis:new() + red:set_timeout(1000) + local ok, err = red:connect("redis", 6379) + if not ok then return nil, "Redis connect failed: " .. err end + + local key = "jasima:default:" .. host + local res, err = red:get(key) + if not res or res == ngx.null then return nil, "No default in Redis" end + + red:set_keepalive(10000, 100) + + cache:set(cache_key, res, 60) + return res +end + function _M.get_jasima_host() local headers = ngx.req.get_headers() + if headers["Cf-Ray"] and headers["Host"] then + return get_default_host(headers["Host"]) + end if headers["Jasima-Host"] then return headers["Jasima-Host"] end diff --git a/src/lua/header_filter.lua b/src/lua/header_filter.lua index 7c91bcc..d7c3d17 100644 --- a/src/lua/header_filter.lua +++ b/src/lua/header_filter.lua @@ -5,24 +5,25 @@ if ngx.ctx.jasima_config.rewrite_disable then end if ngx.header["Content-Type"] then - local content_type = ngx.header["Content-Type"] + local content_type = ngx.header["Content-Type"]:lower() if content_type:find("text/html") or content_type:find("text/css") or content_type:find("text/xml") or content_type:find("application/javascript") or - content_type:find("application/rss+xml") or - content_type:find("application/atom+xml") or - content_type:find("application/vnd.mpegurl") or - content_type:find("application/x-mpegurl") then + content_type:find("application/json") or + content_type:find("application/rss%+xml") or + content_type:find("application/atom%+xml") or + content_type:find("application/vnd%.mpegurl") or + content_type:find("application/x%-mpegurl") then + ngx.log(ngx.DEBUG, "Enabling rewrite due to content type " .. content_type) ngx.ctx.rewriting = true - ngx.header["Content-Length"] = nil + ngx.header["Content-Length"] = nil -- We're rewriting the body so this has the wrong value if set end end if ngx.header["Location"] then - local pool_map = ngx.ctx.jasima_pool_map - if ngx.ctx.rewriting and pool_map then + if ngx.ctx.jasima_pool_map then local location = ngx.header["Location"] - ngx.header["Location"] = location:gsub("//([a-zA-Z0-9%.%-]+%.[a-zA-Z0-9]+)/", utils.get_mirror) + ngx.header["Location"] = location:gsub("//([%a%d%.-]+%.[%a%d-]+)/+", utils.get_mirror) end end diff --git a/src/lua/utils.lua b/src/lua/utils.lua index 827315c..2889ad2 100644 --- a/src/lua/utils.lua +++ b/src/lua/utils.lua @@ -75,6 +75,9 @@ end function _M.get_mirror(host) ngx.log(ngx.DEBUG, "Looking up mirror for " .. host) local host_lower = host:lower() + if host_lower == ngx.ctx.jasima_host then + return "//" .. host_lower .. "/" -- This will get made relative by sub_filter later + end local mirror = ngx.ctx.jasima_pool_map[host_lower] return "//" .. (mirror or host) .. "/" end