commit 6179dea246d286b7045f4ce73707ac19df026a7e Author: irl Date: Sun Apr 27 17:20:02 2025 +0100 feat: initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..38eb3cc --- /dev/null +++ b/.gitignore @@ -0,0 +1,51 @@ +# Environment files +sitelen.env +updater.env + +# Updater configurations +/legacy/configs/*.json + +# JetBrains +*.iml +/.idea + +# Compiled Lua sources +luac.out + +# luarocks build files +*.src.rock +*.zip +*.tar.gz + +# Object files +*.o +*.os +*.ko +*.obj +*.elf + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo +*.def +*.exp + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex diff --git a/README.md b/README.md new file mode 100644 index 0000000..64013eb --- /dev/null +++ b/README.md @@ -0,0 +1,46 @@ +ilo sitelen tu +============== + +A rewriting proxy service for use as the backend of a live web mirroring service for resilient and robust access to your website. + +--- + +Origin Configuration +-------------------- + +Each origin is required to have a JSON configuration object available in the Redis backend under the key +`jasima:config:`. +The following keys are meaningful in this object: + +| Key | Description | Default | Example | +|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------|-----------------------------| +| `headers` | Append additional headers to the request sent to the origin. | No additional headers are added. | `{"X-Example": "Hello"}` | +| `host_connect` | Override the hostname that is used to connect to the origin. 
This can be useful in the case that the origin is behind a CDN that you want to avoid having to traverse, either due to DDoS protection tools being falsely triggered or simply to save costs. | The canonical host is used. | `"real-origin.example.com"` | +| `host_header` | Override the hostname that is used as the `Host` header when connecting to the origin. | The canonical host is used. | `"news.example.org"` | +| `host_ssl` | Override the hostname that is used for TLS SNI and for verification of the upstream's TLS certificate. | The canonical host is used. | `"news.example.org"` | +| `matomo_site_id` | The site ID to use for Matomo tracking. | No tracking code is added to HTML pages. | `20` | +| `rewrite_case_insensitive` | Rewrite references to other domain names using a case-insensitive match. This requires the use of the PCRE engine which is approximately 10% of the speed of the Lua pattern matching engine, so only enable if required. | Domains are matched based on the case in the pool mapping. | `true` | +| `rewrite_disable` | Disable content rewriting for this host. This disables both URL rewriting, and the addition of Matomo tracking code, but not the replacement of absolute links to the canonical hostname with relative links. | Rewriting is enabled. | `true` | + +Note: the default case applies when the key is omitted. + +Copyright +--------- + +Copyright © 2025 SR2 Communications Limited. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following + disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..b76e2d6 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,19 @@ +services: + sitelen-tu: + build: + context: src + dockerfile: Dockerfile + env_file: "sitelen.env" + ports: + - "127.0.0.1:80:80" +# updater: +# build: +# context: legacy +# dockerfile: Dockerfile +# env_file: "updater.env" +# volumes: +# - ./legacy/configs:/configs + redis: + image: redis:latest + ports: + - "127.0.0.1:6379:6379" diff --git a/legacy/Dockerfile b/legacy/Dockerfile new file mode 100644 index 0000000..d716600 --- /dev/null +++ b/legacy/Dockerfile @@ -0,0 +1,13 @@ +FROM akorn/luarocks:lua5.1-alpine + +RUN apk add build-base openssl-dev + +RUN luarocks install luasocket \ + && luarocks install luasec \ + && luarocks install lua-cjson \ + && luarocks install redis-lua \ + && luarocks install luaposix + +WORKDIR /opt/updater +COPY updater.lua . 
+CMD ["lua", "updater.lua"] diff --git a/legacy/README.md b/legacy/README.md new file mode 100644 index 0000000..aba6763 --- /dev/null +++ b/legacy/README.md @@ -0,0 +1,24 @@ +Legacy Updater Tool +=================== + +This updater exists to facilitate a transition from the legacy mirrors orchestration to its replacement. +As far as I know, no one else has ever deployed the original orchestrator so this will almost certainly not be useful +to you. + +Configuration +------------- + +When working with the Docker Compose file, configure the sources via the environment file: `updater.env`. +Set one environment variable per configured pool, named `JASIMA_MIRRORS_POOL_<pool name>`, whose value is the HTTP URL of the Bypass Censorship Mirrors JSON file for that pool. + +For example: + +```shell +JASIMA_MIRRORS_POOL_example=https://www.example.com/mirrors.json +``` + +### Per-site Configurations + +In the configs directory, create a file named `default.json` to provide the default site configuration. +Create files named `<host>.json` (for example, `www.example.com.json`) to override the default for a single host. +This is a complete override so duplicate anything from the default you wanted to keep. 
diff --git a/legacy/configs/default.json.example b/legacy/configs/default.json.example new file mode 100644 index 0000000..d645745 --- /dev/null +++ b/legacy/configs/default.json.example @@ -0,0 +1,3 @@ +{ + "matomo_site_id": 200 +} diff --git a/legacy/configs/www.example.com.json.example b/legacy/configs/www.example.com.json.example new file mode 100644 index 0000000..3e87e13 --- /dev/null +++ b/legacy/configs/www.example.com.json.example @@ -0,0 +1,4 @@ +{ + "matomo_site_id": 300, + "host_connect": "www.example.org" +} diff --git a/legacy/updater.lua b/legacy/updater.lua new file mode 100644 index 0000000..fcc2ece --- /dev/null +++ b/legacy/updater.lua @@ -0,0 +1,94 @@ +local cjson = require('cjson') +local socket = require('socket') +local http = require('socket.http') +local stdlib = require('posix.stdlib') +local redis = require('redis') + +local client = redis.connect('redis', 6379) + +local function get_mirrors(source) + local body, _, _, _ = http.request(source) + return cjson.decode(body) +end + +local function prepare_pool_map(mirrors) + local pool_map = {} + for _, site in pairs(mirrors['sites']) do + for _, alternative in pairs(site['available_alternatives']) do + if alternative['type'] == 'mirror' then + local canonical_host = site['main_domain'] + if select(2, canonical_host:gsub("%.", "")) == 1 then + canonical_host = "www." .. canonical_host + end + pool_map[canonical_host] = alternative['url']:sub(9) + break + end + end + end + return pool_map +end + +local function redis_set(client, key, data) + local res, err = client:set(key, data) + if not res then + print("Error setting key in Redis:", err) + else + print("Data successfully stored in Redis under '" .. key .. "'") + end +end + +local function get_config(host) + local function read_file(file_path) + local file = io.open(file_path, "r") + if file then + local content = file:read("*a") + file:close() + return content + else + return nil + end + end + local file_paths = { + "/configs/" .. host .. 
".json", -- Check host-specific file + "/configs/default.json" -- Check default file + } + for _, file_path in ipairs(file_paths) do + local content = read_file(file_path) + if content then + print("Using " .. file_path .. " for " .. host .. " configuration.") + return content + end + end + return "{}" +end + +local function update_pool_data(pool_name, pool_source) + local pool_map = prepare_pool_map(get_mirrors(pool_source)) + redis_set(client, 'jasima:poolmap:' .. pool_name, cjson.encode(pool_map)) + for host, _ in pairs(pool_map) do + local redis_key = 'jasima:config:' .. host + local config = get_config(host) + redis_set(client, redis_key, config) + end +end + +-- Schedule on wall-clock time: os.clock() reports CPU time, which does not advance while the process is blocked +-- on network I/O or sleeping, so it cannot drive a periodic refresh. +local interval = 5 * 60 -- 5 minutes in seconds +local last_update_time = os.time() - interval + +while true do + local current_time = os.time() + if current_time - last_update_time >= interval then + for env_name, pool_source in pairs(stdlib.getenv()) do + if env_name:match("^JASIMA_MIRRORS_POOL_") then + -- The pool name is whatever follows the 20-character prefix + local pool_name = env_name:sub(21) + update_pool_data(pool_name, pool_source) + end + end + last_update_time = current_time + end + -- Sleep between checks instead of spinning a CPU core in a busy loop + socket.sleep(1) +end diff --git a/sitelen.env.example b/sitelen.env.example new file mode 100644 index 0000000..9e4981e --- /dev/null +++ b/sitelen.env.example @@ -0,0 +1 @@ +JASIMA_MATOMO_HOST=matomo.example.com diff --git a/src/Dockerfile b/src/Dockerfile new file mode 100644 index 0000000..368ad16 --- /dev/null +++ b/src/Dockerfile @@ -0,0 +1,9 @@ +FROM openresty/openresty:alpine-fat + +RUN /usr/local/openresty/luajit/bin/luarocks install lua-resty-http +RUN /usr/local/openresty/luajit/bin/luarocks install lua-resty-cookie +RUN /usr/local/openresty/luajit/bin/luarocks install lua-resty-iputils + +COPY default.conf /etc/nginx/conf.d/default.conf +COPY env.main /etc/nginx/conf.d/env.main +COPY lua/* /opt/sitelen-tu/ diff --git a/src/default.conf b/src/default.conf new file mode 100644 index 0000000..7ed212e --- /dev/null +++ b/src/default.conf @@ -0,0 +1,42 @@ +error_log /dev/stdout; 
+ +lua_shared_dict jasima_cache 20m; +lua_package_path "/opt/sitelen-tu/?.lua;;"; +resolver 127.0.0.11 valid=60 ipv6=off; + +upstream origin { + server 127.0.0.1; + + balancer_by_lua_file /opt/sitelen-tu/balancer.lua; +} + +server { + listen 80; + server_name localhost default; + + location / { + # These variables are set in the access_by_lua stage + # TODO: These might be better to set with a set_by_lua_block + set $jasima_host "fallback.invalid"; + set $jasima_host_header "fallback.invalid"; + set $jasima_host_ssl "fallback.invalid"; + + access_by_lua_file /opt/sitelen-tu/access.lua; + + proxy_pass https://origin; + proxy_ssl_server_name on; + proxy_ssl_name $jasima_host_ssl; + proxy_set_header Accept-Encoding ""; + proxy_set_header Host $jasima_host_header; + + sub_filter_once off; + sub_filter_types text/html text/css text/xml application/javascript application/rss+xml application/atom+xml application/vnd.mpegurl application/x-mpegurl; + sub_filter 'http://$jasima_host' '/'; + sub_filter 'https://$jasima_host' '/'; + sub_filter '//$jasima_host' '/'; + sub_filter 'REWRITE_JASIMA_HOST_PLACEHOLDER' $jasima_host; + + header_filter_by_lua_file /opt/sitelen-tu/header_filter.lua; + body_filter_by_lua_file /opt/sitelen-tu/body_filter.lua; + } +} diff --git a/src/env.main b/src/env.main new file mode 100644 index 0000000..bf5deaf --- /dev/null +++ b/src/env.main @@ -0,0 +1 @@ +env JASIMA_MATOMO_HOST; \ No newline at end of file diff --git a/src/lua/access.lua b/src/lua/access.lua new file mode 100644 index 0000000..8f97d62 --- /dev/null +++ b/src/lua/access.lua @@ -0,0 +1,74 @@ +-- Access phase handler: resolves the canonical host from the request, loads its configuration and pool mapping, +-- redirects viewers that do not need a mirror, strips Jasima-* request headers and resolves the upstream addresses. +local http = require "resty.http" + +local config = require "config" +local geo = require "geo" +local utils = require "utils" + +local jasima_host = config.get_jasima_host() +ngx.ctx.jasima_host = jasima_host +if not jasima_host then + return ngx.exit(400) +end + +local err +ngx.ctx.jasima_config, err = config.load_config(jasima_host) +if err then + ngx.status = 500 + ngx.log(ngx.ERR, "Could 
not load config: " .. err) + return ngx.exit(500) +end + +if not ngx.ctx.jasima_config then + ngx.status = 403 + ngx.log(ngx.ERR, "Requested a canonical host that has no configuration specified: " .. jasima_host) + return ngx.exit(403) +end + +local country = geo.viewer_country() +if not ngx.ctx.jasima_config.geo_redirect_disable and not geo.needs_mirror(country) then + local request_uri = ngx.var.request_uri + local new_url = "https://" .. jasima_host .. request_uri + return ngx.redirect(new_url, ngx.HTTP_MOVED_TEMPORARILY) +end + +-- Get jasima_pool (not critical) +local jasima_pool = config.get_jasima_pool() +ngx.ctx.jasima_pool_map, err = config.load_pool_mapping(jasima_pool) +if err then + ngx.status = 500 + ngx.log(ngx.WARN, "Could not load pool mapping: " .. err) + return ngx.exit(500) +end + +local headers = ngx.req.get_headers() + +-- Remove the headers that should not be proxied to the origin +for k in pairs(headers) do + if k:lower():match("^jasima%-") then + ngx.req.clear_header(k) + end +end + +-- Add additional headers that have been specified in the configuration +if ngx.ctx.jasima_config.headers then + for k, v in pairs(ngx.ctx.jasima_config.headers) do + ngx.req.set_header(k, v) + end +end + +-- Look up the IP to connect to the origin +local host_connect = ngx.ctx.jasima_config.host_connect or jasima_host +local upstream_ips = utils.resolve_origin(host_connect) +ngx.ctx.upstream_ips = utils.filter_bogons(upstream_ips) + +if #ngx.ctx.upstream_ips == 0 then + ngx.log(ngx.ERR, "no A records found") + return ngx.exit(500) +end + +-- Set the nginx host variables +ngx.var.jasima_host = jasima_host +ngx.var.jasima_host_header = ngx.ctx.jasima_config.host_header or jasima_host +ngx.var.jasima_host_ssl = ngx.ctx.jasima_config.host_ssl or jasima_host diff --git a/src/lua/balancer.lua b/src/lua/balancer.lua new file mode 100644 index 0000000..2ab369f --- /dev/null +++ b/src/lua/balancer.lua @@ -0,0 +1,34 @@ +-- Balancer phase handler: selects the next address from ngx.ctx.upstream_ips for each connection attempt. +local balancer = require "ngx.balancer" + +local 
upstream_ips = ngx.ctx.upstream_ips +if not upstream_ips then + ngx.log(ngx.ERR, "No upstream IPs in context") + return ngx.exit(500) +end + +ngx.ctx.balancer_try = (ngx.ctx.balancer_try or 0) + 1 +local try_index = ngx.ctx.balancer_try + +if try_index > #upstream_ips then + ngx.log(ngx.ERR, "All upstream IPs tried, none succeeded") + return ngx.exit(500) +end + +-- The upstream block lists a single server, so nginx will not retry unless more tries are requested. Ask for one +-- extra try per remaining address on the first attempt (still capped by proxy_next_upstream_tries). +if try_index == 1 and #upstream_ips > 1 then + local _, tries_err = balancer.set_more_tries(#upstream_ips - 1) + if tries_err then + ngx.log(ngx.WARN, "could not request upstream retries: ", tries_err) + end +end + +local ip = upstream_ips[try_index] +ngx.log(ngx.INFO, "Trying upstream IP: ", ip) + +local ok, err = balancer.set_current_peer(ip, 443) +if not ok then + ngx.log(ngx.ERR, "failed to set peer: ", err) + return ngx.exit(500) +end diff --git a/src/lua/body_filter.lua b/src/lua/body_filter.lua new file mode 100644 index 0000000..d309048 --- /dev/null +++ b/src/lua/body_filter.lua @@ -0,0 +1,63 @@ +local function matomo_tracking_code(site_id) + return [=[ + + + + + ]=] +end + +local function rewrite_body(body, eof) + if not ngx.ctx.jasima_pool_map or ngx.ctx.jasima_config.rewrite_disable then + return body + end + for from, to in pairs(ngx.ctx.jasima_pool_map) do + if ngx.ctx.jasima_config.rewrite_case_insensitive then + local pattern = ngx.re.escape(from) + body = ngx.re.gsub(body, pattern, to, "ijo") + else + -- We expect that str:match("^[%w%-%.]+$") ~= nil + local pattern = from:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1") -- escape Lua patterns + body = body:gsub(pattern, to) + end + end + if eof and ngx.ctx.jasima_config.matomo_site_id then + body = body:gsub("", matomo_tracking_code(ngx.ctx.jasima_config.matomo_site_id)) + -- TODO: Ensure that tracking code was added when it's HTML, but only for HTML + end + return body +end + +if ngx.ctx.rewriting then + local chunk = ngx.arg[1] + local eof = ngx.arg[2] + + ngx.ctx.buffered = (ngx.ctx.buffered or "") .. 
(chunk or "") + + if #ngx.ctx.buffered > 5 * 1024 * 1024 and not eof then + -- Don't just consume memory forever + ngx.arg[1] = rewrite_body(ngx.ctx.buffered, eof) -- We still do our best + ngx.ctx.rewriting = false + return + end + + if eof then + ngx.arg[1] = rewrite_body(ngx.ctx.buffered, eof) + else + ngx.arg[1] = nil + end +end diff --git a/src/lua/config.lua b/src/lua/config.lua new file mode 100644 index 0000000..9421484 --- /dev/null +++ b/src/lua/config.lua @@ -0,0 +1,106 @@ +local ck = require "resty.cookie" +local cjson = require "cjson.safe" +local redis = require "resty.redis" + +local _M = {} + +function _M.get_jasima_host() + local headers = ngx.req.get_headers() + if headers["Jasima-Host"] then + return headers["Jasima-Host"] + end + + local cookie, err = ck:new() + if not cookie then + ngx.log(ngx.ERR, "failed to get cookie: ", err) + return nil + end + + local jasima_cookie, err = cookie:get("jasima_host") + if jasima_cookie then + return jasima_cookie + elseif err then + ngx.log(ngx.ERR, "failed to get jasima_host cookie: ", err) + end + + return nil +end + +function _M.get_jasima_pool() + local headers = ngx.req.get_headers() + if headers["Jasima-Pool"] then + return headers["Jasima-Pool"] + end + + local cookie, err = ck:new() + if not cookie then + ngx.log(ngx.ERR, "failed to get cookie: ", err) + return nil + end + + local jasima_cookie, err = cookie:get("jasima_pool") + if jasima_cookie then + return jasima_cookie + elseif err then + ngx.log(ngx.ERR, "failed to get jasima_pool cookie: ", err) + end + + return nil +end + +--- Load a pool's domain mapping, preferring the shared-dict cache (60 second TTL) and falling back to Redis. +-- Returns the decoded mapping, or nil and an error message; pool_name defaults to "public" when nil. +function _M.load_pool_mapping(pool_name) + if not pool_name then pool_name = "public" end + + local cache = ngx.shared.jasima_cache + local cache_key = "poolmap:" .. pool_name + local cached = cache:get(cache_key) + if cached then return cjson.decode(cached) end + + local red = redis:new() + red:set_timeout(1000) + local ok, err = red:connect("redis", 6379) + if not ok then return nil, "Redis connect failed: " .. 
err end + + local key = "jasima:poolmap:" .. pool_name + local res, err = red:get(key) + -- Report a failed GET as such instead of disguising it as a missing key + if not res then return nil, "Redis GET failed: " .. (err or "unknown") end + + -- The reply was valid, so the connection can be pooled even when the key is absent + red:set_keepalive(10000, 100) + if res == ngx.null then return nil, "No pool mapping found" end + + cache:set(cache_key, res, 60) + + return cjson.decode(res) +end + +--- Load the JSON configuration of a canonical host, preferring the shared-dict cache (60 second TTL) over Redis. +-- Returns the decoded configuration, or nil and an error message. +function _M.load_config(jasima_host) + local cache = ngx.shared.jasima_cache + local cache_key = "config:" .. jasima_host + local cached = cache:get(cache_key) + if cached then return cjson.decode(cached) end + + local red = redis:new() + red:set_timeout(1000) + local ok, err = red:connect("redis", 6379) + if not ok then return nil, "Redis connect failed: " .. err end + + local key = "jasima:config:" .. jasima_host + local res, err = red:get(key) + -- Report a failed GET as such instead of disguising it as a missing key + if not res then return nil, "Redis GET failed: " .. (err or "unknown") end + + -- The reply was valid, so the connection can be pooled even when the key is absent + red:set_keepalive(10000, 100) + if res == ngx.null then return nil, "No config in Redis" end + + cache:set(cache_key, res, 60) + return cjson.decode(res) +end + +return _M diff --git a/src/lua/geo.lua b/src/lua/geo.lua new file mode 100644 index 0000000..0fed6c9 --- /dev/null +++ b/src/lua/geo.lua @@ -0,0 +1,49 @@ +local _M = {} + +function _M.viewer_country() + -- Maybe the CDN was nice and gave this to us + local country = ngx.var.http_cloudfront_viewer_country or -- AWS CloudFront + ngx.var.http_fastly_client_country or -- Fastly + ngx.var.http_cf_ipcountry -- CloudFlare + if not country then return nil end + return country:upper() + + -- TODO: Fallback to GeoIP lookup +end + +function _M.needs_mirror(country) + if not country then return true end + + -- TODO: Allow override of safe countries in host config + + local safe_countries = { + US = true, -- United States + GB = true, -- United Kingdom + IE = true, -- Ireland + FR = true, -- France + DE = true, -- Germany + NL = true, -- Netherlands + BE = true, -- Belgium + CH = true, -- Switzerland + AT = true, -- Austria + LU = true, -- Luxembourg + LI = true, -- Liechtenstein + MC = true, -- Monaco + AD = true, -- Andorra + ES = true, -- Spain + PT = true, -- Portugal + IT = true, -- Italy + SM = 
true, -- San Marino + VA = true, -- Vatican City + MT = true, -- Malta + NO = true, -- Norway + SE = true, -- Sweden + DK = true, -- Denmark + FI = true, -- Finland + IS = true -- Iceland + } + if safe_countries[country] then return false end + return true +end + +return _M diff --git a/src/lua/header_filter.lua b/src/lua/header_filter.lua new file mode 100644 index 0000000..6dcfeef --- /dev/null +++ b/src/lua/header_filter.lua @@ -0,0 +1,18 @@ +-- Mark responses with a rewritable Content-Type so that the body filter buffers and rewrites them. +-- The MIME types are matched as plain substrings: "+", "-" and "." are magic in Lua patterns, so as patterns +-- "application/rss+xml", "application/atom+xml" and "application/x-mpegurl" never match their literal names. +if ngx.header["Content-Type"] then + local content_type = ngx.header["Content-Type"] + if content_type:find("text/html", 1, true) or + content_type:find("text/css", 1, true) or + content_type:find("text/xml", 1, true) or + content_type:find("application/javascript", 1, true) or + content_type:find("application/rss+xml", 1, true) or + content_type:find("application/atom+xml", 1, true) or + content_type:find("application/vnd.mpegurl", 1, true) or + content_type:find("application/x-mpegurl", 1, true) then + ngx.ctx.rewriting = true + -- The body may change size when rewritten, so the upstream Content-Length no longer applies + ngx.header["Content-Length"] = nil + end +end diff --git a/src/lua/utils.lua b/src/lua/utils.lua new file mode 100644 index 0000000..b6f2ae0 --- /dev/null +++ b/src/lua/utils.lua @@ -0,0 +1,93 @@ +local resolver = require "resty.dns.resolver" +local iputils = require "resty.iputils" + +iputils.enable_lrucache() + +local bogon_ips = iputils.parse_cidrs({ + "0.0.0.0/8", + "10.0.0.0/8", + "100.64.0.0/10", + "127.0.0.0/8", + "169.254.0.0/16", + "172.16.0.0/12", + "192.0.0.0/24", + "192.0.2.0/24", + "192.168.0.0/16", + "198.18.0.0/15", + "198.51.100.0/24", + "203.0.113.0/24", + "224.0.0.0/4", + "240.0.0.0/4" +}) + +local _M = {} + +--- Resolve the A records of an origin host, caching the result in the shared dictionary for 60 seconds. +-- @param origin_host hostname to resolve +-- @return list of address strings; on a resolver failure the request is ended with a 500 +function _M.resolve_origin(origin_host) + local cache = ngx.shared.jasima_cache + local cache_key = "upstream_ips:" .. 
origin_host + local cached = cache:get(cache_key) + if cached then + -- Cached as a comma-separated string; split it back into a list + local cached_ips = {} + for ip in cached:gmatch("[^,]+") do + cached_ips[#cached_ips + 1] = ip + end + return cached_ips + end + + local r, err = resolver:new{ + nameservers = {"8.8.8.8", "8.8.4.4"}, + retrans = 5, + timeout = 2000, + } + + if not r then + ngx.log(ngx.ERR, "failed to instantiate resolver: ", err) + return ngx.exit(500) + end + + local answers, err = r:query(origin_host, {qtype = r.TYPE_A}) + if not answers then + ngx.log(ngx.ERR, "failed to query: ", err) + return ngx.exit(500) + end + + if answers.errcode then + ngx.log(ngx.ERR, "DNS error code: ", answers.errcode, ": ", answers.errstr) + return ngx.exit(500) + end + + local origin_ips = {} + for _, ans in ipairs(answers) do + if ans.address then + table.insert(origin_ips, ans.address) + end + end + + -- Shared dictionaries only accept scalar values (a table is rejected with "bad value type"), so the list is + -- stored as a comma-separated string. Empty results are not cached. + if #origin_ips > 0 then + local ok, set_err = cache:set(cache_key, table.concat(origin_ips, ","), 60) + if not ok then + ngx.log(ngx.WARN, "failed to cache upstream IPs: ", set_err) + end + end + + return origin_ips +end + +--- Return the addresses from ip_list that do not fall inside any of the bogon ranges listed above. +function _M.filter_bogons(ip_list) + local filtered = {} + for _, ip in ipairs(ip_list) do + if not iputils.ip_in_cidrs(ip, bogon_ips) then + table.insert(filtered, ip) + end + end + return filtered +end + +return _M