feat: initial commit

This commit is contained in:
Iain Learmonth 2025-04-27 17:20:02 +01:00
commit 6179dea246
19 changed files with 693 additions and 0 deletions

51
.gitignore vendored Normal file
View file

@ -0,0 +1,51 @@
# Environment files
sitelen.env
updater.env
# Updater configurations
/legacy/configs/*.json
# JetBrains
*.iml
/.idea
# Compiled Lua sources
luac.out
# luarocks build files
*.src.rock
*.zip
*.tar.gz
# Object files
*.o
*.os
*.ko
*.obj
*.elf
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
*.def
*.exp
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex

46
README.md Normal file
View file

@ -0,0 +1,46 @@
ilo sitelen tu
==============
A rewriting proxy service for use as the backend of a live web mirroring service for resilient and robust access to your website.
---
Origin Configuration
--------------------
Each origin is required to have a JSON configuration object available in the Redis backend under the key
`jasima:config:<canonical host>`.
The following keys are meaningful in this object:
| Key | Description | Default | Example |
|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------|-----------------------------|
| `headers` | Append additional headers to the request sent to the origin. | No additional headers are added. | `{"X-Example": "Hello"}` |
| `host_connect` | Override the hostname that is used to connect to the origin. This can be useful in the case that the origin is behind a CDN that you want to avoid having to traverse, either due to DDoS protection tools being falsely triggered or simply to save costs. | The canonical host is used. | `"real-origin.example.com"` |
| `host_header` | Override the hostname that is used as the `Host` header when connecting to the origin. | The canonical host is used. | `"news.example.org"` |
| `host_ssl` | Override the hostname that is used for TLS SNI and for verification of the upstream's TLS certificate. | The canonical host is used. | `"news.example.org"` |
| `matomo_site_id` | The site ID to use for Matomo tracking. | No tracking code is added to HTML pages. | `20` |
| `rewrite_case_insensitive` | Rewrite references to other domain names using a case-insensitive match. This requires the use of the PCRE engine which is approximately 10% of the speed of the Lua pattern matching engine, so only enable if required. | Domains are matched based on the case in the pool mapping. | `true` |
| `rewrite_disable` | Disable content rewriting for this host. This disables both URL rewriting, and the addition of Matomo tracking code, but not the replacement of absolute links to the canonical hostname with relative links. | Rewriting is enabled. | `true` |
Note: the default case applies when the key is omitted.
Copyright
---------
Copyright © 2025 SR2 Communications Limited.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

19
docker-compose.yml Normal file
View file

@ -0,0 +1,19 @@
services:
sitelen-tu:
build:
context: src
dockerfile: Dockerfile
env_file: "sitelen.env"
ports:
- "127.0.0.1:80:80"
# updater:
# build:
# context: legacy
# dockerfile: Dockerfile
# env_file: "updater.env"
# volumes:
# - ./legacy/configs:/configs
redis:
image: redis:latest
ports:
- "127.0.0.1:6379:6379"

13
legacy/Dockerfile Normal file
View file

@ -0,0 +1,13 @@
FROM akorn/luarocks:lua5.1-alpine
RUN apk add build-base openssl-dev
RUN luarocks install luasocket \
&& luarocks install luasec \
&& luarocks install lua-cjson \
&& luarocks install redis-lua \
&& luarocks install luaposix
WORKDIR /opt/updater
COPY updater.lua .
CMD ["lua", "updater.lua"]

24
legacy/README.md Normal file
View file

@ -0,0 +1,24 @@
Legacy Updater Tool
===================
This updater exists to facilitate a transition from the legacy mirrors orchestration to its replacement.
As far as I know, no one else has ever deployed the original orchestrator so this will almost certainly not be useful
to you.
Configuration
-------------
When working with the Docker Compose file, configure the sources via the environment file: `updater.env`.
One environment variable per configured pool with an HTTP URL to the Bypass Censorship Mirrors JSON file for each pool.
For example:
```shell
JASIMA_MIRRORS_POOL_example=https://www.example.com/mirrors.json
```
### Per-site Configurations
In the configs directory, create a file named `default.json` to provide the default site configuration.
Create files named `<host>.json` to override the default.
This is a complete override, so duplicate anything from the default that you want to keep.

View file

@ -0,0 +1,3 @@
{
"matomo_site_id": 200
}

View file

@ -0,0 +1,4 @@
{
"matomo_site_id": 300,
"host_connect": "www.example.org"
}

88
legacy/updater.lua Normal file
View file

@ -0,0 +1,88 @@
local cjson = require('cjson')
local http = require('socket.http')
local stdlib = require('posix.stdlib')
local redis = require('redis')
local client = redis.connect('redis', 6379)
--- Fetch and decode a mirrors JSON document.
-- @tparam string source HTTP URL of the mirrors JSON file
-- @treturn table the decoded JSON document
-- Raises an error naming the URL when the request fails or the server does
-- not answer with status 200; cjson raises if the body is not valid JSON.
local function get_mirrors(source)
    -- In the simple form, luasocket returns body, status code, headers and
    -- status line; on failure it returns nil followed by an error message.
    local body, status = http.request(source)
    if not body then
        error("failed to fetch mirrors from " .. tostring(source) .. ": " .. tostring(status))
    end
    if status ~= 200 then
        error("unexpected HTTP status " .. tostring(status) .. " fetching mirrors from " .. tostring(source))
    end
    return cjson.decode(body)
end
--- Build the canonical-host -> mirror-host map for one pool.
-- For every site the first alternative of type "mirror" is used. A
-- main_domain containing exactly one dot gets a "www." prefix.
-- @tparam table mirrors decoded mirrors document; reads mirrors.sites[*]
--   .main_domain and .available_alternatives[*].type / .url
-- @treturn table map of canonical host to mirror host (URL scheme removed)
local function prepare_pool_map(mirrors)
    local pool_map = {}
    for _, site in pairs(mirrors['sites']) do
        -- A site without any alternatives is skipped instead of raising.
        for _, alternative in pairs(site['available_alternatives'] or {}) do
            if alternative['type'] == 'mirror' then
                local canonical_host = site['main_domain']
                local _, dot_count = canonical_host:gsub("%.", "")
                if dot_count == 1 then
                    canonical_host = "www." .. canonical_host
                end
                -- Strip whatever scheme prefix is present rather than assuming
                -- the URL always starts with the 8 characters of "https://".
                local mirror_host = alternative['url']:gsub("^%a[%w+.-]*://", "")
                pool_map[canonical_host] = mirror_host
                break
            end
        end
    end
    return pool_map
end
--- Write one key to Redis and report the outcome on stdout.
-- @param client Redis client object exposing a `set` method
-- @tparam string key Redis key to write
-- @tparam string data value to store
local function redis_set(client, key, data)
    local stored, failure = client:set(key, data)
    if stored then
        print("Data successfully stored in Redis under '" .. key .. "'")
        return
    end
    print("Error setting key in Redis:", failure)
end
--- Return the raw JSON configuration text for a host.
-- Looks for /configs/<host>.json first and /configs/default.json second;
-- the first file that can be read wins. Falls back to an empty JSON object.
-- @tparam string host canonical host name
-- @treturn string file contents, or "{}" when neither file is readable
local function get_config(host)
    -- Candidates in priority order: host-specific file, then the default.
    local candidates = {
        "/configs/" .. host .. ".json",
        "/configs/default.json",
    }
    for index = 1, #candidates do
        local path = candidates[index]
        local handle = io.open(path, "r")
        if handle then
            local content = handle:read("*a")
            handle:close()
            if content then
                print("Using " .. path .. " for " .. host .. " configuration.")
                return content
            end
        end
    end
    return "{}"
end
--- Refresh everything stored in Redis for one pool.
-- Writes the pool map under jasima:poolmap:<pool_name>, then one
-- jasima:config:<host> entry for every canonical host in that map.
-- @tparam string pool_name name of the pool
-- @tparam string pool_source URL of the pool's mirrors JSON file
local function update_pool_data(pool_name, pool_source)
    local mirrors = get_mirrors(pool_source)
    local pool_map = prepare_pool_map(mirrors)
    local encoded_map = cjson.encode(pool_map)
    redis_set(client, 'jasima:poolmap:' .. pool_name, encoded_map)
    for host in pairs(pool_map) do
        redis_set(client, 'jasima:config:' .. host, get_config(host))
    end
end
-- Main loop: refresh every pool named by a JASIMA_MIRRORS_POOL_<name>
-- environment variable, then sleep until the next run is due.
--
-- The schedule uses wall-clock time (os.time) and a real sleep. os.clock()
-- measures CPU time, and polling it in a tight loop keeps a core fully busy.
local unistd = require('posix.unistd')

local interval = 5 * 60 -- 5 minutes in seconds
local env_prefix = "JASIMA_MIRRORS_POOL_"

while true do
    local started_at = os.time()
    for env_name, pool_source in pairs(stdlib.getenv()) do
        if env_name:sub(1, #env_prefix) == env_prefix then
            local pool_name = env_name:sub(#env_prefix + 1)
            -- A failure in one pool (network error, bad JSON, ...) must not
            -- stop the updater or prevent the other pools from refreshing.
            local ok, err = pcall(update_pool_data, pool_name, pool_source)
            if not ok then
                print("Error updating pool '" .. pool_name .. "': " .. tostring(err))
            end
        end
    end
    local remaining = interval - os.difftime(os.time(), started_at)
    if remaining > 0 then
        unistd.sleep(remaining)
    end
end

1
sitelen.env.example Normal file
View file

@ -0,0 +1 @@
JASIMA_MATOMO_HOST=matomo.example.com

9
src/Dockerfile Normal file
View file

@ -0,0 +1,9 @@
FROM openresty/openresty:alpine-fat
RUN /usr/local/openresty/luajit/bin/luarocks install lua-resty-http
RUN /usr/local/openresty/luajit/bin/luarocks install lua-resty-cookie
RUN /usr/local/openresty/luajit/bin/luarocks install lua-resty-iputils
COPY default.conf /etc/nginx/conf.d/default.conf
COPY env.main /etc/nginx/conf.d/env.main
COPY lua/* /opt/sitelen-tu/

42
src/default.conf Normal file
View file

@ -0,0 +1,42 @@
error_log /dev/stdout;
lua_shared_dict jasima_cache 20m;
lua_package_path "/opt/sitelen-tu/?.lua;;";
resolver 127.0.0.11 valid=60 ipv6=off;
# Upstream whose peer is chosen per request by balancer.lua.
upstream origin {
# Static entry; balancer.lua calls set_current_peer() with an address taken
# from ngx.ctx.upstream_ips, so this address is presumably only a placeholder.
server 127.0.0.1;
balancer_by_lua_file /opt/sitelen-tu/balancer.lua;
}
# Rewriting proxy: every request is handled by the Lua access handler and then
# proxied over HTTPS to the "origin" upstream.
server {
listen 80;
server_name localhost default;
location / {
# These variables are set in the access_by_lua stage
# TODO: These might be better to set with a set_by_lua_block
set $jasima_host "fallback.invalid";
set $jasima_host_header "fallback.invalid";
set $jasima_host_ssl "fallback.invalid";
access_by_lua_file /opt/sitelen-tu/access.lua;
proxy_pass https://origin;
# NOTE(review): no proxy_ssl_verify directive appears in this block and nginx
# leaves it off by default, while the README describes host_ssl as also being
# used for certificate verification - confirm whether verification is intended.
proxy_ssl_server_name on;
proxy_ssl_name $jasima_host_ssl;
# An empty value stops the header being sent upstream; presumably so that
# bodies arrive uncompressed and can be rewritten by the filters below.
proxy_set_header Accept-Encoding "";
proxy_set_header Host $jasima_host_header;
sub_filter_once off;
# Same list of content types that header_filter.lua checks for.
sub_filter_types text/html text/css text/xml application/javascript application/rss+xml application/atom+xml application/vnd.mpegurl application/x-mpegurl;
# Turn absolute links to the canonical host into relative ones.
# NOTE(review): a link such as https://<host>/path becomes "//path" after
# these literal replacements - confirm that this is the intended result.
sub_filter 'http://$jasima_host' '/';
sub_filter 'https://$jasima_host' '/';
sub_filter '//$jasima_host' '/';
# The placeholder is emitted by the Matomo snippet in body_filter.lua.
sub_filter 'REWRITE_JASIMA_HOST_PLACEHOLDER' $jasima_host;
header_filter_by_lua_file /opt/sitelen-tu/header_filter.lua;
body_filter_by_lua_file /opt/sitelen-tu/body_filter.lua;
}
}

1
src/env.main Normal file
View file

@ -0,0 +1 @@
env JASIMA_MATOMO_HOST;

72
src/lua/access.lua Normal file
View file

@ -0,0 +1,72 @@
local http = require "resty.http"
local config = require "config"
local geo = require "geo"
local utils = require "utils"
-- Access-phase handler. Resolves the canonical host for the request, loads
-- its configuration and pool mapping into ngx.ctx, redirects viewers that do
-- not need a mirror, prepares the request headers, resolves the origin's
-- addresses for balancer.lua and finally sets the nginx host variables.

local jasima_host = config.get_jasima_host()
ngx.ctx.jasima_host = jasima_host
if not jasima_host then
    return ngx.exit(400)
end

local err
ngx.ctx.jasima_config, err = config.load_config(jasima_host)
if err then
    ngx.status = 500
    ngx.log(ngx.ERR, "Could not load config: " .. err)
    return ngx.exit(500)
end
local site_config = ngx.ctx.jasima_config
if not site_config then
    ngx.status = 403
    ngx.log(ngx.ERR, "Requested a canonical host that has no configuration specified: " .. jasima_host)
    -- `return` added so this exit is written like every other exit here.
    return ngx.exit(403)
end

-- Viewers in a country that does not need the mirror are sent to the
-- canonical site, unless the host configuration disables this.
local country = geo.viewer_country()
if not site_config.geo_redirect_disable and not geo.needs_mirror(country) then
    local new_url = "https://" .. jasima_host .. ngx.var.request_uri
    return ngx.redirect(new_url, ngx.HTTP_MOVED_TEMPORARILY)
end

-- Get jasima_pool (not critical)
local jasima_pool = config.get_jasima_pool()
ngx.ctx.jasima_pool_map, err = config.load_pool_mapping(jasima_pool)
if err then
    ngx.status = 500
    ngx.log(ngx.WARN, "Could not load pool mapping: " .. err)
    return ngx.exit(500)
end

-- Remove the headers that should not be proxied to the origin
for name in pairs(ngx.req.get_headers()) do
    if name:lower():match("^jasima%-") then
        ngx.req.clear_header(name)
    end
end

-- Add additional headers that have been specified in the configuration
if site_config.headers then
    for name, value in pairs(site_config.headers) do
        ngx.req.set_header(name, value)
    end
end

-- Look up the IP to connect to the origin
local host_connect = site_config.host_connect or jasima_host
local upstream_ips = utils.resolve_origin(host_connect)
ngx.ctx.upstream_ips = utils.filter_bogons(upstream_ips)
if #ngx.ctx.upstream_ips == 0 then
    ngx.log(ngx.ERR, "no A records found")
    return ngx.exit(500)
end

-- Set the nginx host variables
ngx.var.jasima_host = jasima_host
ngx.var.jasima_host_header = site_config.host_header or jasima_host
ngx.var.jasima_host_ssl = site_config.host_ssl or jasima_host

24
src/lua/balancer.lua Normal file
View file

@ -0,0 +1,24 @@
local balancer = require "ngx.balancer"
-- Balancer handler: selects the origin address for the current attempt from
-- the list that access.lua stored in ngx.ctx.upstream_ips. Each invocation
-- for the same request moves on to the next address in the list.

local upstream_ips = ngx.ctx.upstream_ips
if not upstream_ips then
    ngx.log(ngx.ERR, "No upstream IPs in context")
    return ngx.exit(500)
end

local try_index = (ngx.ctx.balancer_try or 0) + 1
ngx.ctx.balancer_try = try_index

if try_index > #upstream_ips then
    ngx.log(ngx.ERR, "All upstream IPs tried, none succeeded")
    return ngx.exit(500)
end

-- The upstream block lists a single server, so without asking for more tries
-- the remaining addresses would never be attempted after a failure.
if try_index == 1 and #upstream_ips > 1 then
    local tries_ok, tries_err = balancer.set_more_tries(#upstream_ips - 1)
    if not tries_ok then
        ngx.log(ngx.ERR, "failed to set more tries: ", tries_err)
    elseif tries_err then
        -- Success with a message means nginx reduced the number of tries.
        ngx.log(ngx.WARN, "set more tries: ", tries_err)
    end
end

local ip = upstream_ips[try_index]
ngx.log(ngx.INFO, "Trying upstream IP: ", ip)

local ok, err = balancer.set_current_peer(ip, 443)
if not ok then
    ngx.log(ngx.ERR, "failed to set peer: ", err)
    return ngx.exit(500)
end

63
src/lua/body_filter.lua Normal file
View file

@ -0,0 +1,63 @@
--- Build the replacement for a closing </body> tag: the Matomo tracking
-- snippet followed by </body>.
-- The tracker host comes from the JASIMA_MATOMO_HOST environment variable.
-- When it is not set, a bare "</body>" is returned so the page is left
-- unchanged instead of failing on a nil concatenation.
-- @param site_id Matomo site ID to embed in the snippet
-- @treturn string text to substitute for "</body>"
local function matomo_tracking_code(site_id)
    local matomo_host = os.getenv("JASIMA_MATOMO_HOST")
    if not matomo_host then
        return "</body>"
    end
    -- The site ID is taken from the parameter rather than read again from
    -- ngx.ctx, so the function honours whatever its caller passes.
    return [=[
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
var p = "https://REWRITE_JASIMA_HOST_PLACEHOLDER" + window.location.pathname;
_paq.push(["setCustomUrl", p]);
_paq.push(["setExcludedQueryParams", ["roomName","account","accountnum","address","address1","address2","address3","addressline1","addressline2","adres","adresse","age","alter","auth","authpw","bic","billingaddress","billingaddress1","billingaddress2","calle","cardnumber","cc","ccc","cccsc","cccvc","cccvv","ccexpiry","ccexpmonth","ccexpyear","ccname","ccnumber","cctype","cell","cellphone","city","clientid","clientsecret","company","consumerkey","consumersecret","contrasenya","contrase\u00f1a","creditcard","creditcardnumber","cvc","cvv","dateofbirth","debitcard","direcci\u00f3n","dob","domain","ebost","email","emailaddress","emailadresse","epos","epost","eposta","exp","familyname","firma","firstname","formlogin","fullname","gender","geschlecht","gst","gstnumber","handynummer","has\u0142o","heslo","iban","ibanaccountnum","ibanaccountnumber","id","identifier","indirizzo","kartakredytowa","kennwort","keyconsumerkey","keyconsumersecret","konto","kontonr","kontonummer","kredietkaart","kreditkarte","kreditkort","lastname","login","mail","mobiili","mobile","mobilne","nachname","name","nickname","false","osoite","parole","pass","passord","password","passwort","pasword","paswort","paword","phone","pin","plz","postalcode","postcode","postleitzahl","privatekey","publickey","pw","pwd","pword","pwrd","rue","secret","secretq","secretquestion","shippingaddress","shippingaddress1","shippingaddress2","socialsec","socialsecuritynumber","socsec","sokak","ssn","steuernummer","strasse","street","surname","swift","tax","taxnumber","tel","telefon","telefonnr","telefonnummer","telefono","telephone","token","token_auth","tokenauth","t\u00e9l\u00e9phone","ulica","user","username","vat","vatnumber","via","vorname","wachtwoord","wagwoord","webhooksecret","website","zip","zipcode"]]);
_paq.push(["trackPageView", p]);
_paq.push(["enableLinkTracking"]);
(function() {
var u="//]=] .. matomo_host .. [=[/";
_paq.push(["setTrackerUrl", u+"matomo.php"]);
_paq.push(["setSiteId", "]=] .. tostring(site_id) .. [=["]);
var d=document, g=d.createElement("script"), s=d.getElementsByTagName("script")[0];
g.async=true; g.src=u+"matomo.js"; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</body>
]=]
end
--- Rewrite a buffered response body.
-- Replaces every host named in ngx.ctx.jasima_pool_map with its mirror and,
-- on the final chunk, inserts the Matomo snippet before </body> when the
-- host configuration carries a matomo_site_id. Returns the body untouched
-- when there is no pool map or rewriting is disabled for the host.
-- @tparam string body buffered response text
-- @param eof true when `body` ends the response
-- @treturn string the rewritten body
local function rewrite_body(body, eof)
    local pool_map = ngx.ctx.jasima_pool_map
    local site_config = ngx.ctx.jasima_config
    if not pool_map or site_config.rewrite_disable then
        return body
    end
    -- NOTE(review): pairs() order is unspecified; if one mapped host is a
    -- substring of another, the result depends on iteration order.
    for from, to in pairs(pool_map) do
        if site_config.rewrite_case_insensitive then
            -- ngx.re offers no escape helper, so quote the host for PCRE by
            -- hand: a backslash before any non-alphanumeric is a literal.
            local pattern = from:gsub("[^%w]", "\\%0")
            local rewritten, _, re_err = ngx.re.gsub(body, pattern, to, "ijo")
            if rewritten then
                body = rewritten
            else
                -- Keep the current body rather than replacing it with nil.
                ngx.log(ngx.ERR, "failed to rewrite ", from, ": ", re_err)
            end
        else
            -- We expect that str:match("^[%w%-%.]+$") ~= nil
            local pattern = from:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1") -- escape Lua patterns
            -- "%" is magic in a gsub replacement string, so double it.
            local replacement = to:gsub("%%", "%%%%")
            body = body:gsub(pattern, replacement)
        end
    end
    if eof and site_config.matomo_site_id then
        local tracking_code = matomo_tracking_code(site_config.matomo_site_id)
        -- A function replacement inserts the snippet verbatim, whatever
        -- characters it contains.
        body = body:gsub("</body>", function() return tracking_code end)
        -- TODO: Ensure that tracking code was added when it's HTML, but only for HTML
    end
    return body
end
-- Body filter: when the header filter flagged this response for rewriting,
-- hold back the chunks until the response is complete, then emit the
-- rewritten body in one piece. Buffering stops once more than 5 MiB has
-- accumulated: what has been collected is rewritten and sent, and the rest
-- of the response passes through unmodified.
if ngx.ctx.rewriting then
    local ctx = ngx.ctx
    local chunk = ngx.arg[1]
    local eof = ngx.arg[2]
    local buffered = (ctx.buffered or "") .. (chunk or "")

    if eof or #buffered > 5 * 1024 * 1024 then
        ngx.arg[1] = rewrite_body(buffered, eof) -- We still do our best
        -- Drop the reference so the buffer is not kept alive in ngx.ctx for
        -- the remainder of the request.
        ctx.buffered = nil
        if not eof then
            -- Don't just consume memory forever
            ctx.rewriting = false
        end
    else
        ctx.buffered = buffered
        ngx.arg[1] = nil
    end
end

95
src/lua/config.lua Normal file
View file

@ -0,0 +1,95 @@
local ck = require "resty.cookie"
local cjson = require "cjson.safe"
local redis = require "resty.redis"
local _M = {}
--- Determine the canonical host requested by the client.
-- The Jasima-Host request header takes precedence over the jasima_host
-- cookie.
-- @treturn string|nil the host, or nil when neither source provides one
function _M.get_jasima_host()
    local from_header = ngx.req.get_headers()["Jasima-Host"]
    -- A header sent more than once is returned as a table of values; use the
    -- first so that callers always receive a string.
    if type(from_header) == "table" then
        from_header = from_header[1]
    end
    if from_header then
        return from_header
    end

    local cookie, new_err = ck:new()
    if not cookie then
        ngx.log(ngx.ERR, "failed to get cookie: ", new_err)
        return nil
    end

    local jasima_cookie, get_err = cookie:get("jasima_host")
    if jasima_cookie then
        return jasima_cookie
    elseif get_err then
        ngx.log(ngx.ERR, "failed to get jasima_host cookie: ", get_err)
    end
    return nil
end
--- Determine the mirror pool requested by the client.
-- The Jasima-Pool request header takes precedence over the jasima_pool
-- cookie.
-- @treturn string|nil the pool name, or nil when neither source provides one
function _M.get_jasima_pool()
    local from_header = ngx.req.get_headers()["Jasima-Pool"]
    -- A header sent more than once is returned as a table of values; use the
    -- first so that callers always receive a string.
    if type(from_header) == "table" then
        from_header = from_header[1]
    end
    if from_header then
        return from_header
    end

    local cookie, new_err = ck:new()
    if not cookie then
        ngx.log(ngx.ERR, "failed to get cookie: ", new_err)
        return nil
    end

    local jasima_cookie, get_err = cookie:get("jasima_pool")
    if jasima_cookie then
        return jasima_cookie
    elseif get_err then
        ngx.log(ngx.ERR, "failed to get jasima_pool cookie: ", get_err)
    end
    return nil
end
--- Load the host -> mirror mapping of a pool.
-- Served from the jasima_cache shared dict when present, otherwise read from
-- the Redis key jasima:poolmap:<pool_name> and cached for 60 seconds.
-- @tparam[opt="public"] string pool_name name of the pool
-- @treturn table|nil decoded mapping
-- @treturn string|nil error message when the mapping could not be loaded
function _M.load_pool_mapping(pool_name)
    if not pool_name then pool_name = "public" end
    local cache = ngx.shared.jasima_cache
    local cache_key = "poolmap:" .. pool_name
    local cached = cache:get(cache_key)
    if cached then return cjson.decode(cached) end

    local red = redis:new()
    red:set_timeout(1000)
    local ok, err = red:connect("redis", 6379)
    if not ok then return nil, "Redis connect failed: " .. err end

    local res, get_err = red:get("jasima:poolmap:" .. pool_name)
    if not res then
        -- The connection state is unknown after an error; do not pool it.
        return nil, "Redis GET failed: " .. tostring(get_err)
    end
    -- The command completed, so the connection is healthy and can be reused
    -- even when the key turns out to be absent.
    red:set_keepalive(10000, 100)
    if res == ngx.null then return nil, "No pool mapping found" end

    cache:set(cache_key, res, 60)
    return cjson.decode(res)
end
--- Load the configuration object of a canonical host.
-- Served from the jasima_cache shared dict when present, otherwise read from
-- the Redis key jasima:config:<jasima_host> and cached for 60 seconds.
-- @tparam string jasima_host canonical host name
-- @treturn table|nil decoded configuration; nil with no error message when
--   the host has no configuration in Redis
-- @treturn string|nil error message for connection, command or JSON failures
function _M.load_config(jasima_host)
    local cache = ngx.shared.jasima_cache
    local cache_key = "config:" .. jasima_host
    local cached = cache:get(cache_key)
    if cached then return cjson.decode(cached) end

    local red = redis:new()
    red:set_timeout(1000)
    local ok, err = red:connect("redis", 6379)
    if not ok then return nil, "Redis connect failed: " .. err end

    local res, get_err = red:get("jasima:config:" .. jasima_host)
    if not res then
        -- The connection state is unknown after an error; do not pool it.
        return nil, "Redis GET failed: " .. tostring(get_err)
    end
    -- The command completed, so the connection is healthy and can be reused
    -- even when the key turns out to be absent.
    red:set_keepalive(10000, 100)
    if res == ngx.null then
        -- An unknown host is not an internal failure: return nil without an
        -- error so the caller can tell "not configured" from "Redis broke".
        return nil
    end

    cache:set(cache_key, res, 60)
    return cjson.decode(res)
end
return _M

49
src/lua/geo.lua Normal file
View file

@ -0,0 +1,49 @@
local _M = {}
--- Country of the viewer as reported by a CDN request header.
-- Checks the CloudFront, Fastly and CloudFlare country headers in that order.
-- @treturn string|nil upper-cased header value, or nil when none is present
function _M.viewer_country()
    -- Maybe the CDN was nice and gave this to us
    local vars = ngx.var
    local country = vars.http_cloudfront_viewer_country -- AWS CloudFront
    if not country then
        country = vars.http_fastly_client_country -- Fastly
    end
    if not country then
        country = vars.http_cf_ipcountry -- CloudFlare
    end
    if country then
        return country:upper()
    end
    return nil
    -- TODO: Fallback to GeoIP lookup
end
-- Countries whose viewers are sent to the canonical site instead of the
-- mirror. Built once at module load rather than on every call.
-- TODO: Allow override of safe countries in host config
local safe_countries = {
    US = true, -- United States
    GB = true, -- United Kingdom
    IE = true, -- Ireland
    FR = true, -- France
    DE = true, -- Germany
    NL = true, -- Netherlands
    BE = true, -- Belgium
    CH = true, -- Switzerland
    AT = true, -- Austria
    LU = true, -- Luxembourg
    LI = true, -- Liechtenstein
    MC = true, -- Monaco
    AD = true, -- Andorra
    ES = true, -- Spain
    PT = true, -- Portugal
    IT = true, -- Italy
    SM = true, -- San Marino
    VA = true, -- Vatican City
    MT = true, -- Malta
    NO = true, -- Norway
    SE = true, -- Sweden
    DK = true, -- Denmark
    FI = true, -- Finland
    IS = true -- Iceland
}

--- Decide whether a viewer should be served through the mirror.
-- @tparam string|nil country upper-case country code, or nil when unknown
-- @treturn boolean false only for the countries listed in safe_countries;
--   true for every other code and for an unknown country
function _M.needs_mirror(country)
    if not country then return true end
    if safe_countries[country] then return false end
    return true
end
return _M

14
src/lua/header_filter.lua Normal file
View file

@ -0,0 +1,14 @@
-- Header filter: flag responses whose Content-Type is one of the rewritable
-- types so the body filter buffers and rewrites them. Content-Length is
-- cleared for those responses because rewriting changes the body size.
local content_type = ngx.header["Content-Type"]
if content_type then
    local rewritable_types = {
        "text/html",
        "text/css",
        "text/xml",
        "application/javascript",
        "application/rss+xml",
        "application/atom+xml",
        "application/vnd.mpegurl",
        "application/x-mpegurl",
    }
    -- Media type names are case-insensitive.
    local normalized = content_type:lower()
    for i = 1, #rewritable_types do
        -- Plain search (4th argument): "+", "-" and "." are magic in Lua
        -- patterns, so a pattern search never matched "rss+xml", "atom+xml"
        -- or "x-mpegurl".
        if normalized:find(rewritable_types[i], 1, true) then
            ngx.ctx.rewriting = true
            ngx.header["Content-Length"] = nil
            break
        end
    end
end

75
src/lua/utils.lua Normal file
View file

@ -0,0 +1,75 @@
local resolver = require "resty.dns.resolver"
local iputils = require "resty.iputils"
iputils.enable_lrucache()
-- Parsed list of special-purpose IPv4 ranges. filter_bogons() drops any
-- resolved origin address that falls inside one of them.
local bogon_ips = iputils.parse_cidrs({
"0.0.0.0/8", -- "this network"
"10.0.0.0/8", -- private use
"100.64.0.0/10", -- shared address space (carrier-grade NAT)
"127.0.0.0/8", -- loopback
"169.254.0.0/16", -- link-local
"172.16.0.0/12", -- private use
"192.0.0.0/24", -- IETF protocol assignments
"192.0.2.0/24", -- documentation (TEST-NET-1)
"192.168.0.0/16", -- private use
"198.18.0.0/15", -- benchmarking
"198.51.100.0/24", -- documentation (TEST-NET-2)
"203.0.113.0/24", -- documentation (TEST-NET-3)
"224.0.0.0/4", -- multicast
"240.0.0.0/4" -- reserved, includes limited broadcast
})
local _M = {}
--- Resolve the IPv4 addresses of an origin host.
-- Results are cached in the jasima_cache shared dict for 60 seconds. A shared
-- dict only stores scalars, so the list is kept as a comma-separated string.
-- Any resolver failure ends the request with status 500 via ngx.exit.
-- @tparam string origin_host host name to resolve
-- @treturn table list of address strings; may be empty when the answer holds
--   no address records
function _M.resolve_origin(origin_host)
    local cache = ngx.shared.jasima_cache
    local cache_key = "upstream_ips:" .. origin_host
    local cached = cache:get(cache_key)
    if type(cached) == "string" then
        local cached_ips = {}
        for ip in cached:gmatch("[^,]+") do
            cached_ips[#cached_ips + 1] = ip
        end
        return cached_ips
    end

    local r, err = resolver:new{
        nameservers = {"8.8.8.8", "8.8.4.4"},
        retrans = 5,
        timeout = 2000,
    }
    if not r then
        ngx.log(ngx.ERR, "failed to instantiate resolver: ", err)
        return ngx.exit(500)
    end

    local answers, query_err = r:query(origin_host, {qtype = r.TYPE_A})
    if not answers then
        ngx.log(ngx.ERR, "failed to query: ", query_err)
        return ngx.exit(500)
    end
    if answers.errcode then
        ngx.log(ngx.ERR, "DNS error code: ", answers.errcode, ": ", answers.errstr)
        return ngx.exit(500)
    end

    -- Records without an address field (for example CNAMEs) are skipped.
    local origin_ips = {}
    for _, ans in pairs(answers) do
        if ans.address then
            origin_ips[#origin_ips + 1] = ans.address
        end
    end

    -- Only cache a non-empty result so that a host which briefly has no
    -- address records is looked up again on the next request.
    if #origin_ips > 0 then
        local stored, set_err = cache:set(cache_key, table.concat(origin_ips, ","), 60)
        if not stored then
            ngx.log(ngx.WARN, "failed to cache upstream IPs: ", set_err)
        end
    end
    return origin_ips
end
--- Remove addresses that fall inside any of the bogon_ips ranges.
-- @tparam table ip_list sequence of IP address strings
-- @treturn table new sequence with the remaining addresses, order preserved
function _M.filter_bogons(ip_list)
    local kept = {}
    for _, candidate in ipairs(ip_list) do
        local is_bogon = iputils.ip_in_cidrs(candidate, bogon_ips)
        if not is_bogon then
            kept[#kept + 1] = candidate
        end
    end
    return kept
end
return _M