LUA为Nginx添加reCAPTCHA验证码质询 | 防止HTTP-DDoS (CC) 攻击

AI摘要:
正在生成中……

提醒：本文已经超过 512 天未修改，其中某些信息可能已经过时，请谨慎使用！
你似乎正在查看一篇很久远的文章。
为了你这样的访客，我特地保留了我的历史博文。不要笑话过去的我，用温柔的目光看下去吧。

#本文章仅供参考，不具有实用意义#

为了学习了解cloudflare人机质询的验证原理，尝试使用PHP+Redis还原了一个初版人机质询，不过使用PHP实在是太蠢了，高频率访问情况下依然会导致宕机，遂将其重构为LUA，借此机会学习LUA语言。

# 准备工作

请先确认Nginx已安装LUA模块，否则无法使用此代码。Tengine和OpenResty默认已包含LUA模块，可执行nginx -V (大写) 检查是否已安装。雷池的Tengine没有安装LUA模块，请不要在雷池Tengine上运行文章内的代码。

人机验证需要LUA的cjson和curl库支持，相关安装教程已经非常详细不再赘述。使用了宝塔的用户需要注意，lua_package_path和lua_package_cpath可能在宝塔WAF或FreeWAF的配置文件/www/server/panel/vhost/nginx/free_waf.conf中。

# 代码部署

再次提示，以下代码仅供参考，不具有实用意义

将以下代码保存为一个LUA文件并放置于一个目录，如/www/lua/captcha.lua

-- 导入需要的库
local cjson = require "cjson"
local curl = require "lcurl.safe"

-- Challenge page
-- Sends a small HTML bootstrap page whose script fetches the real challenge
-- page from `challengeUri` via XMLHttpRequest and writes it into the document.
-- `challengeUri` is inserted twice: as `cserver_addr` in window.challenge_conf
-- and as the URL that is fetched.
function displayChallengePage(challengeUri)
    -- Fragment up to the opening quote of the cserver_addr value.
    local page_head = [[
        <meta name="robots" content="noindex">
        <script>
            window.onload = function() {
                window.challenge_conf = {
                    'is_interactive': 1,
                    'cserver_addr': "]]

    -- Fragment between the two insertions of the challenge URI.
    local page_middle = [[",
                    'rule_id': 1
                };
                var xhr = new XMLHttpRequest();
                xhr.open('GET', "]]

    -- Remainder of the script; Math.random() adds a cache-busting query string.
    local page_tail = [[" + '?&' + Math.random());
                xhr.send();
                xhr.onload = function() {
                    if (xhr.status == 200) {
                        console.log(xhr.response);
                        document.write(xhr.response);
                        document.close();
                    }
                };
            }
        </script>
        <h1>Waiting....</h1>
    ]]

    ngx.header.content_type = 'text/html'
    local html = page_head .. challengeUri .. page_middle .. challengeUri .. page_tail
    ngx.say(html)
end

-- Shared-memory dictionary that stores verified tokens
-- (declared with `lua_shared_dict tokens ...;` in the nginx http block).
local tokens = ngx.shared.tokens

-- Static page that hosts the reCAPTCHA widget; used both for the first
-- challenge and for the retry after a failed verification.
local challenge_page_url = "https://challenges.gymxbl.com/captcha4.html"

-- Clients may omit the User-Agent header, in which case the nginx variable is
-- nil; fall back to "" so the concatenations below cannot raise.
local client_ua = ngx.var.http_user_agent or ""
local client_ip = ngx.var.remote_addr

-- Candidate token for this request. It only becomes valid once it is stored in
-- the shared dictionary after a successful verification.
-- NOTE(review): math.random() is not a cryptographically secure source;
-- consider a stronger random source if token guessing is a concern.
local token = ngx.md5(client_ip .. client_ua .. math.random())

-- Digest of User-Agent + IP. It is part of the dictionary key, so a cookie
-- value only validates for the client it was issued to.
local ua_ip = ngx.md5(client_ua .. client_ip)

-- Read the cf-verified value from the Cookie header ("none" when absent).
-- The cookie name is a leftover from an earlier version of the script; it now
-- carries the reCAPTCHA verification token.
local cookie_header = ngx.var.http_Cookie or ""
local cf_verified = string.match(cookie_header, "cf%-verified=([^;]+)") or "none"

-- Send a plain-text 400 response and finish the request. The status has to be
-- set before any output: once ngx.say() has sent the response headers,
-- ngx.exit() can no longer change the status code.
local function reject_bad_request(...)
    ngx.status = ngx.HTTP_BAD_REQUEST
    ngx.header.content_type = 'text/plain'
    ngx.say(...)
    return ngx.exit(ngx.HTTP_OK)
end

-- Only challenge clients that do not present an already verified token.
if not tokens:get(cf_verified .. "-" .. ua_ip) then
    -- A POST is treated as a submitted reCAPTCHA response.
    if ngx.var.request_method == 'POST' then
        -- Replace with your reCAPTCHA secret key.
        local secretKey = " "

        -- Read the raw request body. get_body_data() returns nil when the body
        -- is empty or was buffered to a temporary file, so guard the match.
        ngx.req.read_body()
        local body_data = ngx.req.get_body_data()
        local response = body_data
            and string.match(body_data, "g%-recaptcha%-response=([^&]+)")

        if not response then
            return reject_bad_request(
                "Error: Missing g-recaptcha-response parameter. Value: ", response,
                ". POST body data: ", body_data)
        end

        -- Ask the siteverify endpoint whether the response is valid.
        -- NOTE(review): this libcurl call blocks the nginx worker while it
        -- runs; a cosocket-based HTTP client would avoid that.
        local response_body = {}
        local easy, easy_err = curl.easy()
        if easy then
            easy:setopt_url("https://recaptcha.net/recaptcha/api/siteverify")
            -- `response` is taken from the form body as-is, i.e. still URL-encoded.
            easy:setopt_postfields("secret=" .. secretKey .. "&response=" .. response)
            -- Collect the response body chunk by chunk.
            easy:setopt_writefunction(function(str)
                response_body[#response_body + 1] = str
            end)

            local performed, perform_err = easy:perform()
            -- Always release the handle, whether or not the request succeeded.
            easy:close()
            if not performed then
                ngx.log(ngx.ERR, "reCAPTCHA siteverify request failed: ", tostring(perform_err))
            end
        else
            ngx.log(ngx.ERR, "could not create curl handle: ", tostring(easy_err))
        end

        local verifyResponse = table.concat(response_body)

        -- Decode once; anything that is not a JSON object is a bad response
        -- (this also covers an empty body after a failed request).
        local decoded_ok, responseData = pcall(cjson.decode, verifyResponse)
        if not decoded_ok or type(responseData) ~= "table" then
            return reject_bad_request("Bad Response: ", verifyResponse)
        end

        if responseData.success == true then
            -- Verified: hand the token to the client and remember it for one
            -- day, keyed together with the UA/IP digest.
            ngx.header["Set-Cookie"] = "cf-verified=" .. token .. "; path=/; Max-Age=86400"
            tokens:set(token .. "-" .. ua_ip, 'verified', 86400)

            -- Send the client back to the page it originally requested.
            return ngx.redirect(ngx.var.request_uri)
        end

        -- Verification failed: show the challenge again (this page may differ
        -- from the initial challenge page).
        displayChallengePage(challenge_page_url)
        return ngx.exit(ngx.HTTP_OK)
    end

    -- Not verified yet and nothing submitted: show the challenge page.
    displayChallengePage(challenge_page_url)
    return ngx.exit(ngx.HTTP_OK)
end

Nginx的主配置文件中，我们需要为其在http配置段增加一个内存共享区域，此区域将用于存储验证后的tokens。

# Shared-memory zone named "tokens" (size 10m); the Lua script reads and writes it as ngx.shared.tokens.
lua_shared_dict tokens 10m;

接下来，只需要在需要开启验证的站点中的location / { }块中，引用这个脚本即可。宝塔面板可能需要在站点管理-站点-伪静态中添加修改。

# Run the captcha script for every request handled by this location.
access_by_lua_file /www/lua/captcha.lua;

现在，每个访客访问时都将经过reCAPTCHA验证，这会使用户体验直线下降，且不利于SEO。能否只在特定情况下才进行人机验证？当然可以，只需在质询代码之前加入如下判断代码即可。为了方便，这里使用json来存储配置文件。

-- Request attributes the rules are matched against: user agent, country,
-- ASN, client IP and request URI.
-- The User-Agent header may be missing, so default it to "" to keep later
-- string operations (string.find, :lower()) from failing on nil.
local user_agent = ngx.var.http_user_agent or ""
-- Country and ASN come from request headers (CF-IPCountry / C-Asn) and are nil
-- when the header is absent.
local country = ngx.var.http_cf_ipcountry
local asn = ngx.var.http_c_asn
local ip = ngx.var.remote_addr
local url = ngx.var.request_uri

--- Report whether a table holds the given value under any key.
-- @param tbl table to search; a non-table (for example nil, when a rule has
--   no "values" field) is treated as empty instead of raising in pairs()
-- @param element value to look for, compared with ==
-- @return true when some entry equals `element`, false otherwise
function table.contains(tbl, element)
  -- The parameter is deliberately not named `table`, which would shadow the
  -- standard library inside this function.
  if type(tbl) ~= "table" then
    return false
  end
  for _, value in pairs(tbl) do
    if value == element then
      return true
    end
  end
  return false
end

--- Read a file and decode its content as JSON.
-- @param path path of the JSON file
-- @return the decoded value, or nil plus an error message when the file
--   cannot be opened or read, or when its content is not valid JSON
function read_json(path)
    local file, open_err = io.open(path, "r")
    if not file then
        return nil, open_err
    end

    local content = file:read("*a")
    file:close()
    if not content then
        return nil, "could not read " .. tostring(path)
    end

    -- cjson.decode raises on malformed input; turn that into a nil result so a
    -- broken config file does not abort every request with an error.
    local ok, decoded = pcall(cjson.decode, content)
    if not ok then
        return nil, decoded
    end
    return decoded
end

-- Load the rule list. If the file is missing or does not contain a JSON
-- array/object, log it and continue with no rules instead of failing the
-- request.
local rules = read_json("/www/conf/captcha/rules.json")
if type(rules) ~= "table" then
    ngx.log(ngx.ERR, "captcha: could not load /www/conf/captcha/rules.json, no rule will match")
    rules = {}
end

-- The User-Agent header may be absent; match against "" in that case.
local ua_for_rules = user_agent or ""

-- Decide whether a single rule applies to the current request.
-- `value` of user_agent / firefox_version rules is used as a Lua pattern.
local function rule_matches(rule)
    if type(rule) ~= "table" then
        return false
    end

    local kind, value = rule.type, rule.value
    if kind == 'under_attack' then
        return value == 'true'
    elseif kind == 'user_agent' then
        return type(value) == "string" and string.find(ua_for_rules, value) ~= nil
    elseif kind == 'country' then
        return table.contains(rule.values, country)
    elseif kind == 'asn' then
        return table.contains(rule.values, asn)
    elseif kind == 'firefox_version' then
        return type(value) == "string"
            and string.match(ua_for_rules, 'Firefox/' .. value) ~= nil
    elseif kind == 'ip' then
        return ip == value
    elseif kind == 'url' then
        return url == value
    end
    return false
end

-- Track whether any rule matched; stop at the first match.
local rule_matched = false
for _, rule in ipairs(rules) do
    if rule_matches(rule) then
        rule_matched = true
        break
    end
end

-- No rule matched: end the script here so the request is not challenged.
if not rule_matched then
    return
end

-- A rule matched: mark the request as "captcha" for the access log.
-- $captcha_log must already be defined in the nginx configuration, because
-- ngx.var can only write variables that exist.
ngx.var.captcha_log = 'captcha'

将如上代码添加到引用库文件之后，质询代码之前，并在/www/conf/captcha目录创建一个rules.json，存储匹配条件:

[
    {"type": "under_attack", "value": "false"},
    {"type": "user_agent", "value": "TestUA"},
    {"type": "url", "value": "/reg/"},
    {"type": "country", "values": ["TW","MX","SG","CH","AS","UA"]},
    {"type": "asn", "values": ["62041", "45102", "140227","152194","45753","47890","60068","146952"]},
    {"type": "firefox_version", "value": "([1-7]%d|8[0-5]|[1-9])"},
    {"type": "ip", "value": "192.168.1.1"}
]

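注:其中的国家、ASN号码匹配需要CDN支持返回相关头部，并已做好自定义回源头部规则，否则将无法生效。另外，user_agent和firefox_version的value会被当作LUA模式(pattern)而不是正则表达式来匹配，LUA模式不支持用“|”表示“或”，因此上面示例中的firefox_version规则无法按预期匹配，请根据实际需要改写。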

如果需要添加IP白名单，或UA白名单，也可参考如下设置:

-- User-agent whitelist: stop the script (no challenge) when the user agent
-- contains one of these keywords. The comparison is case-insensitive because
-- the user agent is lowercased first, so every keyword must be written in
-- lowercase. A missing User-Agent header is treated as "".
local ua_lowercase = (user_agent or ""):lower()
for _, keyword in ipairs({ "uptime", "monitor", "gpt", "testua" }) do
    -- Plain-text search: keywords are literal substrings, not Lua patterns.
    if ua_lowercase:find(keyword, 1, true) then
        return
    end
end

为了能够记录哪些请求触发了人机质询，我们需要在Nginx的http块增加一个自定义的日志记录格式。注意：$captcha_log变量需要先在站点配置中定义（例如在server块中加入 set $captcha_log ""; ），否则Nginx无法识别该变量，LUA脚本也无法对其赋值。

         # Access-log format "captcha": common request fields, with $captcha_log as the last field.
         log_format  captcha  '$remote_addr - $remote_user [$time_local] "$request" '
                        '$status $body_bytes_sent "$http_referer" '
                        '"$http_user_agent" '
                        '"$captcha_log"';

在需要使用该自定义日志的站点配置文件中，找到access_log 指令，在其末尾添加 captcha。示例如下

# Write this site's access log using the "captcha" log format.
access_log  /www/logs/www.gymxbl.com.log captcha;

当然，你也可以为此日志格式添加更多的记录内容，如CDN返回的访客地理位置，ASN编码，机器人管理分数等等。

完成代码部署后，重启Nginx使其生效。

# 部署验证页面

在demo代码中，我们使用JavaScript动态加载质询页面，这样可以初步过滤一些不支持JavaScript的恶意机器请求，减少服务器带宽占用。你需要在未开启质询的站点放置一个静态页面，用于提供验证质询。该页面可以放置在任何静态站点中，例如jsdelivr、github pages或云函数workers。如不想自己搭建，也可使用本站示例页面https://challenges.gymxbl.com/captcha5.html?skey="你的reCAPTCHA站点密钥"。

<!-- Static challenge page: shows the reCAPTCHA widget and submits the form automatically once it is solved. -->
<html lang="zh">
<head>
    <title>你是机器人咩？ | 孤影墨香</title>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    <meta name="robots" content="noindex">
    <!-- Fonts -->
    <link rel="dns-prefetch" href="//fonts.gstatic.com">
    <link href="https://fonts.googleapis.com/css?family=Montserrat:400,500,600,900" rel="stylesheet">
    <link rel="stylesheet" type="text/css" href="https://jsd.gymxbl.com/403/safeline.css?v=0.0.2">
    <!-- reCAPTCHA client library -->
    <script src="https://recaptcha.net/recaptcha/api.js" async defer></script>
</head>
<body class="antialiased font-sans">
    <div class="md:flex min-h-screen main-body">
        <div class="w-full md:w-1/2 bg-white flex items-center justify-center left-sec">
            <div class="max-w-sm m-8 content-body">
                <div class="text-black text-5xl md:text-15xl font-black"> ¿ 你是机器人咩 ? </div>

                <p class="text-grey-darker text-2xl md:text-3xl mb-7 leading-normal">
                    </p><p class="sub-header"><h1>让我康康！(。・∀・)ノ </h1></p>
                <br>
                <!-- No action attribute: the form is POSTed to the URL of the current page. -->
                <form id="captcha-form" method="POST">
                    <!-- Replace the data-sitekey value with your own reCAPTCHA site key. -->
                    <div class="g-recaptcha" data-sitekey="你的站点密钥"></div>
                </form>

                <br>

                <p class="highlight">¿为什么我要通过CAPTCHA测试?</p>
                <p class="std-text">因为网站的安全要求，我们不得不开启验证码呢，sry！</p>
                <p class="std-text">这只是一个简单的人机验证演示页面，可能出现错误。</p>
                
                 <div class="sub-footer">
                    <div class="w-full h-2 bg-grey-light my-3 md:my-6"></div>
                    <ul>
                        <li>Performance &amp; Security by Google reCAPTCHA</li>
                    </ul>
                </div> 

            </div>
        </div>

        <div class="relative pb-full md:flex md:pb-0 md:min-h-screen w-full md:w-1/2 right-sec">
        </div>
    </div>
    <script>
        // After the page has loaded, poll every 500 ms for the g-recaptcha-response
        // textarea; once it holds a value, submit the form after a short delay.
        window.addEventListener('load', function() {
            var checkExist = setInterval(function() {
               var captchaResponse = document.querySelector('textarea[name="g-recaptcha-response"]');
               if (captchaResponse && captchaResponse.value) {
                  clearInterval(checkExist);
                  setTimeout(function() {
                     document.getElementById('captcha-form').submit();
                  }, 500);
               }
            }, 500);
        });
    </script>
</body>
</html>

该脚本也可配合其他WAF/LUA的限速封禁脚本（如访问量不高，可将文中redis替换为内存共享区域），在WAF/脚本执行封禁动作之前，减少服务器资源占用，防止服务器因系统资源不足而宕机。可以在此站点内任何URL加上?captcha=3来体验验证。该脚本仅供参考，请勿将其用于生产环境，所造成一切后果与本网站无关。

版权声明:转载时请以超链接形式标明文章原始出处和作者信息，来源孤影墨香
本文链接: https://www.gymxbl.com/4197.html
访问时间:2026-06-04 08:10:05

查看评论 - 4 条评论

Comments | 4 条评论

博主 Sh1n3zZ

回复

发布于 2024-07-24 15:39 ( Safari 605.1.15 Mac OSX 11_6 ) ( ) 来自: 广东省广电网络

是因为没有别的可以研究了吗（
怎么天天在人机验证这些东西上面下功夫(=・ω・=)
- 博主 small_xu038
  
  回复
  
  发布于 2024-07-24 19:48 ( Edge 126 Windows 10/11 ) ( ) 来自: · 天津市 · 联通IPV6
  
  @Sh1n3zZ 是这样的（
  要不给我点研究的方向，我是想到什么做什么，非常随性。
博主方澪可

回复

发布于 2024-07-23 23:28 ( QQBrowser 6.2 Android 13 ) ( ) 来自: 广东省韶关市移动

你说的对，但是我看不懂，以后再说
博主 SnowRainySkr

回复

发布于 2024-07-23 21:50 ( Edge 126 Windows 10/11 ) ( ) 来自: 广东省广州市移动

好好玩的网站(°∀°)ﾉ