爬虫关键函数定位方法_python

1. 加密的一般步骤

2. 事件定位优缺点

优点: 定位的位置比较靠前, 一般会定位到用户输入明文步骤
缺点: 离加密函数比较远, 也没法看到调用栈

3. 事件定位步骤

首先定位到相应的控件
查看事件列表, 通过名字大致猜测可疑事件
可以通过删除该事件的js, 看看触发事件后网页是否还有反应, 来确定关键js文件

4. 举例

https://www.gm99.com/

首先抓一下这个登录的发包过程

下面这两个不用看, 所以发包的应该是第一个请求

可以看到好多字段都被加密了, 下面就使用事件来定位关键函数

首先选中登入这个按钮, 然后查看Event Listeners

首先通过名字确定可疑的事件, MSG_CLICK、click就是可疑的, 可以通过删除该事件的js, 看看是否有反应来确定关键js文件

最后确定了事件是下面这个

格式化代码

// Click handler for the login button. t.isLogining is raised before validation
// and lowered again when validation fails, so a second click is ignored while
// the flag is set.
t.loginBtn.on("click", function(event) {
    if (t.isLogining) {
        return;
    }
    t.isLogining = true;

    var clicked = r(event.currentTarget);
    if (t._checkLoginForm() === false) {
        // Form validation failed: release the flag and return false to the event system.
        t.isLogining = false;
        return false;
    }

    // Hand the wrapped button over to the e function, which performs the captcha check.
    e(clicked);
});

t._checkLoginForm函数用来对输入的用户名, 密码和验证码进行初步判断, 判断长度

// t._checkLoginForm
/**
 * Validates the login form in a fixed order (username, password, captcha) and,
 * for the first field that fails, shows the matching inline message through
 * this._groupOpera.
 *
 * Reads the username and password from #login-username / #login-password and
 * the captcha from this.loginAuthCode.
 *
 * @returns {boolean} true when all three checks pass, false otherwise.
 */
n.prototype._checkLoginForm = function() {
    var username = r("#login-username").val();
    var password = r("#login-password").val();
    var authCode = this.loginAuthCode.val();
    var usernamePattern = /^[A-Za-z0-9._@-]{1,100}$/;

    // NOTE(review): the message talks about 4~16 characters and underscores, but the
    // pattern accepts 1-100 characters and has no "_" - confirm which one is intended.
    if ("" === username || !usernamePattern.test(username)) {
        this._groupOpera(
            r("#login-username"),
            r(".username-normal"),
            r("#login-tip-warn"),
            "username",
            "長度為4~16個字元，包括字母、數字、下底線、@、-，郵箱類帳號合法長度為6~100個字元",
            56
        );
        return false;
    }

    // NOTE(review): the message mentions a 6-character minimum, but only the
    // 16-character maximum (and non-empty) is enforced here - confirm.
    if ("" === password || password.length > 16) {
        this._groupOpera(
            r("#login-password"),
            r(".password-normal"),
            r("#login-tip-warn"),
            "password",
            "6-16個字元，包括字母和數字",
            99
        );
        return false;
    }

    // The captcha only has to be non-empty and exactly four characters long here.
    if ("" !== authCode && 4 === authCode.length) {
        return true;
    }

    this._groupOpera(
        this.loginAuthCode,
        r(".defend-normal"),
        r("#login-tip-error"),
        "defend",
        "驗證碼錯誤，請重新輸入",
        140
    );
    r("#circle-warn").removeClass("hide");
    // Clicks the captcha element (presumably to load a fresh captcha - confirm).
    r(".login-auth-codes").click();
    return false;
};

e函数, 这个函数就做了一件事, 就是调用了a.getData, 然后传了三个参数, 第一个是一个接口信息, 后面两个是两个函数

// The e function: sends the entered captcha to the check_code endpoint through
// a.getData, together with a success callback and a failure callback.
// Its parameter e is not used directly; it is forwarded unchanged as the second
// argument of a.login / a.zmqsdklogin inside the success callback.
e = function(e) {
    a.getData({
        url: "//passport.gm99.com/ajax/check_code",
        data: {
            ckcode: t.loginAuthCode.val()
        }
    }, function(s) {
        // Success callback. The argument s is never read in this body.
        var n, o = r("#remember").prop("checked");
        // One comma expression: n becomes "checked" when #remember is ticked;
        // otherwise the "uname" cookie is set to "" and n becomes "". The button
        // text is then switched to the "logging in" label.
        o ? n = "checked" : (a.setCookie("uname", ""),
        n = ""),
        t.loginBtn.text("登入中...");
        var c = r("#data-forward").attr("data-forward")
            , l = r("#login-password").val();
        // First branch unless t.currLoc is "sdk" or "sdk-reg"; it is also taken
        // whenever a._GET("gameId") equals "75".
        if ("sdk" !== t.currLoc && "sdk-reg" !== t.currLoc || "75" == a._GET("gameId")) {
            // Stop here if the button already carries the "disable" class.
            if (t.loginBtn.hasClass("disable"))
                return !1;
            // i.call(t) is evaluated right here and its result is passed as the
            // third argument; i is not defined anywhere in this excerpt.
            t.loginBtn.addClass("disable"),
            a.login({
                uname: r("#login-username").val(),
                password: l,
                forward: c,
                remember: n,
                ckcode: t.loginAuthCode.val(),
                type: "index"
            }, e, i.call(t))
        } else
            // The whole else body is a single comma expression: store the username
            // in the "uname" cookie, then call a.zmqsdklogin with the same payload.
            a.setCookie("uname", r("#login-username").val()),
            a.zmqsdklogin({
                uname: r("#login-username").val(),
                password: l,
                forward: c,
                remember: n,
                ckcode: t.loginAuthCode.val(),
                type: "index"
            }, e, i.call(t))
    }, function(e) {
        // Failure callback (its parameter e shadows the outer e and is not read):
        // lower the in-progress flag, show the captcha error through t._groupOpera,
        // and click .login-auth-codes (presumably to refresh the captcha - confirm).
        t.isLogining = !1;
        var i = "驗證碼錯誤，請重新輸入";
        t._groupOpera(r("#login-auth-code"), r(".defend-normal"), r("#login-tip-error"), "defend", i, 140),
        r("#login-tip-error").css("top", "140px"),
        r("#circle-warn-2").removeClass("hide"),
        r(".login-auth-codes").click()
    })
}

a.getData内部, 首先判断接口是否传送数据, 然后调用ajax发送了一个请求, 来判断验证码是否输入正确, 如果正确就会执行e函数去发送登录用户和密码的数据包, 如果不正确就会执行i函数来弹出验证码错误信息框

// a.getData: issues a JSONP request described by t and dispatches the response:
// e(data, msg) when the response's result parses to 1, otherwise i(msg).
n.prototype.getData = function(t, e, i) {
    // Guarantee a data object on t even when the caller left it out.
    t.data = t.data || {};

    var settings = {
        url: t.url,
        type: t.type || "get",
        data: t.data,
        xhrFields: {
            withCredentials: true
        },
        jsonp: "callback",
        async: true,
        dataType: "jsonp"
    };

    r.ajax(settings).done(function(response) {
        var succeeded = parseInt(response.result, 10) === 1;
        if (succeeded) {
            e(response.data, response.msg);
        } else {
            i(response.msg);
        }
    });
};

下面分析e函数, 也就是a.getData传入的第二个参数

// Success callback handed to a.getData as its second argument (verbatim excerpt
// of the e function). The argument s is never read in this body.
function(s) {
    var n, o = r("#remember").prop("checked");
    // One comma expression: n becomes "checked" when #remember is ticked;
    // otherwise the "uname" cookie is set to "" and n becomes "". The button
    // text is then switched to the "logging in" label.
    o ? n = "checked" : (a.setCookie("uname", ""),
    n = ""),
    t.loginBtn.text("登入中...");
    var c = r("#data-forward").attr("data-forward")
        , l = r("#login-password").val();
    // First branch unless t.currLoc is "sdk" or "sdk-reg"; it is also taken
    // whenever a._GET("gameId") equals "75".
    if ("sdk" !== t.currLoc && "sdk-reg" !== t.currLoc || "75" == a._GET("gameId")) {
        // Stop here if the button already carries the "disable" class.
        if (t.loginBtn.hasClass("disable"))
            return !1;
        // e and i come from the enclosing scope, which this excerpt does not show;
        // i.call(t) is evaluated right here and its result is the third argument.
        t.loginBtn.addClass("disable"),
        a.login({
            uname: r("#login-username").val(),
            password: l,
            forward: c,
            remember: n,
            ckcode: t.loginAuthCode.val(),
            type: "index"
        }, e, i.call(t))
    } else
        // The whole else body is a single comma expression: store the username
        // in the "uname" cookie, then call a.zmqsdklogin with the same payload.
        a.setCookie("uname", r("#login-username").val()),
        a.zmqsdklogin({
            uname: r("#login-username").val(),
            password: l,
            forward: c,
            remember: n,
            ckcode: t.loginAuthCode.val(),
            type: "index"
        }, e, i.call(t))
}

虽然e函数传入了两个参数, 但是这里只有一个参数, 所以只会获取到t.data, 这是个空列表, 当然此函数也没用这个参数

继续分析, 这里有两个发包函数, 猜测是该网站有两个客户端, 采用了不同的参数, 当前环境下采用了第一个函数

接着会调用a.login函数, 这个函数同样传入了三个参数

// a.login
/**
 * Encrypts the password with a.encode and submits the login as a JSONP request
 * to //passport.gm99.com/login/login3.
 *
 * @param {Object} t  Login fields: uname, password (plaintext), forward, remember,
 *                    type, plus ckcode - or g_recaptcha / is_recent when type is
 *                    "passport".
 * @param {Object} [e] Optional object exposing attr("id"); when its id is
 *                    "login-btn" the t_source field is not added.
 * @param {Function} [i] Optional callback run at the start of the response
 *                    handler, whatever the result.
 */
n.prototype.login = function(t, e, i) {
    var s = (new Date).getTime();      // timestamp, second argument of a.encode
    var uname = t.uname;
    var o = a.encode(t.password, s);   // encrypted password; the only encrypted field
    var p = t.type;
    var h = this;
    var f = {
        encrypt: 1,
        uname: uname,
        password: o,
        forward: t.forward,
        remember: t.remember
    };

    // Loose comparisons are kept on purpose: the callers' value types are not visible here.
    if ("passport" == p) {
        f.g_recaptcha = t.g_recaptcha;
        if (1 == t.is_recent) {
            f.is_recent = 1;
        }
    } else {
        f.ckcode = t.ckcode;
    }

    // t_source is added only when the page URL contains www.gm99.com, the URL has
    // no non-empty first path segment, and e is absent or its id is not "login-btn".
    // The || chain keeps the original short-circuit order.
    var g = window.location.href;
    var skipSource = g.indexOf("www.gm99.com") == -1 ||
        g.split("//")[1].split("/")[1] ||
        e && "login-btn" == e.attr("id");
    if (!skipSource) {
        f.t_source = "index_pop";
    }

    r.ajax({
        url: "//passport.gm99.com/login/login3",
        type: "get",
        data: f,
        xhrFields: {
            withCredentials: true
        },
        jsonp: "callback",
        async: true,
        dataType: "jsonp",
        success: function(t) {
            r("#login-btn").removeClass("disable");
            if (i && i instanceof Function) {
                i();
            }

            if (t.hasOwnProperty("result") && 1 == t.result) {
                if (t.hasOwnProperty("bbs_sync") && t.bbs_sync) {
                    // Fix: the pattern must be an escaped backslash ("\\"). The previous
                    // text "\" escaped the closing quote and did not parse at all.
                    // Only the first backslash is removed, as String.replace with a
                    // string pattern does.
                    // NOTE(review): bbs_sync is appended to <head> as markup straight
                    // from the response - confirm the response is trusted.
                    r("head").append(t.bbs_sync.replace("\\", ""));
                }
                if (t.hasOwnProperty("forward")) {
                    setTimeout(function() {
                        window.top.location.href = decodeURIComponent(t.forward) || "/";
                    }, 1e3);
                }
                return;
            }

            // Failed login: trigger the widget matching the login type, then reset the form.
            if ("pop" === p) {
                r(".pop-login-auth-codes").click();
            } else if ("index" === p) {
                r(".login-auth-codes").click();
            } else if ("passport" === p) {
                var hasRecaptcha = "undefined" != typeof grecaptcha;
                if (hasRecaptcha) {
                    grecaptcha.reset(window.googleVerificationWidget);
                    window.googleVerificationToken = null;
                }
            }
            r("#login-btn").text("登入");
            r("#login-auth-code,#pop-login-auth-code").val("");
            h.dialog({
                content: t.msg
            });
        }
    });
};

可以发现发送ajax的数据就是f, 而f中, uname没有加密, password=o

这个o = a.encode(t.password, s), t.password就是明文密码, s是时间戳

所以关键的加密函数就是a.encode

// a.encode: t is the plaintext password, e is the timestamp.
// When e is truthy the text to encrypt is "<e>|<t>"; otherwise it is t alone.
// The result of this.jsencrypt.encrypt is URI-component-encoded before it is returned.
n.prototype.encode = function(t, e) {
    var plaintext = t;
    if (e) {
        plaintext = e + "|" + t;
    }
    var cipher = this.jsencrypt.encrypt(plaintext);
    return encodeURIComponent(cipher);
};