连接跟踪超时扩展_随笔

连接跟踪超时扩展

连接跟踪超时扩展，允许应用层对连接的超时时长进行修改控制。如下，新增IPv4 TCP协议超时策略(tcp0，最大4个字符)，其中指定4个状态的超时时长，其它采用默认值。随后，使用iptables命令将此策略应用到所有的连接跟踪tcp报文。

# nfct timeout add tcp0 inet tcp established 1000 close 10 time_wait 10 last_ack 10
# 
# nfct timeout list
.tcp0 = {
        .l3proto = 2,
        .l4proto = 6,
        .policy = {
                .SYN_SENT = 120,
                .SYN_RECV = 60,
                .ESTABLISHED = 1000,
                .FIN_WAIT = 120,
                .CLOSE_WAIT = 60,
                .LAST_ACK = 10,
                .TIME_WAIT = 10,
                .CLOSE = 10,
                .SYN_SENT2 = 120,
                .RETRANS = 300,
                .UNACKNOWLEDGED = 300,
        },
};

将超时策略应用到指定的连接。

# iptables -I PREROUTING -t raw -p tcp -j CT --timeout tcp0
# iptables -I OUTPUT -t raw -p tcp -j CT --timeout tcp0
# 
# iptables -v -t raw -L      
Chain PREROUTING (policy ACCEPT 427 packets, 90404 bytes)
 pkts bytes target     prot opt in     out     source               destination         
   77  6472 CT         tcp  --  any    any     anywhere             anywhere             CT timeout tcp0

Chain OUTPUT (policy ACCEPT 50 packets, 12048 bytes)
 pkts bytes target     prot opt in     out     source               destination         
   70 15776 CT         tcp  --  any    any     anywhere             anywhere             CT timeout tcp0

启动conntrack进行事件监听，可见，远端主机192.168.1.108建立到本机的SSH连接，TCP建立状态的超时时长为1000秒。

# conntrack -E -p TCP
    [NEW] tcp      6 120 SYN_SENT src=192.168.1.108 dst=192.168.1.134 sport=50090 dport=22 [UNREPLIED] src=192.168.1.134 dst=192.168.1.108 sport=22 dport=50090
 [UPDATE] tcp      6 60 SYN_RECV src=192.168.1.108 dst=192.168.1.134 sport=50090 dport=22 src=192.168.1.134 dst=192.168.1.108 sport=22 dport=50090
 [UPDATE] tcp      6 1000 ESTABLISHED src=192.168.1.108 dst=192.168.1.134 sport=50090 dport=22 src=192.168.1.134 dst=192.168.1.108 sport=22 dport=50090 [ASSURED]

超时策略初始化

由函数cttimeout_init进行初始化。其注册了命名空间处理结构cttimeout_ops，以及nfnetlink子系统结构cttimeout_subsys。

/*
 * Per-network-namespace operations for cttimeout: cttimeout_net_init is
 * installed as the init hook and cttimeout_net_exit as the exit hook.
 */
static struct pernet_operations cttimeout_ops = {
    .exit = cttimeout_net_exit,
    .init = cttimeout_net_init,
};
static int __init cttimeout_init(void)
{
    ret = register_pernet_subsys(&cttimeout_ops);
    if (ret < 0)
        return ret;

    ret = nfnetlink_subsys_register(&cttimeout_subsys);
    if (ret < 0) {
        pr_err("cttimeout_init: cannot register cttimeout with nfnetlink.\n");
        goto err_out;
    }
    RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, ctnl_timeout_find_get);
    RCU_INIT_POINTER(nf_ct_timeout_put_hook, ctnl_timeout_put);

命名空间初始化函数中，初始化了nfct_timeout_list链表。

static int __net_init cttimeout_net_init(struct net *net)
{
    INIT_LIST_HEAD(&net->nfct_timeout_list);

nfnetlink子系统结构，主要处理以下5种类型的netlink消息：NEW/GET/DELETE/DEFAULT_SET/DEFAULT_GET。

/*
 * Dispatch table indexed by cttimeout netlink message type. Each message
 * type has its own handler; all entries use the same attribute count
 * (CTA_TIMEOUT_MAX) and the same attribute policy (cttimeout_nla_policy).
 */
static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
    [IPCTNL_MSG_TIMEOUT_NEW] = {
        .call       = cttimeout_new_timeout,
        .attr_count = CTA_TIMEOUT_MAX,
        .policy     = cttimeout_nla_policy,
    },
    [IPCTNL_MSG_TIMEOUT_GET] = {
        .call       = cttimeout_get_timeout,
        .attr_count = CTA_TIMEOUT_MAX,
        .policy     = cttimeout_nla_policy,
    },
    [IPCTNL_MSG_TIMEOUT_DELETE] = {
        .call       = cttimeout_del_timeout,
        .attr_count = CTA_TIMEOUT_MAX,
        .policy     = cttimeout_nla_policy,
    },
    [IPCTNL_MSG_TIMEOUT_DEFAULT_SET] = {
        .call       = cttimeout_default_set,
        .attr_count = CTA_TIMEOUT_MAX,
        .policy     = cttimeout_nla_policy,
    },
    [IPCTNL_MSG_TIMEOUT_DEFAULT_GET] = {
        .call       = cttimeout_default_get,
        .attr_count = CTA_TIMEOUT_MAX,
        .policy     = cttimeout_nla_policy,
    },
};
static const struct nfnetlink_subsystem cttimeout_subsys = {
    .name               = "conntrack_timeout",
    .subsys_id          = NFNL_SUBSYS_CTNETLINK_TIMEOUT,
    .cb_count           = IPCTNL_MSG_TIMEOUT_MAX,
    .cb             = cttimeout_cb,

新增超时策略

如下函数cttimeout_new_timeout，首先在命名空间全局链表nfct_timeout_list进行遍历查找，确认是否存在相同名称的超时策略。

static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
                 struct sk_buff *skb, const struct nlmsghdr *nlh,
                 const struct nlattr * const cda[],
                 struct netlink_ext_ack *extack)
{
    __u16 l3num;
    __u8 l4num;
    const struct nf_conntrack_l4proto *l4proto;
    struct ctnl_timeout *timeout, *matching = NULL;

    name = nla_data(cda[CTA_TIMEOUT_NAME]);
    l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
    l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);

    list_for_each_entry(timeout, &net->nfct_timeout_list, head) {
        if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
            continue;

        if (nlh->nlmsg_flags & NLM_F_EXCL)
            return -EEXIST;

        matching = timeout;
        break;

如果找到同名的超时策略，在设置了替换标志NLM_F_REPLACE时，如果两者定义的L3和L4协议号都相同，更新已有策略的超时时长。

    if (matching) {
        if (nlh->nlmsg_flags & NLM_F_REPLACE) {
            
            if (matching->timeout.l3num != l3num ||
                matching->timeout.l4proto->l4proto != l4num)
                return -EINVAL;

            return ctnl_timeout_parse_policy(&matching->timeout.data,
                             matching->timeout.l4proto,
                             net, cda[CTA_TIMEOUT_DATA]);
        }

        return -EBUSY;

如果没有找到同名的超时策略，新分配一个超时策略，由函数ctnl_timeout_parse_policy解析netlink数据，对其进行赋值。最后，将其链接到命名空间nfct_timeout_list链表。

    l4proto = nf_ct_l4proto_find(l4num);

    
    if (l4proto->l4proto != l4num) {
        ret = -EOPNOTSUPP;
        goto err_proto_put;
    }

    timeout = kzalloc(sizeof(struct ctnl_timeout) +
              l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
    if (timeout == NULL) {
        ret = -ENOMEM;
        goto err_proto_put;
    }

    ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto, net, cda[CTA_TIMEOUT_DATA]);
    if (ret < 0)
        goto err;

    strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
    timeout->timeout.l3num = l3num;
    timeout->timeout.l4proto = l4proto;
    refcount_set(&timeout->refcnt, 1);
    list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);

如下函数解析用户层下发的超时策略数据，调用相应的4层函数进行处理。

static int
ctnl_timeout_parse_policy(void *timeout,
              const struct nf_conntrack_l4proto *l4proto,
              struct net *net, const struct nlattr *attr)
{
    struct nlattr **tb;

    tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb), GFP_KERNEL);
    if (!tb)
        return -ENOMEM;

    ret = nla_parse_nested_deprecated(tb,
                      l4proto->ctnl_timeout.nlattr_max,
                      attr,
                      l4proto->ctnl_timeout.nla_policy,
                      NULL);
    if (ret < 0) goto err;

    ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout);

对于TCP协议，由函数tcp_timeout_nlattr_to_obj进行处理。首先，将timeouts数组初始化为命名空间中的timeouts的默认值，之后，再由netlink消息中的配置值进行覆盖，这样，没有设置的值保持默认。

另外，如果timeouts参数为空，表明要修改命名空间的默认timeouts值，即IPCTNL_MSG_TIMEOUT_DEFAULT_SET操作。

static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
                     struct net *net, void *data)
{
    struct nf_tcp_net *tn = nf_tcp_pernet(net);
    unsigned int *timeouts = data;

    if (!timeouts)
        timeouts = tn->timeouts;
    
    for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
        timeouts[i] = tn->timeouts[i];

    if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
        timeouts[TCP_CONNTRACK_SYN_SENT] =
            ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
    }

    if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
        timeouts[TCP_CONNTRACK_SYN_RECV] =
            ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
    }
    if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
        timeouts[TCP_CONNTRACK_ESTABLISHED] =
            ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
    }

超时扩展

如下初始化超时扩展timeout_extend，ID值为NF_CT_EXT_TIMEOUT。

/*
 * Conntrack extension descriptor for the timeout extension: identified by
 * NF_CT_EXT_TIMEOUT, sized and aligned for struct nf_conn_timeout.
 */
static const struct nf_ct_ext_type timeout_extend = {
    .id    = NF_CT_EXT_TIMEOUT,
    .len   = sizeof(struct nf_conn_timeout),
    .align = __alignof__(struct nf_conn_timeout),
};

int nf_conntrack_timeout_init(void)
{
    int ret = nf_ct_extend_register(&timeout_extend);

核心函数nf_ct_set_timeout根据名称找到超时策略结构timeout，之后为连接（ct）增加NF_CT_EXT_TIMEOUT类型的扩展，并将timeout赋值给新创建的扩展。另外，在上节超时策略初始化函数cttimeout_init中，将ctnl_timeout_find_get赋值于指针nf_ct_timeout_find_get_hook。在找到超时策略之后，将其引用计数进行了递增。

int nf_ct_set_timeout(struct net *net, struct nf_conn *ct,
              u8 l3num, u8 l4num, const char *timeout_name)
{
    typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
    struct nf_ct_timeout *timeout;
    struct nf_conn_timeout *timeout_ext;

    timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
    if (!timeout_find_get) {
        ret = -ENOENT;
        goto out;
    }
    timeout = timeout_find_get(net, timeout_name);
    if (!timeout) {
        ret = -ENOENT;
        goto out;
    }
    if (timeout->l3num != l3num) {
        ret = -EINVAL;
        goto err_put_timeout;
    }
    if (timeout->l4proto->l4proto != l4num) {
        ret = -EINVAL;
        goto err_put_timeout;
    }
    timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);

CT目标参数timeout

函数xt_ct_tg_init注册CT目标。

static int __init xt_ct_tg_init(void)
{
    ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
    if (ret < 0) {
        xt_unregister_target(&notrack_tg_reg);
        return ret;
    }

CT目标在raw表中添加如下的xt_target，名称为CT。检测函数为xt_ct_tg_check_v2，target函数为xt_ct_target_v1。

static struct xt_target xt_ct_tg_reg[] __read_mostly = {
    {
        .name       = "CT",
        .family     = NFPROTO_UNSPEC,
        .revision   = 2,
        .targetsize = sizeof(struct xt_ct_target_info_v1),
        .usersize   = offsetof(struct xt_ct_target_info, ct),
        .checkentry = xt_ct_tg_check_v2,
        .destroy    = xt_ct_tg_destroy_v1,
        .target     = xt_ct_target_v1,
        .table      = "raw",
        .me     = THIS_MODULE,
    },

在检测函数中，创建连接跟踪模板，将其关联超时扩展。

static int xt_ct_tg_check(const struct xt_tgchk_param *par,
              struct xt_ct_target_info_v1 *info)
{
    struct nf_conn *ct;

    ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL);
    if (!ct) {
        ret = -ENOMEM;
        goto err2;
    }

    if (info->timeout[0]) {
        if (strnlen(info->timeout, sizeof(info->timeout)) == sizeof(info->timeout)) {
            ret = -ENAMETOOLONG;
            goto err4;
        }

        ret = xt_ct_set_timeout(ct, par, info->timeout);
        if (ret < 0)
            goto err4;
    }
    __set_bit(IPS_CONFIRMED_BIT, &ct->status);
    nf_conntrack_get(&ct->ct_general);
out:
    info->ct = ct;

如下函数xt_ct_set_timeout根据CT目标参数--timeout指定的超时策略名称，利用以上函数nf_ct_set_timeout将超时策略赋值给连接跟踪结构nf_conn，即为连接增加timeout扩展。

static int
xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
          const char *timeout_name)
{
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
    const struct nf_conntrack_l4proto *l4proto;
    u8 proto;

    proto = xt_ct_find_proto(par);
    if (!proto) {
        pr_info_ratelimited("You must specify a L4 protocol and not "
                    "use inversions on it");
        return -EINVAL;
    }
    l4proto = nf_ct_l4proto_find(proto);
    return nf_ct_set_timeout(par->net, ct, par->family, l4proto->l4proto,
                 timeout_name);

最后，将创建的连接跟踪模板，赋值给xt_tgchk_param结构成员targinfo的ct变量。

static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par)
{
    struct xt_ct_target_info_v1 *info = par->targinfo;

    if (info->flags & ~XT_CT_MASK)
        return -EINVAL;

    return xt_ct_tg_check(par, par->targinfo);

targinfo为ipt_entry结构中xt_entry_target的data字段。

static int check_target(struct ipt_entry *e, struct net *net, const char *name)
{
    struct xt_entry_target *t = ipt_get_target(e);
    struct xt_tgchk_param par = {
        .net       = net,
        .table     = name,
        .entryinfo = e,
        .target    = t->u.kernel.target,
        .targinfo  = t->data,
        .hook_mask = e->comefrom,
        .family    = NFPROTO_IPV4,
    };
        
    return xt_check_target(&par, t->u.target_size - sizeof(*t),
                   e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
				   
int xt_check_target(struct xt_tgchk_param *par,
            unsigned int size, u16 proto, bool inv_proto)
{  

    if (par->target->checkentry != NULL) {
        ret = par->target->checkentry(par);

IPTABLE规则CT目标

在规则match匹配之后，取出ipt_entry结构中的target（变量t），调用CT目标的target函数，即上节提到的xt_ct_target_v1函数。

unsigned int
ipt_do_table(struct sk_buff *skb,
 const struct nf_hook_state *state,
         struct xt_table *table)
{
    struct ipt_entry *e, **jumpstack;
    struct xt_action_param acpar;

    e = get_entry(table_base, private->hook_entry[hook]);

    do {
        const struct xt_entry_target *t;

        xt_ematch_foreach(ematch, e) {
            acpar.match     = ematch->u.kernel.match;
            acpar.matchinfo = ematch->data;
            if (!acpar.match->match(skb, &acpar))
                goto no_match;
        }

        counter = xt_get_this_cpu_counter(&e->counters);
        ADD_COUNTER(*counter, skb->len, 1);

        t = ipt_get_target_c(e);
        WARN_ON(!t->u.kernel.target);

        acpar.target   = t->u.kernel.target;
        acpar.targinfo = t->data; 

        verdict = t->u.kernel.target->target(skb, &acpar);

连接跟踪创建

函数xt_ct_target_v1取出targinfo中保存的连接模板，将其赋值给skb的成员结构_nfct。

/*
 * Target hook for the CT target: reads the conntrack pointer stored in the
 * rule's target info (par->targinfo) and passes it, together with the
 * packet, to xt_ct_target(), returning that function's verdict.
 */
static unsigned int xt_ct_target_v1(struct sk_buff *skb,
                    const struct xt_action_param *par)
{
    const struct xt_ct_target_info_v1 *tginfo = par->targinfo;

    return xt_ct_target(skb, tginfo->ct);
}
static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
{
    
    if (skb->_nfct != 0)
        return XT_CONTINUE;

    if (ct) {
        atomic_inc(&ct->ct_general.use);
        nf_ct_set(skb, ct, IP_CT_NEW);
    } else {
        nf_ct_set(skb, ct, IP_CT_UNTRACKED);
    }    

    return XT_CONTINUE;

在创建连接时，由skb中取出连接跟踪模板。

unsigned int
nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
    enum ip_conntrack_info ctinfo;
    struct nf_conn *ct, *tmpl;
    u_int8_t protonum;
    int dataoff, ret;

    tmpl = nf_ct_get(skb, &ctinfo);

在初始化新连接时，如果连接模板设置了timeout扩展，将此扩展赋值给新创建的连接。

static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
           const struct nf_conntrack_tuple *tuple,
           struct sk_buff *skb, unsigned int dataoff, u32 hash)
{
    struct nf_conn *ct;
    struct nf_conn_timeout *timeout_ext;

    zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
    ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, hash);
    if (IS_ERR(ct))
        return (struct nf_conntrack_tuple_hash *)ct;

    timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;

    if (timeout_ext)
        nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
                      GFP_ATOMIC);

对于TCP连接，如果连接没有timeout扩展，使用命名空间默认的timeout值。

int nf_conntrack_tcp_packet(struct nf_conn *ct,
                struct sk_buff *skb,
                unsigned int dataoff,
                enum ip_conntrack_info ctinfo,
                const struct nf_hook_state *state)
{
    struct net *net = nf_ct_net(ct);
    struct nf_tcp_net *tn = nf_tcp_pernet(net);
    enum tcp_conntrack new_state, old_state;
    unsigned int index, *timeouts;

    timeouts = nf_ct_timeout_lookup(ct);
    if (!timeouts)
        timeouts = tn->timeouts;

内核版本 5.10

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/zaji/5704239.html

连接跟踪超时扩展

发表评论

评论列表（0条）