连接跟踪超时扩展,允许应用层对连接的超时时长进行修改控制。如下,新增IPv4 TCP协议超时策略(名称为tcp0;策略名称长度受内核CTNL_TIMEOUT_NAME_MAX限制,最多31个字符),其中指定4个状态的超时时长,其它采用默认值。随后,使用iptables命令将此策略应用到所有的连接跟踪tcp报文。
# nfct timeout add tcp0 inet tcp established 1000 close 10 time_wait 10 last_ack 10 # # nfct timeout list .tcp0 = { .l3proto = 2, .l4proto = 6, .policy = { .SYN_SENT = 120, .SYN_RECV = 60, .ESTABLISHED = 1000, .FIN_WAIT = 120, .CLOSE_WAIT = 60, .LAST_ACK = 10, .TIME_WAIT = 10, .CLOSE = 10, .SYN_SENT2 = 120, .RETRANS = 300, .UNACKNOWLEDGED = 300, }, };
将超时策略应用到指定的连接。
# iptables -I PREROUTING -t raw -p tcp -j CT --timeout tcp0 # iptables -I OUTPUT -t raw -p tcp -j CT --timeout tcp0 # # iptables -v -t raw -L Chain PREROUTING (policy ACCEPT 427 packets, 90404 bytes) pkts bytes target prot opt in out source destination 77 6472 CT tcp -- any any anywhere anywhere CT timeout tcp0 Chain OUTPUT (policy ACCEPT 50 packets, 12048 bytes) pkts bytes target prot opt in out source destination 70 15776 CT tcp -- any any anywhere anywhere CT timeout tcp0
启动conntrack进行事件监听,可见,远端主机192.168.1.108建立到本机的SSH连接,TCP建立状态的超时时长为1000秒。
# conntrack -E -p TCP [NEW] tcp 6 120 SYN_SENT src=192.168.1.108 dst=192.168.1.134 sport=50090 dport=22 [UNREPLIED] src=192.168.1.134 dst=192.168.1.108 sport=22 dport=50090 [UPDATE] tcp 6 60 SYN_RECV src=192.168.1.108 dst=192.168.1.134 sport=50090 dport=22 src=192.168.1.134 dst=192.168.1.108 sport=22 dport=50090 [UPDATE] tcp 6 1000 ESTABLISHED src=192.168.1.108 dst=192.168.1.134 sport=50090 dport=22 src=192.168.1.134 dst=192.168.1.108 sport=22 dport=50090 [ASSURED]
超时策略初始化
由函数cttimeout_init进行初始化。其注册了命名空间处理结构cttimeout_ops,以及nfnetlink子系统结构cttimeout_subsys。
static struct pernet_operations cttimeout_ops = { .init = cttimeout_net_init, .exit = cttimeout_net_exit, }; static int __init cttimeout_init(void) { ret = register_pernet_subsys(&cttimeout_ops); if (ret < 0) return ret; ret = nfnetlink_subsys_register(&cttimeout_subsys); if (ret < 0) { pr_err("cttimeout_init: cannot register cttimeout with nfnetlink.\n"); goto err_out; } RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, ctnl_timeout_find_get); RCU_INIT_POINTER(nf_ct_timeout_put_hook, ctnl_timeout_put);
命名空间初始化函数中,初始化了nfct_timeout_list链表。
static int __net_init cttimeout_net_init(struct net *net) { INIT_LIST_HEAD(&net->nfct_timeout_list);
nfnetlink子系统结构,主要处理以下5种类型的netlink消息。NEW/GET/DELETE/DEFAULT_SET/DEFAULT_GET。
static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { [IPCTNL_MSG_TIMEOUT_NEW] = { .call = cttimeout_new_timeout, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_GET] = { .call = cttimeout_get_timeout, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, }; static const struct nfnetlink_subsystem cttimeout_subsys = { .name = "conntrack_timeout", .subsys_id = NFNL_SUBSYS_CTNETLINK_TIMEOUT, .cb_count = IPCTNL_MSG_TIMEOUT_MAX, .cb = cttimeout_cb,
新增超时策略
如下函数cttimeout_new_timeout,首先在命名空间全局链表nfct_timeout_list进行遍历查找,确认是否存在相同名称的超时策略。
static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[], struct netlink_ext_ack *extack) { __u16 l3num; __u8 l4num; const struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout, *matching = NULL; name = nla_data(cda[CTA_TIMEOUT_NAME]); l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); list_for_each_entry(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; matching = timeout; break;
如果找到同名的超时策略,在设置了替换标志NLM_F_REPLACE时,如果两者定义的L3和L4协议号都相同,更新已有策略的超时时长。
if (matching) { if (nlh->nlmsg_flags & NLM_F_REPLACE) { if (matching->timeout.l3num != l3num || matching->timeout.l4proto->l4proto != l4num) return -EINVAL; return ctnl_timeout_parse_policy(&matching->timeout.data, matching->timeout.l4proto, net, cda[CTA_TIMEOUT_DATA]); } return -EBUSY;
如果没有找到同名的超时策略,新分配一个超时策略,由函数ctnl_timeout_parse_policy解析netlink数据,对其进行赋值。最后,将其链接到命名空间nfct_timeout_list链表。
l4proto = nf_ct_l4proto_find(l4num); if (l4proto->l4proto != l4num) { ret = -EOPNOTSUPP; goto err_proto_put; } timeout = kzalloc(sizeof(struct ctnl_timeout) + l4proto->ctnl_timeout.obj_size, GFP_KERNEL); if (timeout == NULL) { ret = -ENOMEM; goto err_proto_put; } ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto, net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); timeout->timeout.l3num = l3num; timeout->timeout.l4proto = l4proto; refcount_set(&timeout->refcnt, 1); list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
如下函数解析用户层下发的超时策略数据,调用相应的4层函数进行处理。
static int ctnl_timeout_parse_policy(void *timeout, const struct nf_conntrack_l4proto *l4proto, struct net *net, const struct nlattr *attr) { struct nlattr **tb; tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb), GFP_KERNEL); if (!tb) return -ENOMEM; ret = nla_parse_nested_deprecated(tb, l4proto->ctnl_timeout.nlattr_max, attr, l4proto->ctnl_timeout.nla_policy, NULL); if (ret < 0) goto err; ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout);
对于TCP协议,由函数tcp_timeout_nlattr_to_obj进行处理。首先,将timeouts数组初始化为命名空间中的timeouts的默认值,之后,再由netlink消息中的配置值进行覆盖,这样,没有设置的值保持默认。
另外,如果timeouts参数为空,表明要修改命名空间的默认timeouts值,即IPCTNL_MSG_TIMEOUT_DEFAULT_SET操作。
static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { struct nf_tcp_net *tn = nf_tcp_pernet(net); unsigned int *timeouts = data; if (!timeouts) timeouts = tn->timeouts; for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++) timeouts[i] = tn->timeouts[i]; if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) { timeouts[TCP_CONNTRACK_SYN_SENT] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ; } if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) { timeouts[TCP_CONNTRACK_SYN_RECV] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ; } if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) { timeouts[TCP_CONNTRACK_ESTABLISHED] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ; }
超时扩展
如下初始化超时扩展timeout_extend,ID值为NF_CT_EXT_TIMEOUT。
static const struct nf_ct_ext_type timeout_extend = { .len = sizeof(struct nf_conn_timeout), .align = __alignof__(struct nf_conn_timeout), .id = NF_CT_EXT_TIMEOUT, }; int nf_conntrack_timeout_init(void) { int ret = nf_ct_extend_register(&timeout_extend);
核心函数nf_ct_set_timeout根据名称找到超时策略结构timeout,之后为连接(ct)增加NF_CT_EXT_TIMEOUT类型的扩展,并将timeout赋值给新创建的扩展。另外,在上节超时策略初始化函数cttimeout_init中,将ctnl_timeout_find_get赋值于指针nf_ct_timeout_find_get_hook。在找到超时策略之后,将其引用计数进行了递增。
int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name) { typeof(nf_ct_timeout_find_get_hook) timeout_find_get; struct nf_ct_timeout *timeout; struct nf_conn_timeout *timeout_ext; timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); if (!timeout_find_get) { ret = -ENOENT; goto out; } timeout = timeout_find_get(net, timeout_name); if (!timeout) { ret = -ENOENT; goto out; } if (timeout->l3num != l3num) { ret = -EINVAL; goto err_put_timeout; } if (timeout->l4proto->l4proto != l4num) { ret = -EINVAL; goto err_put_timeout; } timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
CT目标参数timeout
函数xt_ct_tg_init注册CT目标。
static int __init xt_ct_tg_init(void) { ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); if (ret < 0) { xt_unregister_target(&notrack_tg_reg); return ret; }
CT目标在raw表中添加如下的xt_target,名称为CT。检测函数为xt_ct_tg_check_v2,target函数为xt_ct_target_v1。
static struct xt_target xt_ct_tg_reg[] __read_mostly = { { .name = "CT", .family = NFPROTO_UNSPEC, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), .usersize = offsetof(struct xt_ct_target_info, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, .table = "raw", .me = THIS_MODULE, },
在检测函数中,创建连接跟踪模板,将其关联超时扩展。
static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct xt_ct_target_info_v1 *info) { struct nf_conn *ct; ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL); if (!ct) { ret = -ENOMEM; goto err2; } if (info->timeout[0]) { if (strnlen(info->timeout, sizeof(info->timeout)) == sizeof(info->timeout)) { ret = -ENAMETOOLONG; goto err4; } ret = xt_ct_set_timeout(ct, par, info->timeout); if (ret < 0) goto err4; } __set_bit(IPS_CONFIRMED_BIT, &ct->status); nf_conntrack_get(&ct->ct_general); out: info->ct = ct;
如下函数xt_ct_set_timeout根据CT目标参数--timeout指定的超时策略名称,利用以上函数nf_ct_set_timeout将超时策略赋值给连接跟踪结构nf_conn,即为连接增加timeout扩展。
static int xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, const char *timeout_name) { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT const struct nf_conntrack_l4proto *l4proto; u8 proto; proto = xt_ct_find_proto(par); if (!proto) { pr_info_ratelimited("You must specify a L4 protocol and not " "use inversions on it"); return -EINVAL; } l4proto = nf_ct_l4proto_find(proto); return nf_ct_set_timeout(par->net, ct, par->family, l4proto->l4proto, timeout_name);
最后,将创建的连接跟踪模板,赋值给xt_tgchk_param结构成员targinfo的ct变量。
static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par) { struct xt_ct_target_info_v1 *info = par->targinfo; if (info->flags & ~XT_CT_MASK) return -EINVAL; return xt_ct_tg_check(par, par->targinfo);
targinfo为ipt_entry结构中xt_entry_target的data字段。
static int check_target(struct ipt_entry *e, struct net *net, const char *name) { struct xt_entry_target *t = ipt_get_target(e); struct xt_tgchk_param par = { .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, .targinfo = t->data, .hook_mask = e->comefrom, .family = NFPROTO_IPV4, }; return xt_check_target(&par, t->u.target_size - sizeof(*t), e->ip.proto, e->ip.invflags & IPT_INV_PROTO); int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u16 proto, bool inv_proto) { if (par->target->checkentry != NULL) { ret = par->target->checkentry(par);
IPTABLE规则CT目标
在规则match匹配之后,取出ipt_entry结构中的target(变量t),调用该target的target函数;对于CT目标,即上节提到的xt_ct_target_v1函数。
unsigned int ipt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table) { struct ipt_entry *e, **jumpstack; struct xt_action_param acpar; e = get_entry(table_base, private->hook_entry[hook]); do { const struct xt_entry_target *t; xt_ematch_foreach(ematch, e) { acpar.match = ematch->u.kernel.match; acpar.matchinfo = ematch->data; if (!acpar.match->match(skb, &acpar)) goto no_match; } counter = xt_get_this_cpu_counter(&e->counters); ADD_COUNTER(*counter, skb->len, 1); t = ipt_get_target_c(e); WARN_ON(!t->u.kernel.target); acpar.target = t->u.kernel.target; acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar);
连接跟踪创建
函数xt_ct_target_v1取出targinfo中保存的连接模板,将其赋值给skb的成员结构_nfct。
static unsigned int xt_ct_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_ct_target_info_v1 *info = par->targinfo; struct nf_conn *ct = info->ct; return xt_ct_target(skb, ct); } static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) { if (skb->_nfct != 0) return XT_CONTINUE; if (ct) { atomic_inc(&ct->ct_general.use); nf_ct_set(skb, ct, IP_CT_NEW); } else { nf_ct_set(skb, ct, IP_CT_UNTRACKED); } return XT_CONTINUE;
在创建连接时,由skb中取出连接跟踪模板。
unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) { enum ip_conntrack_info ctinfo; struct nf_conn *ct, *tmpl; u_int8_t protonum; int dataoff, ret; tmpl = nf_ct_get(skb, &ctinfo);
在初始化新连接时,如果连接模板设置了timeout扩展,将此扩展赋值给新创建的连接。
static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, struct sk_buff *skb, unsigned int dataoff, u32 hash) { struct nf_conn *ct; struct nf_conn_timeout *timeout_ext; zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, hash); if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; if (timeout_ext) nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout), GFP_ATOMIC);
对于TCP连接,如果连接没有timeout扩展,使用命名空间默认的timeout值。
int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = nf_tcp_pernet(net); enum tcp_conntrack new_state, old_state; unsigned int index, *timeouts; timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) timeouts = tn->timeouts;
内核版本 5.10
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)