深入理解Linux网络技术内幕 第13章 协议处理函数

协议处理函数

每种协议都有一个初始化函数:如果协议被静态编译进内核,初始化函数在系统引导期间执行;如果被编译成模块,则在模块加载时执行。
设备驱动接收到一个报文后,将其保存在sk_buff结构内,然后对protocol字段初始化。

1
/* Driver receive path: store the protocol ID returned by eth_type_trans() in skb->protocol. */
skb->protocol = eth_type_trans(skb, nic->netdev);

protocol字段被前面章节出现过的__netif_receive_skb函数使用,找到合适的L3处理函数并调用deliver_skb函数将skb传递到上层进行处理。
一个报文可以被传递给多个处理函数,比如有报文嗅探程序正在运行时。

协议处理函数组织

ptype_base是一个由16个元素组成的hash表,使用protocol字段作为hash表的key来查找已注册的报文处理函数。
ptype_all是一个链表,类型为ETH_P_ALL的协议处理函数被组织到这个链表中。

1
2
/*
 * Hash table of registered protocol handlers (struct packet_type), one
 * list head per bucket; the bucket is selected from the protocol type.
 */
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
/* List of handlers registered for ETH_P_ALL without a specific device. */
struct list_head ptype_all __read_mostly;   /* Taps */

协议处理函数注册

内核调用dev_add_pack注册一个协议。这个函数的唯一参数是struct packet_type。
struct packet_type结构是ptype_base hash表和ptype_all链表的元素。
packet_type结构的dev是网卡设备指针,表示该协议处理函数只对这个设备生效;置为NULL时,则对所有设备生效。比如tcpdump -i eth0 命令通过PF_PACKET套接字建立一个packet_type,并将dev指针初始化为eth0相关联的net_device实例。
func是上层协议的处理函数指针。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * struct packet_type - one registered protocol handler.
 *
 * Instances are passed to dev_add_pack() and linked, through @list, into
 * either a ptype_base hash bucket, the global ptype_all list, or one of
 * the per-device lists (see ptype_head()).
 */
struct packet_type {
    /* Protocol this handler accepts, in network byte order: htons(ether_type). */
    __be16          type;   /* This is really htons(ether_type). */
    /* Device the handler is bound to; NULL means it applies to every device. */
    struct net_device   *dev;   /* NULL is wildcarded here       */
    /* Upper-layer receive handler invoked for a matching packet. */
    int         (*func) (struct sk_buff *,
                     struct net_device *,
                     struct packet_type *,
                     struct net_device *);
    /* Receive handler taking a list head instead of a single sk_buff. */
    void            (*list_func) (struct list_head *,
                          struct packet_type *,
                          struct net_device *);
    /* NOTE(review): presumably matches this entry against a socket; confirm with callers. */
    bool            (*id_match)(struct packet_type *ptype,
                        struct sock *sk);
    /* NOTE(review): opaque pointer, presumably private to AF_PACKET users; confirm. */
    void            *af_packet_priv;
    /* Linkage into the list chosen by ptype_head(). */
    struct list_head    list;
};

当有多个packet_type实例关联到相同的协议时,匹配该type的输入报文会依次调用这些packet_type的func函数,即报文会被转交给所有这些协议处理函数实例。

前面提到协议注册通过dev_add_pack函数完成,这个函数就是将packet_type放到ptype_base hash表或者ptype_all链表上。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/*
 *  Add a protocol ID to the list. Now that the input handler is
 *  smarter we can dispense with all the messy stuff that used to be
 *  here.
 *
 *  BEWARE!!! Protocol handlers, mangling input packets,
 *  MUST BE last in hash buckets and checking protocol handlers
 *  MUST start from promiscuous ptype_all chain in net_bh.
 *  It is true now, do not change it.
 *  Explanation follows: if protocol handler, mangling packet, will
 *  be the first on list, it is not able to sense, that packet
 *  is cloned and should be copied-on-write, so that it will
 *  change it and subsequent readers will get broken packet.
 *                          --ANK (980803)
 */

static inline struct list_head *ptype_head(const struct packet_type *pt)
{
    if (pt->type == htons(ETH_P_ALL))
        return pt->dev ? &pt->dev->ptype_all : &ptype_all;
    else
        return pt->dev ? &pt->dev->ptype_specific :
                 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
/*
 * dev_add_pack - register a protocol handler.
 * @pt: packet type to register
 *
 * Looks up the destination list with ptype_head() and links @pt->list
 * into it with list_add_rcu() while holding ptype_lock.
 */
void dev_add_pack(struct packet_type *pt)
{
    struct list_head *bucket;

    bucket = ptype_head(pt);

    spin_lock(&ptype_lock);
    list_add_rcu(&pt->list, bucket);
    spin_unlock(&ptype_lock);
}

IPV4协议注册

IPv4协议在inet_init函数中注册协议处理函数。

1
2
3
4
5
6
/*
 * Handler descriptor for IPv4: matches type ETH_P_IP and uses ip_rcv as
 * the per-packet handler and ip_list_rcv as the list handler. .dev is
 * not set, so it stays NULL (wildcard: not bound to one device).
 */
static struct packet_type ip_packet_type __read_mostly = {
    .type = cpu_to_be16(ETH_P_IP),
    .func = ip_rcv,
    .list_func = ip_list_rcv,
};
/* Excerpt from inet_init(): register the IPv4 handler. */
dev_add_pack(&ip_packet_type);

设置报文类型

驱动程序使用eth_type_trans函数设置报文类型,这个函数主要完成两个功能:

  • 根据报文目的MAC地址设置sk_buff的pkt_type字段。
  • 取出L3层协议字段。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
/*
 * Values for skb->pkt_type. eth_type_trans() assigns BROADCAST, MULTICAST
 * or OTHERHOST from the destination MAC address; it never writes
 * PACKET_HOST explicitly (NOTE(review): presumably 0 is the field's
 * initial value -- confirm against sk_buff allocation).
 */
#define PACKET_HOST     0       /* To us        */
#define PACKET_BROADCAST    1       /* To all       */
#define PACKET_MULTICAST    2       /* To group     */
#define PACKET_OTHERHOST    3       /* To someone else  */

/**
 * eth_type_trans - determine the packet's protocol ID.
 * @skb: received socket data
 * @dev: receiving network device
 *
 * The rule here is that we
 * assume 802.3 if the type field is short enough to be a length.
 * This is normal practice and works for any 'now in use' protocol.
 */
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
{
    unsigned short _service_access_point;
    const unsigned short *sap;
    const struct ethhdr *eth;

    skb->dev = dev;
    skb_reset_mac_header(skb);

    eth = (struct ethhdr *)skb->data;
    skb_pull_inline(skb, ETH_HLEN);

    if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
        if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
            skb->pkt_type = PACKET_BROADCAST;
        else
            skb->pkt_type = PACKET_MULTICAST;
    }
    else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
                           dev->dev_addr)))
        skb->pkt_type = PACKET_OTHERHOST;

    /*
     * Some variants of DSA tagging don't have an ethertype field
     * at all, so we check here whether one of those tagging
     * variants has been configured on the receiving interface,
     * and if so, set skb->protocol without looking at the packet.
     */
    if (unlikely(netdev_uses_dsa(dev)))
        return htons(ETH_P_XDSA);

    if (likely(eth_proto_is_802_3(eth->h_proto)))
        return eth->h_proto;

    /*
     *      This is a magic hack to spot IPX packets. Older Novell breaks
     *      the protocol design and runs IPX over 802.3 without an 802.2 LLC
     *      layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
     *      won't work for fault tolerant netware but does for the rest.
     */
    sap = skb_header_pointer(skb, 0, sizeof(*sap), &_service_access_point);
    if (sap && *sap == 0xFFFF)
        return htons(ETH_P_802_3);

    /*
     *      Real 802.2 LLC
     */
    return htons(ETH_P_802_2);
}