DPDK版本19.02
初始化:

/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
{
...
/* rte_eal_cpu_init() ->
* eal_cpu_core_id()
* eal_cpu_socket_id()
* 读取/sys/devices/system/[cpu|node]
* 设置lcore_config->[core_role|core_id|socket_id] */
if (rte_eal_cpu_init() < 0) {
rte_eal_init_alert("Cannot detect lcores.");
rte_errno = ENOTSUP;
return -1;
}
/* eal_parse_args() ->
* eal_parse_common_option() ->
* eal_parse_coremask()
* eal_parse_master_lcore()
* eal_parse_lcores()
* eal_adjust_config()
* 解析-c、--master_lcore、--lcores参数
* 在eal_parse_lcores()中确认可用的logical CPU
* 在eal_adjust_config()中设置rte_config.master_lcore为0 (设置第一个lcore为MASTER lcore) */
fctret = eal_parse_args(argc, argv);
if (fctret < 0) {
rte_eal_init_alert("Invalid 'command line' arguments.");
rte_errno = EINVAL;
rte_atomic32_clear(&run_once);
return -1;
}
...
/* 初始化大页信息 */
if (rte_eal_memory_init() < 0) {
rte_eal_init_alert("Cannot init memory\n");
rte_errno = ENOMEM;
return -1;
}
...
/* eal_thread_init_master() ->
* eal_thread_set_affinity()
* 设置当前线程为MASTER lcore
* 在eal_thread_set_affinity()中绑定MASTER lcore到logical CPU */
eal_thread_init_master(rte_config.master_lcore);
...
/* rte_bus_scan() ->
* rte_pci_scan() ->
* pci_scan_one() ->
* pci_parse_sysfs_resource()
* rte_pci_add_device()
* 遍历rte_bus_list链表,调用每个bus的scan函数,pci为rte_pci_scan()
* 遍历/sys/bus/pci/devices目录,为每个DBSF分配struct rte_pci_device
* 逐行读取并解析每个DBSF的resource,保存到dev->mem_resource[i]
* 将dev插入rte_pci_bus.device_list链表 */
if (rte_bus_scan()) {
rte_eal_init_alert("Cannot scan the buses for devices\n");
rte_errno = ENODEV;
return -1;
}
/* pthread_create() ->
* eal_thread_loop() ->
* eal_thread_set_affinity()
* 为每个SLAVE lcore创建线程,线程函数为eal_thread_loop()
* 在eal_thread_set_affinity()中绑定SLAVE lcore到logical CPU */
RTE_LCORE_FOREACH_SLAVE(i) {
/*
* create communication pipes between master thread
* and children
*/
/* MASTER lcore创建pipes用于MASTER和SLAVE lcore间通信(父子线程间通信) */
if (pipe(lcore_config[i].pipe_master2slave) < 0)
rte_panic("Cannot create pipe\n");
if (pipe(lcore_config[i].pipe_slave2master) < 0)
rte_panic("Cannot create pipe\n");
lcore_config[i].state = WAIT; /* 设置SLAVE lcore的状态为WAIT */
/* create a thread for each lcore */
ret = pthread_create(&lcore_config[i].thread_id, NULL,
eal_thread_loop, NULL);
...
}
/*
* Launch a dummy function on all slave lcores, so that master lcore
* knows they are all ready when this function returns.
*/
rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
rte_eal_mp_wait_lcore();
...
/* Probe all the buses and devices/drivers on them */
/* rte_bus_probe() ->
* rte_pci_probe() ->
* pci_probe_all_drivers() ->
* rte_pci_probe_one_driver() ->
* rte_pci_match()
* rte_pci_map_device() ->
* pci_uio_map_resource()
* eth_ixgbe_pci_probe()
* 遍历rte_bus_list链表,调用每个bus的probe函数,pci为rte_pci_probe()
* rte_pci_probe()/pci_probe_all_drivers()分别遍历rte_pci_bus.device_list/driver_list链表,匹配设备和驱动
* 映射BAR,调用驱动的probe函数,ixgbe为eth_ixgbe_pci_probe() */
if (rte_bus_probe()) {
rte_eal_init_alert("Cannot probe devices\n");
rte_errno = ENOTSUP;
return -1;
}
...
}

在dpdk 16.11中是没有bus这一层抽象的,直接通过rte_eal_init->rte_eal_pci_init调用pci设备的初始化。
也就是说dpdk 16.11只支持pci这一种总线设备。但是到了dpdk 17.11引入了bus的概念,在rte_bus_scan中进行bus的初始化

1 /* Scan all the buses for registered devices: invoke the scan() callback of every bus linked on rte_bus_list. */
2 int
3 rte_bus_scan(void)
4 {
5 int ret;
6 struct rte_bus *bus = NULL;
7
8 TAILQ_FOREACH(bus, &rte_bus_list, next) {
9 ret = bus->scan();
10 if (ret) /* a non-zero result from one bus is only logged; the loop continues with the next bus */
11 RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n",
12 bus->name);
13 }
14
15 return 0; /* always 0: individual scan failures are not propagated to the caller */
16 }

这个函数会调用rte_bus_list上注册的所有bus的scan函数,这些bus是通过rte_bus_register函数注册上去的,而宏RTE_REGISTER_BUS又是rte_bus_register的封装。
重要结构体
rte_bus_list

1 struct rte_bus {
2 TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */
3 const char *name; /**< Name of the bus */
4 rte_bus_scan_t scan; /**< Scan for devices attached to bus */
5 rte_bus_probe_t probe; /**< Probe devices on bus */
6 rte_bus_find_device_t find_device; /**< Find a device on the bus */
7 rte_bus_plug_t plug; /**< Probe single device for drivers */
8 rte_bus_unplug_t unplug; /**< Remove single device from driver */
9 rte_bus_parse_t parse; /**< Parse a device name */
10 struct rte_bus_conf conf; /**< Bus configuration */
11 };
12
13 TAILQ_HEAD(rte_bus_list, rte_bus); /* declares struct rte_bus_list, the list-head type for struct rte_bus (see macro below) */
14
15 #define TAILQ_HEAD(name, type) \
16 struct name { \
17 struct type *tqh_first; /* first element */ \
18 struct type **tqh_last; /* addr of last next element */ \
19 }
20
21 /* Definition of the global bus list head rte_bus_list, set up with TAILQ_HEAD_INITIALIZER */
22 struct rte_bus_list rte_bus_list =
23 TAILQ_HEAD_INITIALIZER(rte_bus_list);

注册pci bus
将rte_pci_bus插入rte_bus_list链表

1 struct rte_pci_bus {
2 struct rte_bus bus; /**< Inherit the generic class */
3 struct rte_pci_device_list device_list; /**< List of PCI devices */
4 struct rte_pci_driver_list driver_list; /**< List of PCI drivers */
5 };
6
7 /* Definition of rte_pci_bus: the generic bus callbacks are set to the PCI implementations, both lists start out initialised via TAILQ_HEAD_INITIALIZER */
8 struct rte_pci_bus rte_pci_bus = {
9 .bus = {
10 .scan = rte_pci_scan,
11 .probe = rte_pci_probe,
12 .find_device = pci_find_device,
13 .plug = pci_plug,
14 .unplug = pci_unplug,
15 .parse = pci_parse,
16 },
17 .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
18 .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
19 };
20
21 RTE_REGISTER_BUS(pci, rte_pci_bus.bus); /* registers the embedded generic bus object under the name "pci" */
22
23 #define RTE_REGISTER_BUS(nm, bus) \
24 RTE_INIT_PRIO(businitfn_ ##nm, 101); /* declare businitfn_<nm> as a GCC constructor so it runs before main() */ \
25 static void businitfn_ ##nm(void) \
26 {\
27 (bus).name = RTE_STR(nm); /* the bus name is the stringified nm argument */ \
28 rte_bus_register(&bus); /* link the bus into the global bus list */ \
29 }
30
31 void
32 rte_bus_register(struct rte_bus *bus)
33 { /* Check the bus descriptor with RTE_VERIFY, then link it into rte_bus_list. */
34 RTE_VERIFY(bus);
35 RTE_VERIFY(bus->name && strlen(bus->name));
36 /* A bus should mandatorily have the scan implemented */
37 RTE_VERIFY(bus->scan);
38 RTE_VERIFY(bus->probe);
39 RTE_VERIFY(bus->find_device);
40 /* Buses supporting driver plug also require unplug. */
41 RTE_VERIFY(!bus->plug || bus->unplug);
42
43 /* Append the bus to the tail of rte_bus_list (for PCI the argument is rte_pci_bus.bus) */
44 TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
45 RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name);
46 }

设备初始化过程:
pci设备的初始化是通过以下路径完成的:rte_eal_init->rte_bus_scan->rte_pci_scan。而对应驱动的加载则是通过如下调用完成的:rte_eal_init->rte_bus_probe->rte_pci_probe。
注册pci driver
将rte_ixgbe_pmd插入rte_pci_bus.driver_list链表

1 struct rte_pci_driver {
2 TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */
3 struct rte_driver driver; /**< Inherit core driver. */
4 struct rte_pci_bus *bus; /**< PCI bus reference. */
5 pci_probe_t *probe; /**< Device Probe function. */
6 pci_remove_t *remove; /**< Device Remove function. */
7 const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
8 uint32_t drv_flags; /**< Flags controlling handling of device. */
9 };
10
11 /* Definition of rte_ixgbe_pmd, the ixgbe PCI driver object: ID table, driver flags and probe/remove callbacks */
12 static struct rte_pci_driver rte_ixgbe_pmd = {
13 .id_table = pci_id_ixgbe_map,
14 .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
15 .probe = eth_ixgbe_pci_probe,
16 .remove = eth_ixgbe_pci_remove,
17 };
18
19 RTE_PMD_REGISTER_PCI(net_ixgbe, rte_ixgbe_pmd); /* registers the driver under the name "net_ixgbe" */
20
21 #define RTE_PMD_REGISTER_PCI(nm, pci_drv) \
22 RTE_INIT(pciinitfn_ ##nm); /* declare pciinitfn_<nm> as a GCC constructor so it runs before main() */ \
23 static void pciinitfn_ ##nm(void) \
24 {\
25 (pci_drv).driver.name = RTE_STR(nm); /* the driver name is the stringified nm argument */ \
26 rte_pci_register(&pci_drv); /* link the driver into rte_pci_bus.driver_list */ \
27 } \
28 RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
29
30 void
31 rte_pci_register(struct rte_pci_driver *driver)
32 {
33 /* Append the driver (e.g. rte_ixgbe_pmd) to the tail of rte_pci_bus.driver_list */
34 TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next);
35 driver->bus = &rte_pci_bus; /* back-reference from the driver to the PCI bus object */
36 }

eth_ixgbe_dev_init()

1 static int
2 eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
3 {
4 ...
5 eth_dev->dev_ops = &ixgbe_eth_dev_ops; /* 注册ixgbe_eth_dev_ops函数表 */
6 eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; /* burst收包函数 */
7 eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; /* burst发包函数 */
8 eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts;
9 ...
10 hw->device_id = pci_dev->id.device_id; /* device_id */
11 hw->vendor_id = pci_dev->id.vendor_id; /* vendor_id */
12 hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; /* mmap()得到的BAR的虚拟地址 */
13 ...
14 /* ixgbe_init_shared_code() ->
15 * ixgbe_set_mac_type()
16 * ixgbe_init_ops_82599()
17 * 在ixgbe_set_mac_type()中根据vendor_id和device_id设置hw->mac.type,82599为ixgbe_mac_82599EB
18 * 根据hw->mac.type调用对应的函数设置hw->mac.ops,82599为ixgbe_init_ops_82599() */
19 diag = ixgbe_init_shared_code(hw);
20 ...
21 /* ixgbe_init_hw() ->
22 * ixgbe_call_func() ->
23 * ixgbe_init_hw_generic() ->
24 * ixgbe_reset_hw_82599() ->
25 * ixgbe_get_mac_addr_generic()
26 * 得到网卡的mac地址 */
27 diag = ixgbe_init_hw(hw);
28 ...
29 ether_addr_copy((struct ether_addr *) hw->mac.perm_addr,
30 &eth_dev->data->mac_addrs[0]); /* 复制网卡的mac地址到eth_dev->data->mac_addrs */
31 ...
32 }
33
34 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
35 .dev_configure = ixgbe_dev_configure,
36 .dev_start = ixgbe_dev_start,
37 ...
38 .rx_queue_setup = ixgbe_dev_rx_queue_setup,
39 ...
40 .tx_queue_setup = ixgbe_dev_tx_queue_setup,
41 ...
42 }
(免费订阅,永久学习)学习地址: Dpdk/网络协议栈/vpp/OvS/DDos/NFV/虚拟化/高性能专家-学习视频教程-腾讯课堂
更多DPDK相关学习资料有需要的可以自行报名学习,免费订阅,永久学习,或点击这里加qun免费
领取,关注我持续更新哦! !
eth_ixgbe_pci_probe()

1 static int eth_ixgbe_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2 struct rte_pci_device *pci_dev)
3 { /* Delegates to the generic ethdev PCI probe: private data size is sizeof(struct ixgbe_adapter), init callback is eth_ixgbe_dev_init; its result is returned unchanged. */
4 return rte_eth_dev_pci_generic_probe(pci_dev,
5 sizeof(struct ixgbe_adapter), eth_ixgbe_dev_init);
6 }
7
8 static inline int
9 rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev,
10 size_t private_data_size, eth_dev_pci_callback_t dev_init)
11 {
12 ...
13 eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size);
14 ...
15 ret = dev_init(eth_dev); /* ixgbe为eth_ixgbe_dev_init() */
16 ...
17 }
18
19 static inline struct rte_eth_dev *
20 rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size)
21 {
22 ...
23 /* rte_eth_dev_allocate() ->
24 * rte_eth_dev_find_free_port()
25 * rte_eth_dev_data_alloc()
26 * eth_dev_get() */
27 eth_dev = rte_eth_dev_allocate(name);
28 ...
29 /* 分配private data,ixgbe为struct ixgbe_adapter */
30 eth_dev->data->dev_private = rte_zmalloc_socket(name,
31 private_data_size, RTE_CACHE_LINE_SIZE,
32 dev->device.numa_node);
33 ...
34 }
35
36 struct rte_eth_dev *
37 rte_eth_dev_allocate(const char *name)
38 {
39 ...
40 /* 遍历rte_eth_devices数组,找到一个空闲的设备 */
41 port_id = rte_eth_dev_find_free_port();
42 ...
43 /* 分配rte_eth_dev_data数组 */
44 rte_eth_dev_data_alloc();
45 ...
46 /* 设置port_id对应的设备的state为RTE_ETH_DEV_ATTACHED */
47 eth_dev = eth_dev_get(port_id);
48 ...
49 }

ixgbe_recv_pkts()
接收时回写:
1、网卡使用DMA写Rx FIFO中的Frame到Rx Ring Buffer中的mbuf,设置desc的DD为1
2、网卡驱动取走mbuf后,设置desc的DD为0,更新RDT

1 uint16_t
2 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
3 uint16_t nb_pkts)
4 {
5 ...
6 nb_rx = 0;
7 nb_hold = 0;
8 rxq = rx_queue;
9 rx_id = rxq->rx_tail; /* 相当于ixgbe的next_to_clean */
10 rx_ring = rxq->rx_ring;
11 sw_ring = rxq->sw_ring;
12 ...
13 while (nb_rx < nb_pkts) {
14 ...
15 /* 得到rx_tail指向的desc的指针 */
16 rxdp = &rx_ring[rx_id];
17 /* 若网卡回写的DD为0,跳出循环 */
18 staterr = rxdp->wb.upper.status_error;
19 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
20 break;
21 /* 得到rx_tail指向的desc */
22 rxd = *rxdp;
23 ...
24 /* 分配新mbuf */
25 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
26 ...
27 nb_hold++; /* 统计接收的mbuf数 */
28 rxe = &sw_ring[rx_id]; /* 得到旧mbuf */
29 rx_id++; /* 得到下一个desc的index,注意是一个环形缓冲区 */
30 if (rx_id == rxq->nb_rx_desc)
31 rx_id = 0;
32 ...
33 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf); /* 预取下一个mbuf */
34 ...
35 if ((rx_id & 0x3) == 0) {
36 rte_ixgbe_prefetch(&rx_ring[rx_id]);
37 rte_ixgbe_prefetch(&sw_ring[rx_id]);
38 }
39 ...
40 rxm = rxe->mbuf; /* rxm指向旧mbuf */
41 rxe->mbuf = nmb; /* rxe->mbuf指向新mbuf */
42 dma_addr =
43 rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb)); /* 得到新mbuf的总线地址 */
44 rxdp->read.hdr_addr = 0; /* 清零新mbuf对应的desc的DD,后续网卡会读desc */
45 rxdp->read.pkt_addr = dma_addr; /* 设置新mbuf对应的desc的总线地址,后续网卡会读desc */
46 ...
47 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
48 rxq->crc_len); /* 包长 */
49 rxm->data_off = RTE_PKTMBUF_HEADROOM;
50 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
51 rxm->nb_segs = 1;
52 rxm->next = NULL;
53 rxm->pkt_len = pkt_len;
54 rxm->data_len = pkt_len;
55 rxm->port = rxq->port_id;
56 ...
57 if (likely(pkt_flags & PKT_RX_RSS_HASH)) /* RSS */
58 rxm->hash.rss = rte_le_to_cpu_32(
59 rxd.wb.lower.hi_dword.rss);
60 else if (pkt_flags & PKT_RX_FDIR) { /* FDIR */
61 rxm->hash.fdir.hash = rte_le_to_cpu_16(
62 rxd.wb.lower.hi_dword.csum_ip.csum) &
63 IXGBE_ATR_HASH_MASK;
64 rxm->hash.fdir.id = rte_le_to_cpu_16(
65 rxd.wb.lower.hi_dword.csum_ip.ip_id);
66 }
67 ...
68 rx_pkts[nb_rx++] = rxm; /* 将旧mbuf放入rx_pkts数组 */
69 }
70 rxq->rx_tail = rx_id; /* rx_tail指向下一个desc */
71 ...
72 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
73 /* 若已处理的mbuf数大于上限(默认为32),更新RDT */
74 if (nb_hold > rxq->rx_free_thresh) {
75 ...
76 rx_id = (uint16_t) ((rx_id == 0) ?
77 (rxq->nb_rx_desc - 1) : (rx_id - 1));
78 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id); /* 将rx_id写入RDT */
79 nb_hold = 0; /* 清零nb_hold */
80 }
81 rxq->nb_rx_hold = nb_hold; /* 更新nb_rx_hold */
82 return nb_rx;
83 }

ixgbe_xmit_pkts()

1 uint16_t
2 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
3 uint16_t nb_pkts)
4 {
5 ...
6 txq = tx_queue;
7 sw_ring = txq->sw_ring;
8 txr = txq->tx_ring;
9 tx_id = txq->tx_tail; /* 相当于ixgbe的next_to_use */
10 txe = &sw_ring[tx_id]; /* 得到tx_tail指向的entry */
11 txp = NULL;
12 ...
13 /* 若空闲的mbuf数小于下限(默认为32),清理空闲的mbuf */
14 if (txq->nb_tx_free < txq->tx_free_thresh)
15 ixgbe_xmit_cleanup(txq);
16 ...
17 /* TX loop */
18 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
19 ...
20 tx_pkt = *tx_pkts++; /* 待发送的mbuf */
21 pkt_len = tx_pkt->pkt_len; /* 待发送的mbuf的长度 */
22 ...
23 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx); /* 使用的desc数 */
24 ...
25 tx_last = (uint16_t) (tx_id + nb_used - 1); /* tx_last指向最后一个desc */
26 ...
27 if (tx_last >= txq->nb_tx_desc) /* 注意是一个环形缓冲区 */
28 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
29 ...
30 if (nb_used > txq->nb_tx_free) {
31 ...
32 if (ixgbe_xmit_cleanup(txq) != 0) {
33 /* Could not clean any descriptors */
34 if (nb_tx == 0) /* 若是第一个包(未发包),return 0 */
35 return 0;
36 goto end_of_tx; /* 若非第一个包(已发包),停止发包,更新发送队列参数 */
37 }
38 ...
39 }
40 ...
41 /* 每个包可能包含多个分段,m_seg指向第一个分段 */
42 m_seg = tx_pkt;
43 do {
44 txd = &txr[tx_id]; /* desc */
45 txn = &sw_ring[txe->next_id]; /* 下一个entry */
46 ...
47 txe->mbuf = m_seg; /* 将m_seg挂载到txe */
48 ...
49 slen = m_seg->data_len; /* m_seg的长度 */
50 buf_dma_addr = rte_mbuf_data_dma_addr(m_seg); /* m_seg的总线地址 */
51 txd->read.buffer_addr =
52 rte_cpu_to_le_64(buf_dma_addr); /* 总线地址赋给txd->read.buffer_addr */
53 txd->read.cmd_type_len =
54 rte_cpu_to_le_32(cmd_type_len | slen); /* 长度赋给txd->read.cmd_type_len */
55 ...
56 txe->last_id = tx_last; /* last_id指向最后一个desc */
57 tx_id = txe->next_id; /* tx_id指向下一个desc */
58 txe = txn; /* txe指向下一个entry */
59 m_seg = m_seg->next; /* m_seg指向下一个分段 */
60 } while (m_seg != NULL);
61 ...
62 /* 最后一个分段 */
63 cmd_type_len |= IXGBE_TXD_CMD_EOP;
64 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used); /* 更新nb_tx_used */
65 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used); /* 更新nb_tx_free */
66 ...
67 if (txq->nb_tx_used >= txq->tx_rs_thresh) { /* 若使用的mbuf数大于上限(默认为32),设置RS */
68 ...
69 cmd_type_len |= IXGBE_TXD_CMD_RS;
70 ...
71 txp = NULL; /* txp为NULL表示已设置RS */
72 } else
73 txp = txd; /* txp非NULL表示未设置RS */
74 ...
75 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
76 }
77 ...
78 end_of_tx:
79 /* burst发包的最后一个包的最后一个分段 */
80 ...
81 if (txp != NULL) /* 若未设置RS,设置RS */
82 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
83 ...
84 IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); /* 将tx_id写入TDT */
85 txq->tx_tail = tx_id; /* tx_tail指向下一个desc */
86 ...
87 return nb_tx;
88 }


rte_eth_rx/tx_burst()

1 static inline uint16_t
2 rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
3 struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
4 {
5 /* 得到port_id对应的设备 */
6 struct rte_eth_dev *dev = &rte_eth_devices[port_id];
7 ...
8 /* ixgbe为ixgbe_recv_pkts() */
9 int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
10 rx_pkts, nb_pkts);
11 ...
12 }
13
14 static inline uint16_t
15 rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
16 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
17 {
18 /* 得到port_id对应的设备 */
19 struct rte_eth_dev *dev = &rte_eth_devices[port_id];
20 ...
21 /* ixgbe为ixgbe_xmit_pkts */
22 return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
23 }

1、从描述符中获取报文返回给应用层
首先根据应用层最后一次获取报文的位置,进而从描述符队列找到待被应用层接收的描述符。此时会判断描述符中的status_error是否已经打上了dd标记,有dd标记说明dma控制器已经把报文放到mbuf中了。这里解释下dd标记,当dma控制器将接收到的报文保存到描述符指向的mbuf空间时,由dma控制器打上dd标记,表示dma控制器已经把报文放到mbuf中了。应用层在获取完报文后,需要清除dd标记。
找到了描述符的位置,也就找到了mbuf空间。此时会根据描述符里面保存的信息,填充mbuf结构。例如填充报文的长度,vlanid, rss等信息。填充完mbuf后,将这个mbuf保存到应用层传进来的结构中,返回给应用层,这样应用层就获取到了这个报文。

1 uint16_t eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
2 {
3 while (nb_rx < nb_pkts)
4 {
5 //根据应用层上一次接收的位置,从描述符队列中找到下一个待被应用层接收的描述符
6 rxdp = &rx_ring[rx_id];
7 staterr = rxdp->wb.upper.status_error;
8 //检查状态是否为dd, 不是则说明dma控制器(网卡)还没有把报文写入mbuf,直接退出
9 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
10 {
11 break;
12 };
13 //找到了描述符的位置,也就从软件队列中找到了mbuf
14 rxe = &sw_ring[rx_id];
15 rx_id++;
16 rxm = rxe->mbuf;
17 //填充mbuf
18 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) - rxq->crc_len);
19 rxm->data_off = RTE_PKTMBUF_HEADROOM;
20 rxm->nb_segs = 1;
21 rxm->pkt_len = pkt_len;
22 rxm->data_len = pkt_len;
23 rxm->port = rxq->port_id;
24 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
25 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
26 //保存到应用层
27 rx_pkts[nb_rx++] = rxm;
28 }
29 }

2、从内存池中获取新的mbuf告诉dma控制器
当应用层从软件队列中获取到mbuf后, 需要重新从内存池申请一个mbuf空间,并将mbuf地址放到描述符队列中, 相当于告诉dma控制器,后续将收到的报文保存到这个新的mbuf中, 这也是狸猫换太子的过程。描述符是mbuf与dma控制器的中介,那dma控制器怎么知道描述符队列的地址呢?这在上一篇文章中已经介绍过了,将描述符队列的地址写入到了寄存器中,dma控制器通过读取寄存器就知道描述符队列的地址。
需要注意的是,将mbuf的地址保存到描述符中,此时会将dd标记给清0,这样dma控制器就认为这个mbuf里面的内容已经被应用层接收了,收到新报文后可以重新放到这个mbuf中。

1 uint16_t eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
2 {
3 while (nb_rx < nb_pkts)
4 {
5 //申请一个新的mbuf
6 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
7 //因为原来的mbuf被应用层取走了。这里替换原来的软件队列mbuf,这样网卡收到报文后可以放到这个新的mbuf
8 rxe->mbuf = nmb;
9 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
10 //将mbuf地址保存到描述符中,相当于告诉dma控制器mbuf的地址。
11 rxdp->read.hdr_addr = dma_addr; //这里会将dd标记清0
12 rxdp->read.pkt_addr = dma_addr;
13 }
14 }
原文链接:https://www.cnblogs.com/mysky007/p/11219593.html










![[附源码]计算机毕业设计Python的桌游信息管理系统(程序+源码+LW文档)](https://img-blog.csdnimg.cn/ff4f0c8d31e24d629e35aa890ba50738.png)








