You need to enable JavaScript to run this app.
优惠活动
大模型
产品
解决方案
定价
更多
文档控制台
免费开始使用

Mellanox ConnectX-5与DPDK中VFIO-PCI绑定问题:Verbs设备未找到错误

Mellanox ConnectX-5绑定VFIO-PCI后DPDK提示“Verbs设备未找到”问题排查

我使用Mellanox ConnectX-5 MCX516A-CCAT网卡,尝试在客户端PC与SoC板之间实现100G以太网数据传输。已安装Mellanox驱动与DPDK,计划通过DPDK结合VFIO-PCI进行数据传输。使用mlx5_core驱动配合testpmd测试时传输正常,但切换到VFIO-PCI后,尽管设备显示已绑定,仍出现“Verbs设备未找到”的错误。

测试C程序

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <string.h>

/* Number of descriptors requested for the single RX queue set up in main(). */
#define RX_RING_SIZE 1024
/* Mbufs allocated per available port; main() multiplies this by the port count. */
#define NUM_MBUFS 8191
/* Cache size argument passed to rte_pktmbuf_pool_create() in main(). */
#define MBUF_CACHE_SIZE 250
/* Capacity of the receive array and maximum packets requested per RX burst. */
#define BURST_SIZE 32

/*
 * Destination MAC address a received frame must carry to be reported as a
 * match in main() (presumably the client PC's NIC port -- confirm against
 * the actual hardware).
 */
static const struct rte_ether_addr client_mac = {
    .addr_bytes = {
        0xb8, 0xce, 0xf6,
        0x76, 0x11, 0x6f,
    },
};

/*
 * Source MAC address a received frame must carry to be reported as a match
 * in main() (presumably the SoC board's Ethernet port -- confirm against
 * the actual hardware).
 */
static const struct rte_ether_addr rf_soc_mac = {
    .addr_bytes = {
        0xfc, 0xc2, 0x3d,
        0x5d, 0x05, 0x9c,
    },
};

int main(int argc, char *argv[]) {
    struct rte_mempool *mbuf_pool;
    uint16_t port_id = 0;
    int ret;
    uint16_t nb_rxd = RX_RING_SIZE;

    ret = rte_eal_init(argc, argv);
    if (ret < 0) rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
    argc -= ret; argv += ret;

    mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * rte_eth_dev_count_avail(),
        MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
    if (mbuf_pool == NULL) rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

    struct rte_eth_conf port_conf = {0};
    port_conf.rxmode.max_lro_pkt_size = RTE_ETHER_MAX_LEN;

    ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
    if (ret < 0) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", ret, port_id);

    ret = rte_eth_rx_queue_setup(port_id, 0, nb_rxd, rte_eth_dev_socket_id(port_id), NULL, mbuf_pool);
    if (ret < 0) rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n", ret, port_id);

    ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_eth_dev_socket_id(port_id), NULL);
    if (ret < 0) rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n", ret, port_id);

    ret = rte_eth_dev_start(port_id);
    if (ret < 0) rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n", ret, port_id);

    printf("Started port %u\n", port_id);

    struct rte_mbuf *bufs[BURST_SIZE];
    while (1) {
        const uint16_t nb_rx = rte_eth_rx_burst(port_id, 0, bufs, BURST_SIZE);
        if (nb_rx == 0) continue;

        for (int i = 0; i < nb_rx; i++) {
            struct rte_mbuf *mbuf = bufs[i];
            struct rte_ether_hdr *eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);

            if (rte_is_same_ether_addr(&eth_hdr->dst_addr, &client_mac) &&
                rte_is_same_ether_addr(&eth_hdr->src_addr, &rf_soc_mac)) {

                printf("Received matching frame, length: %u bytes\n", rte_pktmbuf_pkt_len(mbuf));
            } else {
                printf("Received frame but MAC not matched\n");
            }

            rte_pktmbuf_free(mbuf);
        }
    }

    return 0;
}

编译执行命令

gcc -o dpdk_rx_example dpdk_rx_example.c $(pkg-config --cflags --libs libdpdk)
sudo ./dpdk_rx_example -l 0-3 -n 4 -a 0000:0f:00.1

错误信息

mlx5_common: Verbs device not found: 0000:0f:00.1
mlx5_common: Failed to initialize device context.
EAL: Requested device 0000:0f:00.1 cannot be used
EAL: Bus (pci) probe failed.
EAL: Error - exiting with code: 1
Cause: Cannot create mbuf pool

已确认设备绑定状态

Network devices using DPDK-compatible driver
============================================
0000:0f:00.0 'MT27800 Family [ConnectX-5] 1017' drv=vfio-pci unused=mlx5_core
0000:0f:00.1 'MT27800 Family [ConnectX-5] 1017' drv=vfio-pci unused=mlx5_core

已尝试的排查步骤

  • 检查mlx5 PMD是否启用:
    grep CONFIG_RTE_LIBRTE_MLX5_PMD ~/dpdk-22.11.8/config/common_base
    
    预期输出:CONFIG_RTE_LIBRTE_MLX5_PMD=y
  • 检查并安装rdma-core:
    dpkg -l | grep rdma-core
    
    未安装则执行:
    sudo apt-get update
    sudo apt-get install rdma-core
    
  • 验证设备绑定状态:
    sudo ./usertools/dpdk-devbind.py --status
    
    预期网卡绑定到 vfio-pci 或 uio_pci_generic
  • 确认以root用户执行:
    whoami
    
    非root则切换:
    sudo -i
    
  • 查看内核日志:
    dmesg | grep mlx5
    dmesg | grep vfio
    dmesg | grep dpdk
    
  • 重新绑定设备:
    sudo ip link set dev <interface_name> down
    sudo systemctl stop NetworkManager  # if applicable
    sudo modprobe -r mlx5_ib mlx5_core
    
    echo 15b3 1017 | sudo tee /sys/bus/pci/drivers/vfio-pci/new_id
    echo 0000:0f:00.1 | sudo tee /sys/bus/pci/drivers/vfio-pci/unbind
    echo 0000:0f:00.1 | sudo tee /sys/bus/pci/drivers/vfio-pci/bind
    
    操作后再次用dpdk-devbind.py --status验证绑定状态

核心疑问

  1. Mellanox ConnectX-5是否真的兼容DPDK中的VFIO-PCI?
  2. 该问题的原因是什么,如何解决?

本问题来源于 Stack Exchange,提问者:이지나

火山引擎 最新活动