第7章 多核网络优化

学习目标

理解RPS、RFS、XPS多核扩展机制
掌握中断亲和性和CPU绑定技术
了解网络队列管理和负载均衡
能够配置和调优多核网络性能

🔬 原理

RPS（Receive Packet Steering）

RPS原理：

软件层面将包分发到不同CPU
基于包的4元组哈希（源IP、目标IP、源端口、目标端口）
避免单核瓶颈，提升处理能力

RPS配置：

# List the RX/TX queue directories of the NIC
ls /sys/class/net/eth0/queues/

# Configure RPS: let CPUs 0-3 process packets received on eth0's rx-0 queue
echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus  # hex mask f = binary 1111 = CPUs 0-3

RPS实现：

/*
 * Simplified RPS CPU selection: map the packet's flow hash onto one of the
 * CPUs listed in the device's RPS map.
 *
 * dev:    device whose rps_map is consulted
 * skb:    packet whose flow hash selects the CPU
 * rflowp: not used by this RPS-only variant; kept so the signature matches
 *         the RFS variant of the same function
 *
 * Returns the selected CPU, or -1 when no map is installed, the map is
 * empty, or the selected slot holds RPS_NO_CPU.
 *
 * NOTE(review): this excerpt reads the map from dev->rps_map; confirm
 * against the target kernel whether the map hangs off the device or off
 * the per-RX-queue structure.
 */
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                       struct rps_dev_flow **rflowp) {
    struct rps_map *map;
    u16 tcpu;
    u32 hash;

    /* Flow hash of the packet */
    hash = skb_get_hash(skb);

    /* Pick a CPU from the map based on the hash */
    map = rcu_dereference(dev->rps_map);
    if (map && map->len) {
        /*
         * map->len is the number of entries in cpus[], not a bit mask.
         * "hash & len" would skip slots and can produce index == len,
         * one past the end of the array, so reduce the hash modulo the
         * entry count instead.
         */
        tcpu = map->cpus[hash % map->len];
        if (tcpu != RPS_NO_CPU) {
            return tcpu;
        }
    }

    return -1;
}

RFS（Receive Flow Steering）

RFS原理：

将包发送到处理对应socket的CPU
提升缓存命中率
需要RPS支持

RFS配置：

# Configure RFS
# Global socket flow table size
sudo sysctl -w net.core.rps_sock_flow_entries=32768
# Per-queue flow count. With "sudo echo ... > file" the redirection would
# still be opened by the unprivileged shell, so the write goes through
# "sudo tee" instead.
echo 2048 | sudo tee /sys/class/net/eth0/queues/rx-0/rps_flow_cnt

RFS实现：

/*
 * Simplified RFS CPU selection.
 *
 * Lookup order:
 *   1. the global socket flow table, indexed by the masked flow hash;
 *   2. the device flow table, indexed by the masked flow hash.
 *
 * dev:    device whose rps_flow_table is consulted
 * skb:    packet whose flow hash drives both lookups
 * rflowp: on a device-flow-table hit, set to the matching flow entry
 *
 * Returns the chosen CPU, or -1 when neither table yields one.
 */
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                       struct rps_dev_flow **rflowp) {
    struct rps_sock_flow_table *sock_flow_table;
    struct rps_dev_flow_table *flow_table;
    u16 tcpu;
    u32 hash;

    /* Flow hash of the packet */
    hash = skb_get_hash(skb);

    /* Socket flow table lookup */
    sock_flow_table = rcu_dereference(rps_sock_flow_table);
    if (sock_flow_table) {
        tcpu = sock_flow_table->ents[hash & sock_flow_table->mask];
        if (tcpu != RPS_NO_CPU) {
            return tcpu;
        }
    }

    /* Device flow table lookup */
    flow_table = rcu_dereference(dev->rps_flow_table);
    if (flow_table) {
        struct rps_dev_flow *rflow;
        rflow = &flow_table->flows[hash & flow_table->mask];

        /*
         * Validate the recorded CPU before using it as a per-CPU index:
         * an entry with no CPU assigned (RPS_NO_CPU) must not be passed
         * to per_cpu().
         */
        tcpu = rflow->cpu;
        if (tcpu == RPS_NO_CPU) {
            return -1;
        }

        /*
         * If the target CPU's input queue head differs from the value
         * recorded in the entry, refresh the entry and record this hash
         * as its flow.
         */
        if (rflow->last_qtail != per_cpu(softnet_data, tcpu).input_queue_head) {
            rflow->last_qtail = per_cpu(softnet_data, tcpu).input_queue_head;
            rflow->last_flow = hash;
        }

        if (rflow->last_flow == hash) {
            *rflowp = rflow;
            return tcpu;
        }
    }

    return -1;
}

XPS（Transmit Packet Steering）

XPS原理：

发送方向的多队列调度
基于当前CPU选择发送队列（为每个发送队列指定允许使用它的CPU集合）
提升发送性能

XPS配置：

# Configure XPS: write hex CPU mask f (CPUs 0-3) for eth0's tx-0 queue
echo f > /sys/class/net/eth0/queues/tx-0/xps_cpus

中断亲和性

中断亲和性原理：

将中断绑定到特定CPU
避免中断在CPU间迁移
提升缓存命中率

中断亲和性配置：

# Show how eth0's interrupts are spread across CPUs
grep eth0 /proc/interrupts

# Pin interrupts to CPUs (the value is a hex CPU bit mask)
echo 2 > /proc/irq/24/smp_affinity  # mask 0x2 -> CPU 1
echo 4 > /proc/irq/25/smp_affinity  # mask 0x4 -> CPU 2

实现

多队列网卡支持

网卡队列结构：

/* Excerpt of struct net_device: only the multi-queue fields are shown. */
struct net_device {
    struct netdev_queue *_tx;      // TX queue array
    unsigned int num_tx_queues;    // number of TX queues
    unsigned int real_num_tx_queues;
    
    struct netdev_rx_queue *_rx;   // RX queue array
    unsigned int num_rx_queues;    // number of RX queues
    unsigned int real_num_rx_queues;
    
    // ... more fields
};

/* Per-RX-queue state, including the RPS map and RFS flow table of the queue. */
struct netdev_rx_queue {
    struct net_device *dev;
    struct kobject kobj;
    struct netdev_rx_queue *first; // first queue
    int index;                     // queue index
    struct rps_map *rps_map;       // RPS map
    struct rps_dev_flow_table *rps_flow_table; // RFS flow table
};

队列初始化：

static int netif_alloc_netdev_queues(struct net_device *dev) {
    unsigned int count = dev->num_tx_queues;
    struct netdev_queue *tx, *rx;
    int i;
    
    // 分配发送队列
    dev->_tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
    if (!dev->_tx) {
        return -ENOMEM;
    }
    
    // 分配接收队列
    dev->_rx = kcalloc(dev->num_rx_queues, sizeof(struct netdev_rx_queue), GFP_KERNEL);
    if (!dev->_rx) {
        kfree(dev->_tx);
        return -ENOMEM;
    }
    
    // 初始化队列
    for (i = 0; i < count; i++) {
        tx = &dev->_tx[i];
        tx->dev = dev;
        tx->numa_node = NUMA_NO_NODE;
        tx->index = i;
    }
    
    return 0;
}

RPS实现

RPS核心函数：

/*
 * Simplified RPS/RFS CPU selection.
 *
 * Lookup order:
 *   1. the global socket flow table, indexed by the masked flow hash;
 *   2. the device flow table, indexed by the masked flow hash.
 *
 * dev:    device whose rps_flow_table is consulted
 * skb:    packet whose flow hash drives both lookups
 * rflowp: on a device-flow-table hit, set to the matching flow entry
 *
 * Returns the chosen CPU, or -1 when neither table yields one.
 */
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                       struct rps_dev_flow **rflowp) {
    struct rps_sock_flow_table *sock_flow_table;
    struct rps_dev_flow_table *flow_table;
    u16 tcpu;
    u32 hash;

    /* Flow hash of the packet */
    hash = skb_get_hash(skb);

    /* Socket flow table lookup */
    sock_flow_table = rcu_dereference(rps_sock_flow_table);
    if (sock_flow_table) {
        tcpu = sock_flow_table->ents[hash & sock_flow_table->mask];
        if (tcpu != RPS_NO_CPU) {
            return tcpu;
        }
    }

    /* Device flow table lookup */
    flow_table = rcu_dereference(dev->rps_flow_table);
    if (flow_table) {
        struct rps_dev_flow *rflow;
        rflow = &flow_table->flows[hash & flow_table->mask];

        /*
         * Validate the recorded CPU before using it as a per-CPU index:
         * an entry with no CPU assigned (RPS_NO_CPU) must not be passed
         * to per_cpu().
         */
        tcpu = rflow->cpu;
        if (tcpu == RPS_NO_CPU) {
            return -1;
        }

        /*
         * If the target CPU's input queue head differs from the value
         * recorded in the entry, refresh the entry and record this hash
         * as its flow.
         */
        if (rflow->last_qtail != per_cpu(softnet_data, tcpu).input_queue_head) {
            rflow->last_qtail = per_cpu(softnet_data, tcpu).input_queue_head;
            rflow->last_flow = hash;
        }

        if (rflow->last_flow == hash) {
            *rflowp = rflow;
            return tcpu;
        }
    }

    return -1;
}

🛠️ 命令

多核配置命令

# Show CPU topology
lscpu

# Show how eth0's interrupts are spread across CPUs
grep eth0 /proc/interrupts

# Show the affinity of every IRQ. grep -H prefixes each value with its file
# name, so the IRQ number stays visible (plain cat prints bare values only).
grep -H . /proc/irq/*/smp_affinity_list

# List the NIC's RX/TX queues
ls /sys/class/net/eth0/queues/

RPS/RFS配置

# The sysfs files are root-only. A shell redirection is opened by the calling
# (unprivileged) shell, so every write below goes through "sudo tee".

# Configure RPS
echo f | sudo tee /sys/class/net/eth0/queues/rx-0/rps_cpus

# Configure RFS
sudo sysctl -w net.core.rps_sock_flow_entries=32768
echo 2048 | sudo tee /sys/class/net/eth0/queues/rx-0/rps_flow_cnt

# Configure XPS
echo f | sudo tee /sys/class/net/eth0/queues/tx-0/xps_cpus

性能监控

# Per-CPU utilisation, refreshed every second
mpstat -P ALL 1

# Network softirq counters per CPU: keep the CPU header row plus the
# NET_TX / NET_RX rows (the first two lines of the file would only show
# the header and the HI row)
grep -E 'CPU|NET_TX|NET_RX' /proc/softirqs

# Per-interface traffic counters
cat /proc/net/dev

代码

多核性能监控程序

// multi_core_monitor.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/sysinfo.h>

/*
 * Time counters of one CPU, in the order parse_cpu_stats() reads them from
 * a "cpuN" line of /proc/stat.
 * NOTE(review): per proc(5) these are cumulative values in USER_HZ ticks;
 * the per-field descriptions below follow that man page — confirm against
 * the target kernel.
 */
struct cpu_stats {
    unsigned long user;        // user mode
    unsigned long nice;        // user mode, niced processes
    unsigned long system;      // kernel mode
    unsigned long idle;        // idle
    unsigned long iowait;      // waiting for I/O
    unsigned long irq;         // servicing hardware interrupts
    unsigned long softirq;     // servicing softirqs
    unsigned long steal;       // stolen by the hypervisor
    unsigned long guest;       // running a guest
    unsigned long guest_nice;  // running a niced guest
};

/*
 * Read per-CPU time counters from /proc/stat.
 *
 * stats:     output array with room for at least cpu_count entries
 * cpu_count: maximum number of entries to fill
 *
 * Lines starting with "cpu" are parsed in file order; a line is stored only
 * when all ten counters convert. The aggregate "cpu" line has no number
 * after the prefix, so %*d swallows its first counter, fewer than ten
 * values convert, and the line is skipped.
 *
 * Returns the number of entries filled, or -1 if /proc/stat cannot be
 * opened (after reporting the error with perror).
 */
int parse_cpu_stats(struct cpu_stats *stats, int cpu_count) {
    FILE *stat_file = fopen("/proc/stat", "r");
    if (stat_file == NULL) {
        perror("fopen");
        return -1;
    }

    char buf[256];
    int parsed = 0;

    for (;;) {
        /* Stop at end of file or once the output array is full */
        if (fgets(buf, sizeof(buf), stat_file) == NULL || parsed >= cpu_count) {
            break;
        }

        if (strncmp(buf, "cpu", 3) != 0) {
            continue;
        }

        struct cpu_stats *slot = &stats[parsed];
        int fields = sscanf(buf, "cpu%*d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
                            &slot->user, &slot->nice, &slot->system,
                            &slot->idle, &slot->iowait, &slot->irq,
                            &slot->softirq, &slot->steal, &slot->guest,
                            &slot->guest_nice);

        /* Only a fully parsed line claims a slot */
        if (fields == 10) {
            parsed++;
        }
    }

    fclose(stat_file);
    return parsed;
}

int main() {
    int cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
    struct cpu_stats *stats1 = malloc(cpu_count * sizeof(struct cpu_stats));
    struct cpu_stats *stats2 = malloc(cpu_count * sizeof(struct cpu_stats));
    
    if (!stats1 || !stats2) {
        perror("malloc");
        return 1;
    }
    
    // 第一次采样
    int count1 = parse_cpu_stats(stats1, cpu_count);
    if (count1 < 0) {
        return 1;
    }
    
    sleep(1);
    
    // 第二次采样
    int count2 = parse_cpu_stats(stats2, cpu_count);
    if (count2 < 0) {
        return 1;
    }
    
    printf("CPU Usage (per second):\n");
    printf("%-4s %8s %8s %8s %8s %8s %8s\n", 
           "CPU", "User%", "System%", "IOWait%", "IRQ%", "SoftIRQ%", "Idle%");
    
    for (int i = 0; i < count1 && i < count2; i++) {
        unsigned long total1 = stats1[i].user + stats1[i].nice + stats1[i].system +
                              stats1[i].idle + stats1[i].iowait + stats1[i].irq +
                              stats1[i].softirq + stats1[i].steal;
        
        unsigned long total2 = stats2[i].user + stats2[i].nice + stats2[i].system +
                              stats2[i].idle + stats2[i].iowait + stats2[i].irq +
                              stats2[i].softirq + stats2[i].steal;
        
        unsigned long diff_total = total2 - total1;
        if (diff_total == 0) continue;
        
        unsigned long user_diff = stats2[i].user - stats1[i].user;
        unsigned long system_diff = stats2[i].system - stats1[i].system;
        unsigned long iowait_diff = stats2[i].iowait - stats1[i].iowait;
        unsigned long irq_diff = stats2[i].irq - stats1[i].irq;
        unsigned long softirq_diff = stats2[i].softirq - stats1[i].softirq;
        unsigned long idle_diff = stats2[i].idle - stats1[i].idle;
        
        printf("%-4d %8.1f %8.1f %8.1f %8.1f %8.1f %8.1f\n",
               i,
               (double)user_diff * 100 / diff_total,
               (double)system_diff * 100 / diff_total,
               (double)iowait_diff * 100 / diff_total,
               (double)irq_diff * 100 / diff_total,
               (double)softirq_diff * 100 / diff_total,
               (double)idle_diff * 100 / diff_total);
    }
    
    free(stats1);
    free(stats2);
    return 0;
}

编译运行：

# Build the monitor, then run it (it samples /proc/stat twice, one second apart)
gcc multi_core_monitor.c -o multi_core_monitor
./multi_core_monitor

🧪 实验

实验1：RPS效果测试

目标：测试RPS对多核网络性能的影响

步骤：

# 1. Watch per-CPU utilisation in the background while the tests run
mpstat -P ALL 1 &
MPSTAT_PID=$!

# 2. Baseline (single core): clear the RPS mask first (0 = RPS off) so this
#    run really is the no-RPS case even if a mask was set earlier
echo 0 > /sys/class/net/lo/queues/rx-0/rps_cpus
iperf3 -s &
sleep 1  # give the server a moment to start listening
iperf3 -c localhost -t 30

# 3. Enable RPS on CPUs 0-3
echo f > /sys/class/net/lo/queues/rx-0/rps_cpus

# 4. Measure again
iperf3 -c localhost -t 30

# 5. Compare the mpstat output of both runs:
#    with RPS enabled several CPUs should take part in network processing

# 6. Stop the background mpstat (the iperf3 server is left running)
kill "$MPSTAT_PID"

预期结果：

理解RPS的作用
观察多核负载均衡
掌握RPS配置

实验2：中断亲和性优化

目标：优化中断处理性能

步骤：

# 1. Show the current interrupt distribution
grep eth0 /proc/interrupts

# 2. Number of CPU cores
nproc

# 3. Pin queue i's interrupt to CPU i.
#    The IRQ column of /proc/interrupts is right-aligned, so splitting on ':'
#    would keep leading blanks and break the /proc/irq/<n>/ path. awk's
#    default field splitting drops the padding; the exact match on the last
#    field keeps eth0-TxRx-1 from also matching eth0-TxRx-10.
for i in {0..3}; do
    IRQ=$(awk -v q="eth0-TxRx-$i" '$NF == q { sub(/:$/, "", $1); print $1 }' /proc/interrupts)
    if [ -n "$IRQ" ]; then
        echo "$i" | sudo tee "/proc/irq/$IRQ/smp_affinity_list"
    fi
done

# 4. Verify the result
grep eth0 /proc/interrupts

# 5. Measure
iperf3 -c localhost -t 30

预期结果：

理解中断亲和性
观察性能提升
掌握中断优化

实验3：RFS配置测试

目标：测试RFS对网络性能的影响

步骤：

# 1. Configure RFS (the sysfs write goes through "sudo tee" because a plain
#    redirection would be opened by the unprivileged shell)
sudo sysctl -w net.core.rps_sock_flow_entries=32768
echo 2048 | sudo tee /sys/class/net/eth0/queues/rx-0/rps_flow_cnt

# 2. Measure
iperf3 -s &
sleep 1  # give the server a moment to start listening
iperf3 -c localhost -t 30

# 3. Per-CPU softnet counters
cat /proc/net/softnet_stat

# 4. Many connections at once. One iperf3 server runs a single test at a
#    time, so separate concurrent clients would be turned away as "server
#    busy"; use parallel streams within one test instead.
iperf3 -c localhost -P 10 -t 10

预期结果：

理解RFS的作用
观察缓存命中率提升
掌握RFS配置

实验4：XPS配置测试

目标：测试XPS对发送性能的影响

步骤：

# 1. Configure XPS: hex CPU mask f (CPUs 0-3) for eth0's tx-0 queue
echo f > /sys/class/net/eth0/queues/tx-0/xps_cpus

# 2. Measure transmit throughput
# NOTE(review): traffic to localhost normally travels over lo, not eth0, so
# this run may not exercise eth0's tx queues; consider pointing -c at an
# iperf3 server on another host.
iperf3 -s &
iperf3 -c localhost -t 30

# 3. Per-interface traffic counters
cat /proc/net/dev

# 4. Four parallel streams
iperf3 -c localhost -P 4 -t 30

预期结果：

理解XPS的作用
观察发送性能提升
掌握XPS配置

排错

常见问题排查

问题1：RPS不生效

# Show the RPS CPU mask currently set for rx-0
cat /sys/class/net/eth0/queues/rx-0/rps_cpus

# Show the NIC's channel (queue) counts
ethtool -l eth0

# Check whether the running kernel's config mentions RPS
grep RPS /boot/config-$(uname -r)

问题2：中断分布不均

# Show the interrupt distribution
grep eth0 /proc/interrupts

# Show the affinity of every IRQ, with the file name so the IRQ number is visible
grep -H . /proc/irq/*/smp_affinity_list

# Re-apply the affinity: pin queue i's interrupt to CPU i.
# The IRQ column of /proc/interrupts is right-aligned, so splitting on ':'
# would keep leading blanks and break the /proc/irq/<n>/ path. awk's default
# field splitting drops the padding; the exact match on the last field keeps
# eth0-TxRx-1 from also matching eth0-TxRx-10.
for i in {0..3}; do
    IRQ=$(awk -v q="eth0-TxRx-$i" '$NF == q { sub(/:$/, "", $1); print $1 }' /proc/interrupts)
    if [ -n "$IRQ" ]; then
        echo "$i" | sudo tee "/proc/irq/$IRQ/smp_affinity_list"
    fi
done

问题3：CPU使用率不均衡

# Per-CPU utilisation, refreshed every second
mpstat -P ALL 1

# Show the RPS CPU mask currently set for rx-0
cat /sys/class/net/eth0/queues/rx-0/rps_cpus

# Adjust the RPS mask (hex f = CPUs 0-3)
echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus

排错清单

[ ] 检查CPU核心数（nproc、lscpu）
[ ] 验证网卡队列数（ethtool -l）
[ ] 确认RPS配置（/sys/class/net/<网卡名>/queues/rx-<N>/rps_cpus）
[ ] 检查中断分布（/proc/interrupts）
[ ] 验证中断亲和性（/proc/irq/*/smp_affinity_list）
[ ] 测试网络性能（iperf3、netperf）
[ ] 查看系统日志（dmesg、/var/log/syslog）