Chapter 7: Multi-Core Network Optimization
Learning Objectives
- Understand the RPS, RFS, and XPS multi-core scaling mechanisms
- Master interrupt affinity and CPU-pinning techniques
- Understand network queue management and load balancing
- Be able to configure and tune multi-core network performance
🔬 Principles
RPS (Receive Packet Steering)
How RPS works:
- Distributes received packets across CPUs in software
- Selects a CPU from a hash of the packet's 4-tuple (source IP, destination IP, source port, destination port); see the sketch below
- Avoids a single-core receive bottleneck and raises aggregate throughput
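To make the steering decision concrete, the sketch below hashes a 4-tuple and maps the result onto an eligible CPU set, showing why one flow always lands on the same CPU while different flows spread out. The flow struct and hash function are invented for illustration; the kernel uses skb_get_hash() (often the NIC's RSS hash), not this simple mix.
// rps_hash_demo.c - illustration only: map a 4-tuple hash onto an RPS CPU set
#include <stdio.h>
#include <stdint.h>

struct flow {                       // hypothetical 4-tuple for the demo
    uint32_t saddr, daddr;
    uint16_t sport, dport;
};

static uint32_t flow_hash(const struct flow *f)
{
    // Cheap mixing function for the demo; not Toeplitz/jhash
    uint32_t h = f->saddr ^ f->daddr ^ ((uint32_t)f->sport << 16 | f->dport);
    h ^= h >> 16;
    h *= 0x45d9f3b;
    h ^= h >> 16;
    return h;
}

int main(void)
{
    // Suppose rps_cpus = f, i.e. CPUs 0-3 are eligible
    int rps_cpus[] = { 0, 1, 2, 3 };
    int n = sizeof(rps_cpus) / sizeof(rps_cpus[0]);

    struct flow flows[] = {
        { 0x0a000001, 0x0a000002, 40000, 80 },
        { 0x0a000001, 0x0a000002, 40001, 80 },
        { 0x0a000003, 0x0a000002, 40000, 443 },
    };

    for (int i = 0; i < 3; i++) {
        uint32_t h = flow_hash(&flows[i]);
        // The same flow always hashes to the same CPU, so per-flow packet
        // ordering is preserved while different flows spread across CPUs
        printf("flow %d: hash=0x%08x -> CPU %d\n", i, h, rps_cpus[h % n]);
    }
    return 0;
}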
RPS configuration:
# List the NIC's queues
ls /sys/class/net/eth0/queues/
# Enable RPS (spread eth0's rx-0 queue across CPUs 0-3)
echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus # f = binary 1111 = CPUs 0-3
RPS implementation (simplified from get_rps_cpu() in net/core/dev.c; the real function also handles per-queue maps, online-CPU checks, and the RFS path):
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                       struct rps_dev_flow **rflowp)
{
    struct rps_map *map;
    u16 tcpu;
    u32 hash;

    /* Flow hash: taken from the NIC's RSS hash if available, otherwise
     * computed over the packet's 4-tuple */
    hash = skb_get_hash(skb);

    /* The rps_cpus mask set via sysfs is stored per RX queue; the kernel
     * looks up the queue the skb arrived on. Condensed to queue 0 here. */
    map = rcu_dereference(dev->_rx[0].rps_map);
    if (map) {
        /* map->len is a count, so scale the hash into [0, len);
         * the kernel uses reciprocal_scale(hash, map->len) */
        tcpu = map->cpus[hash % map->len];
        if (tcpu != RPS_NO_CPU)
            return tcpu;
    }
    return -1; /* no RPS CPU selected; process on the current CPU */
}
RFS (Receive Flow Steering)
How RFS works:
- Steers each flow to the CPU where the application that owns the socket is running
- Improves cache hit rates by keeping kernel and application processing on the same core
- Builds on RPS (RPS must be enabled for RFS to take effect)
RFS configuration:
# Size the global socket flow table, then the per-queue flow count.
# A common rule of thumb is rps_flow_cnt = rps_sock_flow_entries / number of RX queues.
sudo sysctl -w net.core.rps_sock_flow_entries=32768
echo 2048 > /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
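RFS pays off when the application really does run on the CPU that handles its flow. One way to check this from user space is the SO_INCOMING_CPU socket option (Linux 3.19+, and a libc that defines the constant); the sketch below connects a TCP socket and asks which CPU last processed its packets. The address and port are placeholders.
// so_incoming_cpu.c - query which CPU last processed packets for a socket
#include <stdio.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0) { perror("socket"); return 1; }

    // Placeholder peer: a local service on port 80
    struct sockaddr_in addr = { .sin_family = AF_INET, .sin_port = htons(80) };
    inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

    if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        perror("connect");
        close(fd);
        return 1;
    }

    int cpu = -1;
    socklen_t len = sizeof(cpu);
    // Ask the kernel which CPU last handled this flow; with RFS working,
    // this should match the CPU the application thread runs on
    if (getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len) == 0)
        printf("flow last handled on CPU %d\n", cpu);
    else
        perror("getsockopt(SO_INCOMING_CPU)");

    close(fd);
    return 0;
}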
RFS implementation (RFS is handled inside the same get_rps_cpu() function; simplified sketch with the validity checks condensed):
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                       struct rps_dev_flow **rflowp)
{
    struct rps_sock_flow_table *sock_flow_table;
    struct rps_dev_flow_table *flow_table;
    struct rps_dev_flow *rflow;
    u16 next_cpu, tcpu;
    u32 hash;

    /* Flow hash over the 4-tuple (or the NIC's RSS hash) */
    hash = skb_get_hash(skb);

    /* Global table: for each flow hash, the CPU on which the owning
     * application last ran (recorded in recvmsg()/poll()). */
    sock_flow_table = rcu_dereference(rps_sock_flow_table);
    /* Per-device flow table: the CPU currently handling the flow in the
     * kernel (per RX queue in the real code; condensed to queue 0 here). */
    flow_table = rcu_dereference(dev->_rx[0].rps_flow_table);
    if (!sock_flow_table || !flow_table)
        return -1;                      /* RFS not configured; fall back to RPS */

    next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
    rflow = &flow_table->flows[hash & flow_table->mask];
    tcpu = rflow->cpu;

    /* Migrate the flow to the application's CPU only once the old CPU has
     * drained everything already queued for it (input_queue_head has passed
     * last_qtail); otherwise packets could be delivered out of order. */
    if (next_cpu != RPS_NO_CPU && tcpu != next_cpu &&
        (tcpu == RPS_NO_CPU ||
         (int)(per_cpu(softnet_data, tcpu).input_queue_head -
               rflow->last_qtail) >= 0)) {
        rflow->cpu = next_cpu;
        tcpu = next_cpu;
    }

    if (tcpu != RPS_NO_CPU) {
        *rflowp = rflow;
        return tcpu;
    }
    return -1;
}
XPS (Transmit Packet Steering)
How XPS works:
- Multi-queue scheduling on the transmit side (illustrated after the configuration below)
- Maps CPUs to TX queues, so the transmit queue is chosen from the CPU doing the send
- Improves transmit performance by avoiding cross-CPU contention on TX queues
XPS configuration:
# Map CPUs 0-3 to eth0's tx-0 queue
echo f > /sys/class/net/eth0/queues/tx-0/xps_cpus
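XPS picks the TX queue based on which CPU happens to be transmitting, so applications usually pair it with thread pinning. A minimal sketch, assuming the sender thread should stay on CPU 2 (the CPU number is an example); compile with -pthread:
// xps_tx_pin.c - keep the sending thread on one CPU so that, with XPS
// configured, its packets consistently use the TX queue mapped to that CPU
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(2, &set);   // example: pin the sender to CPU 2

    // Pin the calling thread; with XPS, packets sent from CPU 2 go out on
    // whichever tx-N queue has CPU 2 set in its xps_cpus mask
    int rc = pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
    if (rc != 0) {
        fprintf(stderr, "pthread_setaffinity_np: %s\n", strerror(rc));
        return 1;
    }
    printf("sender thread now running on CPU %d\n", sched_getcpu());
    // ... the socket send loop would go here ...
    return 0;
}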
Interrupt Affinity
How interrupt affinity works (example code follows the configuration below):
- Binds a hardware interrupt to a specific CPU
- Prevents the interrupt from migrating between CPUs
- Improves cache hit rates because the same core keeps handling the same NIC queue
Interrupt affinity configuration:
# Show the interrupt distribution (IRQ numbers below are examples)
cat /proc/interrupts | grep eth0
# Set interrupt affinity (the value is a CPU bitmask)
echo 2 > /proc/irq/24/smp_affinity # 0x2 = bind to CPU 1
echo 4 > /proc/irq/25/smp_affinity # 0x4 = bind to CPU 2
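Pinning the IRQ only helps if the consumer runs nearby. A minimal sketch, assuming the application that reads the NIC's traffic should sit on CPU 1, the same CPU the IRQ was bound to above:
// irq_locality.c - run this process on CPU 1, matching the IRQ affinity set
// above, so interrupt handling and application processing share caches
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(1, &set);   // same CPU the NIC interrupt was bound to

    if (sched_setaffinity(0, sizeof(set), &set) < 0) {   // 0 = this process
        perror("sched_setaffinity");
        return 1;
    }
    printf("process pinned to CPU %d\n", sched_getcpu());
    // ... the receive/epoll loop would go here ...
    return 0;
}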
⚙️ Implementation
Multi-Queue NIC Support
NIC queue structures (condensed; exact fields vary by kernel version):
struct net_device {
    struct netdev_queue *_tx;          // TX queues
    unsigned int num_tx_queues;        // number of TX queues allocated
    unsigned int real_num_tx_queues;   // number of TX queues in use
    struct netdev_rx_queue *_rx;       // RX queues
    unsigned int num_rx_queues;        // number of RX queues allocated
    unsigned int real_num_rx_queues;   // number of RX queues in use
    // ... many more fields
};
struct netdev_rx_queue {
    struct net_device *dev;
    struct kobject kobj;
    struct netdev_rx_queue *first;               // first queue in the array
    int index;                                   // queue index
    struct rps_map *rps_map;                     // RPS CPU map (rps_cpus)
    struct rps_dev_flow_table *rps_flow_table;   // RFS flow table (rps_flow_cnt)
};
Queue initialization (simplified; the real kernel allocates TX queues here and RX queues in a separate netif_alloc_rx_queues()):
static int netif_alloc_netdev_queues(struct net_device *dev)
{
    unsigned int count = dev->num_tx_queues;
    struct netdev_queue *tx;
    int i;

    // Allocate the TX queue array
    dev->_tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
    if (!dev->_tx)
        return -ENOMEM;

    // Allocate the RX queue array
    dev->_rx = kcalloc(dev->num_rx_queues, sizeof(struct netdev_rx_queue),
                       GFP_KERNEL);
    if (!dev->_rx) {
        kfree(dev->_tx);
        return -ENOMEM;
    }

    // Initialize each TX queue
    for (i = 0; i < count; i++) {
        tx = &dev->_tx[i];
        tx->dev = dev;
        tx->numa_node = NUMA_NO_NODE;
        tx->index = i;
    }
    return 0;
}
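User space can query how many RX/TX queues a NIC actually exposes through the ETHTOOL_GCHANNELS ioctl, which is what `ethtool -l` reports. A minimal sketch (the default interface name eth0 is an example; drivers that do not implement channels return an error):
// queue_count.c - query a NIC's RX/TX channel (queue) counts, like ethtool -l
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    const char *ifname = argc > 1 ? argv[1] : "eth0";   // example interface

    int fd = socket(AF_INET, SOCK_DGRAM, 0);
    if (fd < 0) { perror("socket"); return 1; }

    struct ethtool_channels ch = { .cmd = ETHTOOL_GCHANNELS };
    struct ifreq ifr;
    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
    ifr.ifr_data = (char *)&ch;

    // Ask the driver for its channel (queue) configuration
    if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
        perror("SIOCETHTOOL");          // driver may not support channels
        close(fd);
        return 1;
    }

    printf("%s: rx=%u tx=%u combined=%u (max combined=%u)\n",
           ifname, ch.rx_count, ch.tx_count, ch.combined_count,
           ch.max_combined);
    close(fd);
    return 0;
}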
RPS实现
RPS核心函数:
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow **rflowp) {
struct rps_sock_flow_table *sock_flow_table;
struct rps_dev_flow_table *flow_table;
struct rps_map *map;
u16 tcpu;
u32 hash;
// 计算4元组哈希
hash = skb_get_hash(skb);
// 查找socket流表
sock_flow_table = rcu_dereference(rps_sock_flow_table);
if (sock_flow_table) {
tcpu = sock_flow_table->ents[hash & sock_flow_table->mask];
if (tcpu != RPS_NO_CPU) {
return tcpu;
}
}
// 查找设备流表
flow_table = rcu_dereference(dev->rps_flow_table);
if (flow_table) {
struct rps_dev_flow *rflow;
rflow = &flow_table->flows[hash & flow_table->mask];
if (rflow->last_qtail != per_cpu(softnet_data, rflow->cpu).input_queue_head) {
rflow->last_qtail = per_cpu(softnet_data, rflow->cpu).input_queue_head;
rflow->last_flow = hash;
}
if (rflow->last_flow == hash) {
tcpu = rflow->cpu;
if (tcpu != RPS_NO_CPU) {
*rflowp = rflow;
return tcpu;
}
}
}
return -1;
}
🛠️ Commands
Multi-core configuration commands
# Show CPU information
lscpu
# Show the interrupt distribution
cat /proc/interrupts | grep eth0
# Show interrupt affinity
cat /proc/irq/*/smp_affinity_list
# List NIC queues
ls /sys/class/net/eth0/queues/
RPS/RFS configuration
# Enable RPS
echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus
# Enable RFS
sudo sysctl -w net.core.rps_sock_flow_entries=32768
echo 2048 > /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
# Enable XPS
echo f > /sys/class/net/eth0/queues/tx-0/xps_cpus
Performance monitoring
# Per-CPU utilization
mpstat -P ALL 1
# Per-CPU network softirq counters
grep -E "NET_RX|NET_TX" /proc/softirqs
# Per-interface packet and byte counters
cat /proc/net/dev
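To check whether receive processing is actually spread across cores, the per-CPU NET_RX counters in /proc/softirqs can also be read programmatically. A small sketch (columns follow the CPU order of the file's header line):
// net_rx_balance.c - print per-CPU NET_RX softirq counts from /proc/softirqs
#include <stdio.h>
#include <string.h>

int main(void)
{
    FILE *fp = fopen("/proc/softirqs", "r");
    if (!fp) { perror("fopen"); return 1; }

    char line[4096];
    while (fgets(line, sizeof(line), fp)) {
        if (strstr(line, "NET_RX:")) {
            // Walk the numbers after "NET_RX:"; each column is one CPU
            char *p = strchr(line, ':') + 1;
            unsigned long v;
            int cpu = 0, n;
            while (sscanf(p, "%lu%n", &v, &n) == 1) {
                printf("CPU%-3d NET_RX=%lu\n", cpu++, v);
                p += n;
            }
            break;
        }
    }
    fclose(fp);
    return 0;
}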
Code
Multi-core performance monitoring program
// multi_core_monitor.c - per-CPU utilization over a 1-second interval,
// computed from two samples of /proc/stat
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>

struct cpu_stats {
    unsigned long user;
    unsigned long nice;
    unsigned long system;
    unsigned long idle;
    unsigned long iowait;
    unsigned long irq;
    unsigned long softirq;
    unsigned long steal;
    unsigned long guest;
    unsigned long guest_nice;
};

int parse_cpu_stats(struct cpu_stats *stats, int cpu_count) {
    FILE *fp = fopen("/proc/stat", "r");
    if (!fp) {
        perror("fopen");
        return -1;
    }
    char line[256];
    int count = 0;
    while (fgets(line, sizeof(line), fp) && count < cpu_count) {
        // Only parse per-CPU "cpuN" lines; skip the aggregate "cpu" line
        if (strncmp(line, "cpu", 3) == 0 && isdigit((unsigned char)line[3])) {
            if (sscanf(line, "cpu%*d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
                       &stats[count].user, &stats[count].nice, &stats[count].system,
                       &stats[count].idle, &stats[count].iowait, &stats[count].irq,
                       &stats[count].softirq, &stats[count].steal, &stats[count].guest,
                       &stats[count].guest_nice) == 10) {
                count++;
            }
        }
    }
    fclose(fp);
    return count;
}

int main() {
    int cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
    struct cpu_stats *stats1 = malloc(cpu_count * sizeof(struct cpu_stats));
    struct cpu_stats *stats2 = malloc(cpu_count * sizeof(struct cpu_stats));
    if (!stats1 || !stats2) {
        perror("malloc");
        return 1;
    }
    // First sample
    int count1 = parse_cpu_stats(stats1, cpu_count);
    if (count1 < 0) {
        return 1;
    }
    sleep(1);
    // Second sample
    int count2 = parse_cpu_stats(stats2, cpu_count);
    if (count2 < 0) {
        return 1;
    }
    printf("CPU usage over the last second:\n");
    printf("%-4s %8s %8s %8s %8s %8s %8s\n",
           "CPU", "User%", "System%", "IOWait%", "IRQ%", "SoftIRQ%", "Idle%");
    for (int i = 0; i < count1 && i < count2; i++) {
        // Total jiffies in the interval (guest time is already included in user)
        unsigned long total1 = stats1[i].user + stats1[i].nice + stats1[i].system +
                               stats1[i].idle + stats1[i].iowait + stats1[i].irq +
                               stats1[i].softirq + stats1[i].steal;
        unsigned long total2 = stats2[i].user + stats2[i].nice + stats2[i].system +
                               stats2[i].idle + stats2[i].iowait + stats2[i].irq +
                               stats2[i].softirq + stats2[i].steal;
        unsigned long diff_total = total2 - total1;
        if (diff_total == 0) continue;
        unsigned long user_diff = stats2[i].user - stats1[i].user;
        unsigned long system_diff = stats2[i].system - stats1[i].system;
        unsigned long iowait_diff = stats2[i].iowait - stats1[i].iowait;
        unsigned long irq_diff = stats2[i].irq - stats1[i].irq;
        unsigned long softirq_diff = stats2[i].softirq - stats1[i].softirq;
        unsigned long idle_diff = stats2[i].idle - stats1[i].idle;
        printf("%-4d %8.1f %8.1f %8.1f %8.1f %8.1f %8.1f\n",
               i,
               (double)user_diff * 100 / diff_total,
               (double)system_diff * 100 / diff_total,
               (double)iowait_diff * 100 / diff_total,
               (double)irq_diff * 100 / diff_total,
               (double)softirq_diff * 100 / diff_total,
               (double)idle_diff * 100 / diff_total);
    }
    free(stats1);
    free(stats2);
    return 0;
}
Compile and run:
gcc multi_core_monitor.c -o multi_core_monitor
./multi_core_monitor
🧪 Experiments
Experiment 1: Measuring the Effect of RPS
Goal: measure how RPS affects multi-core network performance
Steps:
# 1. Watch per-CPU utilization in the background
mpstat -P ALL 1 &
# 2. Measure baseline throughput (single core)
iperf3 -s &
iperf3 -c localhost -t 30
# 3. Enable RPS on loopback (run as root)
echo f > /sys/class/net/lo/queues/rx-0/rps_cpus
# 4. Measure again
iperf3 -c localhost -t 30
# 5. Compare the CPU utilization
# Multiple CPUs should now share the network processing load
Expected results:
- Understand what RPS does
- Observe load balancing across cores
- Know how to configure RPS
Experiment 2: Interrupt Affinity Tuning
Goal: optimize interrupt handling performance
Steps:
# 1. Show the current interrupt distribution
cat /proc/interrupts | grep eth0
# 2. Show the number of CPU cores
nproc
# 3. Pin each queue interrupt to its own CPU (queue IRQ names vary by driver)
for i in {0..3}; do
  IRQ=$(grep "eth0-TxRx-$i" /proc/interrupts | awk -F: '{print $1}' | tr -d ' ')
  if [ -n "$IRQ" ]; then
    echo $i | sudo tee /proc/irq/$IRQ/smp_affinity_list
  fi
done
# 4. Verify the settings
cat /proc/interrupts | grep eth0
# 5. Measure performance
iperf3 -c localhost -t 30
Expected results:
- Understand interrupt affinity
- Observe the performance improvement
- Know how to optimize interrupt placement
Experiment 3: RFS Configuration Test
Goal: measure how RFS affects network performance
Steps:
# 1. Configure RFS
sudo sysctl -w net.core.rps_sock_flow_entries=32768
echo 2048 > /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
# 2. Measure performance
iperf3 -s &
iperf3 -c localhost -t 30
# 3. Check per-CPU softnet statistics (includes RPS/RFS steering counters)
cat /proc/net/softnet_stat
# 4. Test with multiple flows (a single iperf3 server runs one test at a
#    time, so use parallel streams rather than parallel clients)
iperf3 -c localhost -P 10 -t 10
Expected results:
- Understand what RFS does
- Observe the improvement in cache locality
- Know how to configure RFS
Experiment 4: XPS Configuration Test
Goal: measure how XPS affects transmit performance
Steps:
# 1. Configure XPS
echo f > /sys/class/net/eth0/queues/tx-0/xps_cpus
# 2. Measure transmit performance
iperf3 -s &
iperf3 -c localhost -t 30
# 3. Check per-interface TX counters
cat /proc/net/dev
# 4. Test with multiple parallel streams
iperf3 -c localhost -P 4 -t 30
Expected results:
- Understand what XPS does
- Observe the transmit performance improvement
- Know how to configure XPS
Troubleshooting
Common Problems
Problem 1: RPS has no effect
# Check the RPS configuration
cat /sys/class/net/eth0/queues/rx-0/rps_cpus
# Check how many queues the NIC exposes
ethtool -l eth0
# Check that the kernel was built with RPS support
grep RPS /boot/config-$(uname -r)
Problem 2: Interrupts are unevenly distributed
# Show the interrupt distribution
cat /proc/interrupts | grep eth0
# Check interrupt affinity
cat /proc/irq/*/smp_affinity_list
# Re-pin the queue interrupts (queue IRQ names vary by driver)
for i in {0..3}; do
  IRQ=$(grep "eth0-TxRx-$i" /proc/interrupts | awk -F: '{print $1}' | tr -d ' ')
  if [ -n "$IRQ" ]; then
    echo $i | sudo tee /proc/irq/$IRQ/smp_affinity_list
  fi
done
Problem 3: CPU load is unbalanced
# Check per-CPU utilization
mpstat -P ALL 1
# Check the RPS configuration
cat /sys/class/net/eth0/queues/rx-0/rps_cpus
# Widen the RPS CPU mask
echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus
Troubleshooting Checklist
- [ ] Check the number of CPU cores (nproc, lscpu)
- [ ] Verify the NIC queue count (ethtool -l)
- [ ] Confirm the RPS configuration (/sys/class/net/<dev>/queues/rx-<n>/rps_cpus)
- [ ] Check the interrupt distribution (/proc/interrupts)
- [ ] Verify interrupt affinity (/proc/irq/*/smp_affinity_list)
- [ ] Test network performance (iperf3, netperf)
- [ ] Check system logs (dmesg, /var/log/syslog)