06-网络模型与Netpoll

章节概述

Go 语言的网络模型基于 epoll/kqueue 实现，通过 netpoll 机制实现高效的异步 I/O。本章将深入解析 Go 网络模型的设计原理、源码实现和性能优化策略，帮助读者理解 Go 如何实现百万级并发网络服务。

学习目标

理解 netpoll 架构的设计原理
掌握 pollDesc 与 fd 绑定机制
了解 epoll/kqueue 事件循环实现
学会 goroutine 与 I/O 协作机制
能够进行网络性能调优

️ 网络模型架构

Netpoll 层次结构

┌─────────────────────────────────────────────────────────┐
│                    Go 网络模型                          │
├─────────────────────────────────────────────────────────┤
│  应用层: net.Conn, net.Listen, net.Dial                │
│  ┌─────────────────────────────────────────────────────┐ │
│  │ 用户友好的网络接口                                  │ │
│  └─────────────────────────────────────────────────────┘ │
│                                                         │
│  Net层: netpoll, pollDesc, netFD                       │
│  ┌─────────────────────────────────────────────────────┐ │
│  │ 网络事件管理，fd 与 goroutine 绑定                  │ │
│  └─────────────────────────────────────────────────────┘ │
│                                                         │
│  Runtime层: GMP 调度器                                  │
│  ┌─────────────────────────────────────────────────────┐ │
│  │ 协程调度，I/O 事件与 goroutine 协作                 │ │
│  └─────────────────────────────────────────────────────┘ │
│                                                         │
│  OS层: epoll/kqueue/IOCP                               │
│  ┌─────────────────────────────────────────────────────┐ │
│  │ 系统级 I/O 多路复用                                 │ │
│  └─────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────┘

事件驱动模型

┌─────────────────────────────────────────────────────────┐
│                   事件驱动模型                          │
├─────────────────────────────────────────────────────────┤
│  Accept 事件     Read 事件      Write 事件             │
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐    │
│  │ 新连接到达  │  │ 数据可读    │  │ 数据可写    │    │
│  │ 创建新 fd   │  │ 唤醒读 G    │  │ 唤醒写 G    │    │
│  └─────────────┘  └─────────────┘  └─────────────┘    │
│                                                         │
│  Close 事件      Error 事件                             │
│  ┌─────────────┐  ┌─────────────┐                      │
│  │ 连接关闭    │  │ 错误发生    │                      │
│  │ 清理资源    │  │ 错误处理    │                      │
│  └─────────────┘  └─────────────┘                      │
└─────────────────────────────────────────────────────────┘

核心数据结构

pollDesc 结构

文件位置：src/runtime/netpoll.go

type pollDesc struct {
    link    *pollDesc // 链表指针
    fd      uintptr   // 文件描述符
    rg      uintptr   // 读等待的 goroutine
    wg      uintptr   // 写等待的 goroutine
    closing bool      // 是否正在关闭
    seq     uintptr   // 序列号，防止复用
    lock    mutex     // 保护字段的锁
}

netFD 结构

文件位置：src/net/fd_unix.go

type netFD struct {
    pfd poll.FD
    
    // 网络地址
    family      int
    sotype      int
    isConnected bool
    net         string
    laddr       Addr
    raddr       Addr
}

epoll 事件结构

文件位置：src/runtime/netpoll_epoll.go

type epollevent struct {
    events uint32
    data   [8]byte
}

const (
    _EPOLLIN      = 0x1
    _EPOLLOUT     = 0x4
    _EPOLLERR     = 0x8
    _EPOLLHUP     = 0x10
    _EPOLLRDHUP   = 0x2000
    _EPOLLET      = 0x80000000
    _EPOLLONESHOT = 0x40000000
)

Netpoll 初始化

epoll 初始化

func netpollinit() {
    epfd = epollcreate1(_EPOLL_CLOEXEC)
    if epfd < 0 {
        epfd = epollcreate(1024)
        if epfd < 0 {
            println("runtime: epollcreate failed with", -epfd)
            throw("runtime: netpollinit failed")
        }
        closeonexec(epfd)
    }
    
    r, w, errno := nonblockingPipe()
    if errno != 0 {
        println("runtime: pipe failed with", -errno)
        throw("runtime: pipe failed")
    }
    
    ev := epollevent{
        events: _EPOLLIN,
    }
    *(**uintptr)(unsafe.Pointer(&ev.data)) = &netpollBreakRd
    errno = epollctl(epfd, _EPOLL_CTL_ADD, r, &ev)
    if errno != 0 {
        println("runtime: epollctl failed with", -errno)
        throw("runtime: epollctl failed")
    }
    
    netpollBreakRd = uintptr(r)
    netpollBreakWr = uintptr(w)
}

pollDesc 初始化

func netpollopen(fd uintptr, pd *pollDesc) int32 {
    var ev epollevent
    ev.events = _EPOLLIN | _EPOLLOUT | _EPOLLRDHUP | _EPOLLET
    *(**uintptr)(unsafe.Pointer(&ev.data)) = pd
    return -epollctl(epfd, _EPOLL_CTL_ADD, int32(fd), &ev)
}

网络 I/O 操作

读操作实现

func (fd *netFD) Read(p []byte) (n int, err error) {
    n, err = fd.pfd.Read(p)
    runtime.KeepAlive(fd)
    return n, wrapSyscallError("read", err)
}

func (fd *FD) Read(p []byte) (int, error) {
    for {
        n, err := syscall.Read(fd.Sysfd, p)
        if err != nil {
            n = 0
            if err == syscall.EAGAIN {
                if fd.pd.runtime_pollWaitRead() {
                    continue
                }
            }
        }
        return n, err
    }
}

写操作实现

func (fd *netFD) Write(p []byte) (n int, err error) {
    n, err = fd.pfd.Write(p)
    runtime.KeepAlive(fd)
    return n, wrapSyscallError("write", err)
}

func (fd *FD) Write(p []byte) (int, error) {
    for {
        n, err := syscall.Write(fd.Sysfd, p)
        if err != nil {
            n = 0
            if err == syscall.EAGAIN {
                if fd.pd.runtime_pollWaitWrite() {
                    continue
                }
            }
        }
        return n, err
    }
}

阻塞等待机制

func (pd *pollDesc) runtime_pollWaitRead() bool {
    return runtime_pollWait(pd, 'r')
}

func (pd *pollDesc) runtime_pollWaitWrite() bool {
    return runtime_pollWait(pd, 'w')
}

func runtime_pollWait(pd *pollDesc, mode int) bool {
    for !netpollblock(pd, int32(mode), false) {
        if !netpollblock(pd, int32(mode), true) {
            return false
        }
    }
    return true
}

事件循环处理

epoll_wait 循环

func netpoll(block bool) gList {
    if epfd == -1 {
        return gList{}
    }
    
    waitms := int32(-1)
    if !block {
        waitms = 0
    }
    
    var events [128]epollevent
retry:
    n := epollwait(epfd, &events[0], int32(len(events)), waitms)
    if n < 0 {
        if n != -_EINTR {
            println("runtime: epollwait on fd", epfd, "failed with", -n)
            throw("runtime: netpoll failed")
        }
        goto retry
    }
    
    var toRun gList
    for i := int32(0); i < n; i++ {
        ev := &events[i]
        if ev.events == 0 {
            continue
        }
        
        if *(**uintptr)(unsafe.Pointer(&ev.data)) == &netpollBreakRd {
            if ev.events != _EPOLLIN {
                println("runtime: netpoll: break fd ready for", ev.events)
                throw("runtime: netpoll: break fd ready for something unexpected")
            }
            if delay != 0 && delay < 5 {
                // 避免忙等待
                usleep(delay)
                delay <<= 1
            }
            continue
        }
        
        var mode int32
        if ev.events&(_EPOLLIN|_EPOLLRDHUP|_EPOLLHUP|_EPOLLERR) != 0 {
            mode += 'r'
        }
        if ev.events&(_EPOLLOUT|_EPOLLHUP|_EPOLLERR) != 0 {
            mode += 'w'
        }
        if mode != 0 {
            pd := *(**pollDesc)(unsafe.Pointer(&ev.data))
            pd.everr = false
            if ev.events == _EPOLLERR {
                pd.everr = true
            }
            netpollready(&toRun, pd, mode)
        }
    }
    
    return toRun
}

事件就绪处理

func netpollready(toRun *gList, pd *pollDesc, mode int32) {
    var rg, wg *g
    if mode == 'r' || mode == 'r'+'w' {
        rg = netpollunblock(pd, 'r', true)
    }
    if mode == 'w' || mode == 'r'+'w' {
        wg = netpollunblock(pd, 'w', true)
    }
    if rg != nil {
        toRun.push(rg)
    }
    if wg != nil {
        toRun.push(wg)
    }
}

️ 实战代码

1. 简单 HTTP 服务器

package main

import (
    "fmt"
    "net"
    "net/http"
    "runtime"
    "time"
)

func main() {
    // 设置 GOMAXPROCS
    runtime.GOMAXPROCS(runtime.NumCPU())
    
    // 创建 HTTP 服务器
    server := &http.Server{
        Addr: ":8080",
        Handler: http.HandlerFunc(handler),
    }
    
    // 启动服务器
    fmt.Println("Server starting on :8080")
    if err := server.ListenAndServe(); err != nil {
        fmt.Printf("Server error: %v\n", err)
    }
}

func handler(w http.ResponseWriter, r *http.Request) {
    // 模拟处理时间
    time.Sleep(10 * time.Millisecond)
    
    // 返回响应
    w.Header().Set("Content-Type", "text/plain")
    fmt.Fprintf(w, "Hello, World! Goroutine ID: %d\n", getGoroutineID())
}

func getGoroutineID() int {
    var buf [64]byte
    n := runtime.Stack(buf[:], false)
    id := -1
    fmt.Sscanf(string(buf[:n]), "goroutine %d", &id)
    return id
}

2. 高性能 Echo 服务器

package main

import (
    "bufio"
    "fmt"
    "net"
    "runtime"
    "sync"
    "time"
)

type EchoServer struct {
    listener net.Listener
    wg       sync.WaitGroup
    quit     chan struct{}
}

func NewEchoServer(addr string) (*EchoServer, error) {
    listener, err := net.Listen("tcp", addr)
    if err != nil {
        return nil, err
    }
    
    return &EchoServer{
        listener: listener,
        quit:     make(chan struct{}),
    }, nil
}

func (s *EchoServer) Start() {
    fmt.Printf("Echo server starting on %s\n", s.listener.Addr())
    
    for {
        conn, err := s.listener.Accept()
        if err != nil {
            select {
            case <-s.quit:
                return
            default:
                fmt.Printf("Accept error: %v\n", err)
                continue
            }
        }
        
        s.wg.Add(1)
        go s.handleConnection(conn)
    }
}

func (s *EchoServer) handleConnection(conn net.Conn) {
    defer s.wg.Done()
    defer conn.Close()
    
    reader := bufio.NewReader(conn)
    writer := bufio.NewWriter(conn)
    
    for {
        // 设置读取超时
        conn.SetReadDeadline(time.Now().Add(30 * time.Second))
        
        line, err := reader.ReadLine()
        if err != nil {
            if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
                fmt.Println("Connection timeout")
            } else {
                fmt.Printf("Read error: %v\n", err)
            }
            return
        }
        
        // 回显数据
        response := fmt.Sprintf("Echo: %s\n", string(line))
        if _, err := writer.WriteString(response); err != nil {
            fmt.Printf("Write error: %v\n", err)
            return
        }
        
        if err := writer.Flush(); err != nil {
            fmt.Printf("Flush error: %v\n", err)
            return
        }
    }
}

func (s *EchoServer) Stop() {
    close(s.quit)
    s.listener.Close()
    s.wg.Wait()
    fmt.Println("Echo server stopped")
}

func main() {
    // 设置 GOMAXPROCS
    runtime.GOMAXPROCS(runtime.NumCPU())
    
    server, err := NewEchoServer(":8080")
    if err != nil {
        fmt.Printf("Failed to create server: %v\n", err)
        return
    }
    
    // 启动服务器
    go server.Start()
    
    // 等待中断信号
    time.Sleep(30 * time.Second)
    server.Stop()
}

3. 连接池实现

package main

import (
    "fmt"
    "net"
    "sync"
    "time"
)

type ConnectionPool struct {
    connections chan net.Conn
    factory     func() (net.Conn, error)
    maxSize     int
    minSize     int
    mutex       sync.RWMutex
    closed      bool
}

func NewConnectionPool(factory func() (net.Conn, error), minSize, maxSize int) *ConnectionPool {
    pool := &ConnectionPool{
        connections: make(chan net.Conn, maxSize),
        factory:     factory,
        maxSize:     maxSize,
        minSize:     minSize,
    }
    
    // 初始化最小连接数
    for i := 0; i < minSize; i++ {
        conn, err := factory()
        if err != nil {
            fmt.Printf("Failed to create connection: %v\n", err)
            continue
        }
        pool.connections <- conn
    }
    
    return pool
}

func (p *ConnectionPool) Get() (net.Conn, error) {
    p.mutex.RLock()
    if p.closed {
        p.mutex.RUnlock()
        return nil, fmt.Errorf("pool is closed")
    }
    p.mutex.RUnlock()
    
    select {
    case conn := <-p.connections:
        // 检查连接是否有效
        if p.isValid(conn) {
            return conn, nil
        }
        // 连接无效，创建新连接
        return p.factory()
    default:
        // 池中没有可用连接，创建新连接
        return p.factory()
    }
}

func (p *ConnectionPool) Put(conn net.Conn) {
    if conn == nil {
        return
    }
    
    p.mutex.RLock()
    if p.closed {
        p.mutex.RUnlock()
        conn.Close()
        return
    }
    p.mutex.RUnlock()
    
    select {
    case p.connections <- conn:
        // 成功放回池中
    default:
        // 池已满，关闭连接
        conn.Close()
    }
}

func (p *ConnectionPool) isValid(conn net.Conn) bool {
    // 简单的连接有效性检查
    conn.SetReadDeadline(time.Now().Add(1 * time.Millisecond))
    one := []byte{0}
    _, err := conn.Read(one)
    if err != nil {
        return false
    }
    return true
}

func (p *ConnectionPool) Close() {
    p.mutex.Lock()
    defer p.mutex.Unlock()
    
    if p.closed {
        return
    }
    
    p.closed = true
    close(p.connections)
    
    // 关闭所有连接
    for conn := range p.connections {
        conn.Close()
    }
}

func main() {
    // 创建连接池
    pool := NewConnectionPool(func() (net.Conn, error) {
        return net.Dial("tcp", "localhost:8080")
    }, 5, 20)
    
    // 使用连接池
    for i := 0; i < 100; i++ {
        go func(id int) {
            conn, err := pool.Get()
            if err != nil {
                fmt.Printf("Failed to get connection: %v\n", err)
                return
            }
            defer pool.Put(conn)
            
            // 使用连接
            fmt.Printf("Goroutine %d using connection\n", id)
            time.Sleep(100 * time.Millisecond)
        }(i)
    }
    
    time.Sleep(5 * time.Second)
    pool.Close()
}

4. 网络性能测试

package main

import (
    "fmt"
    "net"
    "runtime"
    "sync"
    "time"
)

func benchmarkNetwork(count int) time.Duration {
    // 启动测试服务器
    listener, err := net.Listen("tcp", ":0")
    if err != nil {
        panic(err)
    }
    defer listener.Close()
    
    // 启动服务器 goroutine
    go func() {
        for {
            conn, err := listener.Accept()
            if err != nil {
                return
            }
            go func() {
                defer conn.Close()
                buf := make([]byte, 1024)
                for {
                    _, err := conn.Read(buf)
                    if err != nil {
                        return
                    }
                    conn.Write(buf)
                }
            }()
        }
    }()
    
    // 等待服务器启动
    time.Sleep(100 * time.Millisecond)
    
    // 获取服务器地址
    addr := listener.Addr().String()
    
    // 测试客户端
    start := time.Now()
    var wg sync.WaitGroup
    
    for i := 0; i < count; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            
            conn, err := net.Dial("tcp", addr)
            if err != nil {
                return
            }
            defer conn.Close()
            
            // 发送数据
            data := []byte("Hello, World!")
            conn.Write(data)
            
            // 接收响应
            buf := make([]byte, len(data))
            conn.Read(buf)
        }()
    }
    
    wg.Wait()
    return time.Since(start)
}

func main() {
    // 设置 GOMAXPROCS
    runtime.GOMAXPROCS(runtime.NumCPU())
    
    // 测试不同并发数
    counts := []int{100, 1000, 10000}
    
    for _, count := range counts {
        duration := benchmarkNetwork(count)
        fmt.Printf("Concurrency: %d, Duration: %v, QPS: %.2f\n", 
            count, duration, float64(count)/duration.Seconds())
    }
}

性能优化

网络优化策略

连接复用：使用连接池减少连接开销
批量处理：批量处理 I/O 操作
内存池：复用内存缓冲区
协程池：限制协程数量

性能监控

package main

import (
    "fmt"
    "net"
    "runtime"
    "time"
)

func monitorNetwork() {
    var m runtime.MemStats
    
    for {
        runtime.ReadMemStats(&m)
        
        fmt.Printf("=== 网络监控 ===\n")
        fmt.Printf("Goroutine 数量: %d\n", runtime.NumGoroutine())
        fmt.Printf("堆内存: %d KB\n", m.HeapAlloc/1024)
        fmt.Printf("系统内存: %d KB\n", m.Sys/1024)
        fmt.Printf("GC 次数: %d\n", m.NumGC)
        
        time.Sleep(5 * time.Second)
    }
}

func main() {
    go monitorNetwork()
    
    // 创建大量连接
    for i := 0; i < 1000; i++ {
        go func(id int) {
            conn, err := net.Dial("tcp", "localhost:8080")
            if err != nil {
                return
            }
            defer conn.Close()
            
            for j := 0; j < 100; j++ {
                conn.Write([]byte("Hello"))
                buf := make([]byte, 1024)
                conn.Read(buf)
                time.Sleep(100 * time.Millisecond)
            }
        }(i)
    }
    
    time.Sleep(30 * time.Second)
}

面试题库

基础问题

Go 网络模型的特点？
- 基于 epoll/kqueue 事件驱动
- 每个连接一个 goroutine
- 非阻塞 I/O
- 高并发支持
pollDesc 的作用？
- 管理文件描述符
- 绑定 goroutine
- 处理 I/O 事件
- 防止 fd 复用
netpoll 的工作流程？
- 注册 fd 到 epoll
- 等待 I/O 事件
- 唤醒等待的 goroutine
- 处理 I/O 操作

进阶问题

如何优化网络性能？
- 使用连接池
- 批量处理 I/O
- 内存池复用
- 协程池限制
网络编程的常见问题？
- 连接泄漏
- 内存泄漏
- 协程泄漏
- 死锁问题
epoll 的优势？
- 事件驱动
- 高效 I/O 多路复用
- 支持大量连接
- 减少系统调用

源码问题

netpoll 的初始化过程？
- 创建 epoll 实例
- 设置管道
- 注册中断事件
- 启动事件循环
I/O 事件的处理流程？
- epoll_wait 等待事件
- 解析事件类型
- 唤醒对应 goroutine
- 处理 I/O 操作