16-进阶场景与优化
学习目标
- 掌握容器的高级应用场景
- 了解容器性能优化的技术和方法
- 能够进行容器安全加固
- 掌握容器监控和运维技术
- 理解容器在生产环境中的最佳实践
前置知识
- 容器基础原理
- 系统性能优化
- 安全加固技术
- 监控运维基础
一、多容器管理
1.1 容器编排
graph TD
%% Root node A (container orchestration) fans out to five capabilities:
%% B service discovery, C load balancing, D health checks,
%% E rolling updates, F failure recovery.
A[容器编排] --> B[服务发现]
A --> C[负载均衡]
A --> D[健康检查]
A --> E[滚动更新]
A --> F[故障恢复]
%% B: service discovery — DNS resolution, service registration, config management
B --> B1[DNS 解析]
B --> B2[服务注册]
B --> B3[配置管理]
%% C: load balancing — traffic distribution, session affinity, failover
C --> C1[流量分发]
C --> C2[会话保持]
C --> C3[故障转移]
%% D: health checks — liveness, readiness, startup probes
D --> D1[存活检查]
D --> D2[就绪检查]
D --> D3[启动检查]
%% E: rolling updates — blue/green deployment, canary release, rollback
E --> E1[蓝绿部署]
E --> E2[金丝雀发布]
E --> E3[回滚机制]
%% F: failure recovery — automatic restart, failover, resource scheduling
F --> F1[自动重启]
F --> F2[故障转移]
F --> F3[资源调度]
1.2 容器网络优化
#!/bin/bash
# Container host network tuning script.
#
# Adjusts NIC offload features on one interface and raises the kernel's
# socket buffer, TCP buffer and receive backlog limits. Must run as root.
#
# Environment:
#   NET_IFACE    interface to tune (default: eth0)
#   SYSCTL_FILE  drop-in file the settings are written to
#                (default: /etc/sysctl.d/99-container-network.conf)
net_iface="${NET_IFACE:-eth0}"
sysctl_file="${SYSCTL_FILE:-/etc/sysctl.d/99-container-network.conf}"

echo "=== 容器网络优化 ==="

# 1. NIC offloads
echo "1. 启用网络优化..."
# GRO (Generic Receive Offload), TSO (TCP Segmentation Offload) and
# GSO (Generic Segmentation Offload).
for offload in gro tso gso; do
  ethtool -K "$net_iface" "$offload" on
done
# LRO (Large Receive Offload) is switched off rather than on: container hosts
# forward and bridge traffic, and LRO is not safe for forwarded packets.
ethtool -K "$net_iface" lro off

# The settings go into a dedicated drop-in that is truncated first, so running
# the script again replaces the values instead of appending duplicate lines.
mkdir -p "$(dirname -- "$sysctl_file")"
: > "$sysctl_file"

# 2. Socket buffer limits
echo "2. 调整网络缓冲区..."
cat >> "$sysctl_file" << 'EOF'
net.core.rmem_max = 134217728
net.core.wmem_max = 134217728
net.core.rmem_default = 262144
net.core.wmem_default = 262144
EOF

# 3. TCP parameters
echo "3. 调整 TCP 参数..."
cat >> "$sysctl_file" << 'EOF'
net.ipv4.tcp_rmem = 4096 87380 134217728
net.ipv4.tcp_wmem = 4096 65536 134217728
net.ipv4.tcp_congestion_control = bbr
EOF
# bbr can only be selected once its module is loaded; best effort, because it
# may be built into the kernel or unavailable.
modprobe tcp_bbr 2>/dev/null

# 4. Receive queue sizing
echo "4. 调整网络队列..."
cat >> "$sysctl_file" << 'EOF'
net.core.netdev_max_backlog = 5000
net.core.netdev_budget = 600
EOF

# 5. Load the settings written above
sysctl -p "$sysctl_file"
echo "网络优化完成"
1.3 容器存储优化
#!/bin/bash
# Container storage optimization script.
#
# Writes the Docker daemon storage configuration, selects the mq-deadline I/O
# scheduler for sda, prunes unused Docker data and prints disk usage.
# Must run as root. The daemon has to be restarted before the new
# daemon.json takes effect; this script does not restart it.
daemon_json=/etc/docker/daemon.json
scheduler_file=/sys/block/sda/queue/scheduler

echo "=== 容器存储优化 ==="

# 1. Storage driver
echo "1. 选择存储驱动..."
# Keep a copy of any existing configuration, because the file is replaced.
if [[ -s "$daemon_json" ]]; then
  cp -p -- "$daemon_json" "${daemon_json}.bak"
fi
# overlay2 is the recommended driver.
# NOTE(review): overlay2.size needs an xfs backing filesystem mounted with
# pquota, and override_kernel_check is deprecated — confirm both against the
# Docker version in use.
cat > "$daemon_json" << 'EOF'
{
  "storage-driver": "overlay2",
  "storage-opts": [
    "overlay2.override_kernel_check=true",
    "overlay2.size=20G"
  ]
}
EOF

# 2. Storage performance
echo "2. 优化存储性能..."
# Prefer SSD-backed storage. Switch the I/O scheduler only when the device
# exposes the knob, so hosts without sda do not hit a redirect error.
if [[ -w "$scheduler_file" ]]; then
  echo mq-deadline > "$scheduler_file"
fi

# 3. Storage compression
echo "3. 启用存储压缩..."
# overlay2 has no compression storage option, and an unknown storage-opt makes
# dockerd refuse to start. Rewriting daemon.json here would also discard the
# configuration from step 1, so nothing is written in this step.
echo "   overlay2 不支持压缩选项,已跳过(如需压缩请使用支持压缩的后端文件系统)"

# 4. Reclaim space
echo "4. 清理存储空间..."
# WARNING: removes every unused image, stopped container and unused volume.
docker system prune -a -f
docker volume prune -f

# 5. Report usage
echo "5. 监控存储使用..."
docker system df
df -h
echo "存储优化完成"
二、性能优化
2.1 CPU 优化
#!/bin/bash
# CPU optimization script.
#
# Writes real-time scheduler settings, switches every CPU to the performance
# frequency governor, starts an nginx container pinned to CPUs 0-3 and prints
# CPU usage. Must run as root.
sysctl_file=/etc/sysctl.d/99-container-cpu.conf

echo "=== CPU 优化 ==="

# 1. Scheduler settings
echo "1. 设置 CPU 调度策略..."
# A runtime of -1 removes the real-time throttling limit within each period.
# The drop-in is overwritten so repeated runs do not pile up duplicate lines,
# and it is loaded immediately (the values were previously never applied).
mkdir -p "$(dirname -- "$sysctl_file")"
cat > "$sysctl_file" << 'EOF'
kernel.sched_rt_runtime_us = -1
kernel.sched_rt_period_us = 1000000
EOF
sysctl -p "$sysctl_file"

# 2. Frequency governor
echo "2. 优化 CPU 频率..."
# A redirection target may not be a glob matching several files (bash reports
# "ambiguous redirect"), so each CPU's governor is written individually.
for governor in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
  [[ -w "$governor" ]] || continue
  echo performance > "$governor"
done

# 3. CPU affinity
echo "3. 调整 CPU 亲和性..."
# The container is started by the Docker daemon, so taskset on the docker CLI
# only pins the client process. --cpuset-cpus pins the container itself.
docker run -d --cpuset-cpus=0-3 nginx

# 4. Multi-core power saving
echo "4. 启用 CPU 缓存..."
# sched_mc_power_savings is not a sysctl key; it was a sysfs file on old
# kernels. Write it only when the running kernel still provides it.
legacy_knob=/sys/devices/system/cpu/sched_mc_power_savings
if [[ -w "$legacy_knob" ]]; then
  echo 0 > "$legacy_knob"
fi

# 5. Report CPU usage
echo "5. 监控 CPU 使用..."
top -bn1 | grep "Cpu(s)"
# htop is interactive; only launch it when attached to a terminal so that
# unattended runs do not hang.
if [[ -t 0 && -t 1 ]] && command -v htop > /dev/null; then
  htop
fi
echo "CPU 优化完成"
2.2 内存优化
#!/bin/bash
# Memory optimization script.
#
# Writes virtual-memory sysctl settings, loads them, enables transparent huge
# pages and prints memory usage. Must run as root.
sysctl_file=/etc/sysctl.d/99-container-memory.conf

echo "=== 内存优化 ==="

# Start from an empty drop-in so repeated runs replace the values instead of
# appending duplicate lines.
mkdir -p "$(dirname -- "$sysctl_file")"
: > "$sysctl_file"

# 1. Swap and dirty-page writeback thresholds
echo "1. 调整内存管理..."
cat >> "$sysctl_file" << 'EOF'
vm.swappiness = 10
vm.dirty_ratio = 15
vm.dirty_background_ratio = 5
EOF

# 2. Compaction and zone reclaim
echo "2. 启用内存压缩..."
# These keys control memory compaction (defragmentation) and NUMA zone
# reclaim; they do not compress memory contents.
cat >> "$sysctl_file" << 'EOF'
vm.compaction_proactiveness = 1
vm.zone_reclaim_mode = 1
EOF

# 3. Overcommit policy
echo "3. 调整内存分配..."
# NOTE(review): overcommit_ratio is only consulted when overcommit_memory is
# 2; with mode 1 it has no effect — confirm which policy is intended.
cat >> "$sysctl_file" << 'EOF'
vm.overcommit_memory = 1
vm.overcommit_ratio = 50
EOF

# Load the settings; previously they were written but never applied.
sysctl -p "$sysctl_file"

# 4. Transparent huge pages
echo "4. 启用透明大页..."
echo always > /sys/kernel/mm/transparent_hugepage/enabled
echo always > /sys/kernel/mm/transparent_hugepage/defrag

# 5. Report memory usage
echo "5. 监控内存使用..."
free -h
cat /proc/meminfo
echo "内存优化完成"
2.3 I/O 优化
#!/bin/bash
# I/O optimization script.
#
# Tunes the request queue of one block device and prints I/O statistics.
# Must run as root.
#
# Environment:
#   BLOCK_DEV  block device name under /sys/block (default: sda)
block_dev="${BLOCK_DEV:-sda}"
queue_dir="/sys/block/${block_dev}/queue"

echo "=== I/O 优化 ==="

if [[ ! -d "$queue_dir" ]]; then
  echo "错误: 块设备 ${block_dev} 不存在" >&2
  exit 1
fi

# 1. I/O scheduler
echo "1. 调整 I/O 调度器..."
echo mq-deadline > "${queue_dir}/scheduler"

# 2. Queue depth and read-ahead
echo "2. 优化 I/O 队列..."
echo 1024 > "${queue_dir}/nr_requests"
echo 128 > "${queue_dir}/read_ahead_kb"

# 3. Request merging
echo "3. 启用 I/O 合并..."
# nomerges is a "disable" switch: 0 enables every merge heuristic, while 1 and
# 2 progressively turn merging off. Enabling merges therefore means writing 0.
echo 0 > "${queue_dir}/nomerges"

# 4. Request timeout
echo "4. 调整 I/O 超时..."
# io_timeout is expressed in milliseconds, so 30 seconds is 30000.
echo 30000 > "${queue_dir}/io_timeout"

# 5. Report I/O performance
echo "5. 监控 I/O 性能..."
# Bounded sample counts so the script finishes instead of streaming forever.
iostat -x 1 3
# Batch mode, one iteration: iotop would otherwise stay in its interactive UI.
iotop -b -n 1
echo "I/O 优化完成"
三、安全加固
3.1 容器安全配置
#!/bin/bash
# Container security hardening script.
#
# Installs AppArmor and SELinux tooling, writes a seccomp profile, enables
# user-namespace remapping in the Docker daemon configuration and starts
# example containers with reduced capabilities and resource limits.
# Must run as root on an apt-based system.
selinux_config=/etc/selinux/config
daemon_json=/etc/docker/daemon.json

echo "=== 容器安全加固 ==="

# 1. AppArmor
echo "1. 启用 AppArmor..."
apt-get install -y apparmor apparmor-utils
systemctl start apparmor
systemctl enable apparmor

# 2. SELinux
echo "2. 启用 SELinux..."
# NOTE(review): AppArmor and SELinux are normally alternatives; confirm which
# one this host should actually enforce.
apt-get install -y selinux-utils selinux-policy-default
# setenforce only works when SELinux is already active in the running kernel.
if selinuxenabled 2> /dev/null; then
  setenforce 1
fi
# Change only the SELINUX= line so other settings in the file (such as
# SELINUXTYPE) survive; create the entry when it is missing.
if [[ -f "$selinux_config" ]] && grep -q '^SELINUX=' "$selinux_config"; then
  sed -i 's/^SELINUX=.*/SELINUX=enforcing/' "$selinux_config"
else
  mkdir -p "$(dirname -- "$selinux_config")"
  echo 'SELINUX=enforcing' >> "$selinux_config"
fi

# 3. Seccomp
echo "3. 配置 Seccomp..."
# Deny-by-default profile that allows only four syscalls. It illustrates the
# file format; a real workload needs a much longer allow list.
cat > /etc/docker/seccomp-profile.json << 'EOF'
{
  "defaultAction": "SCMP_ACT_ERRNO",
  "architectures": ["SCMP_ARCH_X86_64"],
  "syscalls": [
    {
      "names": ["read", "write", "open", "close"],
      "action": "SCMP_ACT_ALLOW"
    }
  ]
}
EOF

# 4. Capabilities
echo "4. 配置 Capabilities..."
# Drop everything and add back only the right to bind low ports. Detached so
# the script continues instead of blocking on the container's foreground
# process.
# NOTE(review): the stock nginx image may need further capabilities (for
# example CHOWN/SETUID/SETGID) to start — confirm.
docker run -d --cap-drop=ALL --cap-add=NET_BIND_SERVICE nginx

# 5. User namespaces
echo "5. 启用用户命名空间..."
# Merge the key into an existing configuration when jq is available;
# otherwise keep a backup before replacing the file, so earlier daemon
# settings are not lost silently.
if [[ -s "$daemon_json" ]] && command -v jq > /dev/null; then
  merged_json=$(jq '. + {"userns-remap": "default"}' "$daemon_json") \
    && printf '%s\n' "$merged_json" > "$daemon_json"
else
  if [[ -s "$daemon_json" ]]; then
    cp -p -- "$daemon_json" "${daemon_json}.bak"
  fi
  echo '{"userns-remap": "default"}' > "$daemon_json"
fi

# 6. Resource limits
echo "6. 配置资源限制..."
# Memory limit
docker run -d -m 512m nginx
# CPU limit
docker run -d --cpus="1.5" nginx
# Process-count limit
docker run -d --pids-limit=100 nginx
echo "安全加固完成"
3.2 网络安全
#!/bin/bash
# Network security hardening script.
#
# Configures ufw and iptables to allow only SSH/HTTP/HTTPS inbound, creates an
# isolated Docker bridge network, generates a self-signed TLS key pair for
# Docker and shows network usage. Must run as root on an apt-based system.
tls_dir=/etc/docker/tls

# Append a rule to the INPUT chain unless an identical rule already exists,
# so that running the script again does not duplicate rules.
# Arguments: the iptables rule specification.
add_input_rule() {
  iptables -C INPUT "$@" 2> /dev/null || iptables -A INPUT "$@"
}

echo "=== 网络安全加固 ==="

# 1. Firewall
echo "1. 配置防火墙..."
apt-get install -y ufw
ufw default deny incoming
ufw default allow outgoing
ufw allow ssh
ufw allow 80/tcp
ufw allow 443/tcp
# --force skips the interactive confirmation prompt that would otherwise
# stall an unattended run.
ufw --force enable

# 2. iptables
echo "2. 配置 iptables..."
add_input_rule -i lo -j ACCEPT
# conntrack replaces the deprecated "state" match.
add_input_rule -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
add_input_rule -p tcp --dport 22 -j ACCEPT
add_input_rule -p tcp --dport 80 -j ACCEPT
add_input_rule -p tcp --dport 443 -j ACCEPT
# Must stay last: everything not accepted above is dropped.
add_input_rule -j DROP

# 3. Network isolation
echo "3. 配置网络隔离..."
# Creating a network that already exists is an error, so check first.
if ! docker network inspect isolated-network > /dev/null 2>&1; then
  docker network create --driver bridge --subnet=172.20.0.0/16 isolated-network
fi

# 4. TLS
echo "4. 启用 TLS..."
mkdir -p "$tls_dir"
openssl genrsa -out "${tls_dir}/server-key.pem" 2048
# The private key must not be readable by other users.
chmod 600 "${tls_dir}/server-key.pem"
# -subj supplies the subject on the command line; without it openssl prompts
# for every field interactively.
openssl req -new -key "${tls_dir}/server-key.pem" \
  -subj "/CN=$(hostname)" -out "${tls_dir}/server.csr"
# An explicit validity period replaces the short 30-day default.
openssl x509 -req -days 365 -sha256 -in "${tls_dir}/server.csr" \
  -signkey "${tls_dir}/server-key.pem" -out "${tls_dir}/server-cert.pem"

# 5. Traffic monitoring
echo "5. 监控网络流量..."
apt-get install -y nethogs iftop
# Both tools are interactive full-screen programs; start them only when a
# terminal is attached so unattended runs reach the end of the script.
if [[ -t 0 && -t 1 ]]; then
  nethogs
  iftop
fi
echo "网络安全加固完成"
3.3 镜像安全
#!/bin/bash
# Image security hardening script.
#
# Installs Trivy and Notary, scans an image, initialises and publishes a
# Notary trust collection, fetches minimal base images, writes a multi-stage
# Dockerfile into the current directory and prunes unused images.
# Must run as root (installs into /usr/local/bin).
#
# Environment:
#   NOTARY_GUN  globally unique name of the trust collection to sign
#               (default: docker.io/library/my-image)
notary_gun="${NOTARY_GUN:-docker.io/library/my-image}"

# Downloads are unpacked in a throwaway directory instead of the caller's
# working directory, and removed on exit.
work_dir=$(mktemp -d) || exit 1
trap 'rm -rf -- "$work_dir"' EXIT

echo "=== 镜像安全加固 ==="

# 1. Vulnerability scan
echo "1. 扫描镜像漏洞..."
# Install Trivy
wget -P "$work_dir" https://github.com/aquasecurity/trivy/releases/download/v0.18.3/trivy_0.18.3_Linux-64bit.tar.gz
tar -xzf "${work_dir}/trivy_0.18.3_Linux-64bit.tar.gz" -C "$work_dir"
mv "${work_dir}/trivy" /usr/local/bin/
# Scan an image
trivy image ubuntu:20.04

# 2. Image signing
echo "2. 签名镜像..."
# Install Notary
wget -O "${work_dir}/notary-Linux-amd64" https://github.com/theupdateframework/notary/releases/download/v0.6.1/notary-Linux-amd64
chmod +x "${work_dir}/notary-Linux-amd64"
mv "${work_dir}/notary-Linux-amd64" /usr/local/bin/notary
# init and publish both operate on a named trust collection, so the GUN has to
# be passed; without it the commands exit with a usage error.
notary -s https://notary.docker.io -d ~/.docker/trust init "$notary_gun"
notary -s https://notary.docker.io -d ~/.docker/trust publish "$notary_gun"

# 3. Minimal images
echo "3. 使用最小镜像..."
# Alpine
docker run alpine:latest
# Distroless base ships without a default command, so "docker run" with no
# arguments cannot start it; fetch the image instead.
docker pull gcr.io/distroless/base

# 4. Multi-stage build
echo "4. 多阶段构建..."
# CGO_ENABLED=0 yields a statically linked binary. The builder image is
# glibc-based while Alpine uses musl, so a dynamically linked binary built in
# the first stage would fail to start in the second.
cat > Dockerfile << 'EOF'
FROM golang:1.19 AS builder
WORKDIR /app
COPY . .
RUN CGO_ENABLED=0 go build -o app .
FROM alpine:latest
RUN apk --no-cache add ca-certificates
WORKDIR /root/
COPY --from=builder /app/app .
CMD ["./app"]
EOF

# 5. Image cleanup
echo "5. 清理镜像..."
# Dangling images
docker image prune -f
# WARNING: removes every image not used by a container.
docker image prune -a -f
echo "镜像安全加固完成"
四、监控与运维
4.1 容器监控
#!/bin/bash
# Container monitoring script.
#
# Installs Prometheus and Grafana under /opt, writes the Prometheus scrape
# configuration and alert rules, starts both services in the background and
# prints one snapshot of container resource usage. Must run as root.
prometheus_home=/opt/prometheus
grafana_home=/opt/grafana

echo "=== 容器监控 ==="

# 1. Install tools
echo "1. 安装监控工具..."
# Prometheus
wget https://github.com/prometheus/prometheus/releases/download/v2.40.0/prometheus-2.40.0.linux-amd64.tar.gz
tar -xzf prometheus-2.40.0.linux-amd64.tar.gz
mv prometheus-2.40.0.linux-amd64 "$prometheus_home"
# Grafana
wget https://dl.grafana.com/oss/release/grafana-9.3.0.linux-amd64.tar.gz
tar -xzf grafana-9.3.0.linux-amd64.tar.gz
mv grafana-9.3.0 "$grafana_home"

# 2. Configuration
echo "2. 配置监控..."
# YAML is indentation-sensitive, so the nesting is spelled out explicitly.
# rule_files makes Prometheus load the alert rules written below.
# NOTE(review): localhost:9323 is the Docker daemon metrics endpoint; the
# container_* series used by the alerts are normally exported by cAdvisor —
# confirm the scrape target.
cat > "${prometheus_home}/prometheus.yml" << 'EOF'
global:
  scrape_interval: 15s
rule_files:
  - /opt/prometheus/alerts.yml
scrape_configs:
  - job_name: 'docker'
    static_configs:
      - targets: ['localhost:9323']
EOF
# The rules are written before Prometheus starts so they are loaded at
# startup.
# - CPU: the metric is a cumulative counter of CPU seconds, so its rate is
#   compared with 0.8 cores rather than the raw counter value.
# - Memory: usage is divided by the container limit; containers without a
#   limit (limit == 0) are filtered out of the denominator.
cat > "${prometheus_home}/alerts.yml" << 'EOF'
groups:
  - name: container_alerts
    rules:
      - alert: HighCPUUsage
        expr: rate(container_cpu_usage_seconds_total[5m]) > 0.8
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage detected"
      - alert: HighMemoryUsage
        expr: container_memory_usage_bytes / (container_spec_memory_limit_bytes > 0) > 0.8
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage detected"
EOF

# 3. Start services
echo "3. 启动监控服务..."
"${prometheus_home}/prometheus" --config.file="${prometheus_home}/prometheus.yml" &
# Grafana locates its defaults and static assets relative to the home path,
# which must be given when not started from inside the install directory.
"${grafana_home}/bin/grafana-server" --homepath "$grafana_home" &

# 4. Container metrics
echo "4. 监控容器指标..."
# --no-stream prints one sample and returns; without it docker stats keeps
# refreshing forever and the script never reaches the next step.
# CPU usage
docker stats --no-stream
# Memory usage
docker stats --no-stream --format "table {{.Container}}\t{{.MemUsage}}\t{{.MemPerc}}"
# Network and block I/O
docker stats --no-stream --format "table {{.Container}}\t{{.NetIO}}\t{{.BlockIO}}"

# 5. Alerting
echo "5. 设置告警..."
# Validate the rule file that Prometheus was configured to load.
"${prometheus_home}/promtool" check rules "${prometheus_home}/alerts.yml"
echo "容器监控完成"
4.2 日志管理
#!/bin/bash
# Log management script.
#
# Installs Elasticsearch, Logstash and Kibana under /opt, writes a Logstash
# pipeline and a logrotate policy for /var/log/containers, starts the three
# services in the background and prints recent container log lines.
# Must run as root.
elk_user=elk

echo "=== 日志管理 ==="

# 1. Install the ELK stack
echo "1. 安装日志工具..."
wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.5.0-linux-x86_64.tar.gz
tar -xzf elasticsearch-8.5.0-linux-x86_64.tar.gz
mv elasticsearch-8.5.0 /opt/elasticsearch
wget https://artifacts.elastic.co/downloads/logstash/logstash-8.5.0-linux-x86_64.tar.gz
tar -xzf logstash-8.5.0-linux-x86_64.tar.gz
mv logstash-8.5.0 /opt/logstash
wget https://artifacts.elastic.co/downloads/kibana/kibana-8.5.0-linux-x86_64.tar.gz
tar -xzf kibana-8.5.0-linux-x86_64.tar.gz
mv kibana-8.5.0 /opt/kibana

# 2. Log collection
echo "2. 配置日志收集..."
# "overwrite" lets grok replace the original message with the captured
# remainder; without it the field becomes an array of both values.
# NOTE(review): Elasticsearch 8 enables TLS and authentication by default, so
# the plain localhost:9200 output probably needs credentials — confirm.
cat > /opt/logstash/config/logstash.conf << 'EOF'
input {
  file {
    path => "/var/log/containers/*.log"
    type => "container"
  }
}
filter {
  if [type] == "container" {
    grok {
      match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} %{WORD:level} %{GREEDYDATA:message}" }
      overwrite => ["message"]
    }
  }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
  }
}
EOF

# 3. Start services
echo "3. 启动日志服务..."
# Elasticsearch and Kibana refuse to start as root, so both run under a
# dedicated unprivileged account that owns their install directories.
if ! id -u "$elk_user" > /dev/null 2>&1; then
  useradd --system --no-create-home --shell /usr/sbin/nologin "$elk_user"
fi
chown -R "${elk_user}:" /opt/elasticsearch /opt/kibana
# Elasticsearch
runuser -u "$elk_user" -- /opt/elasticsearch/bin/elasticsearch &
# Logstash
/opt/logstash/bin/logstash -f /opt/logstash/config/logstash.conf &
# Kibana
runuser -u "$elk_user" -- /opt/kibana/bin/kibana &

# 4. Log rotation
echo "4. 配置日志轮转..."
cat > /etc/logrotate.d/containers << 'EOF'
/var/log/containers/*.log {
    daily
    rotate 7
    compress
    delaycompress
    missingok
    notifempty
    create 644 root root
}
EOF

# 5. Inspect logs
echo "5. 监控日志..."
# Show the latest lines and return. "tail -f" never exits, which would keep
# the search below and the final message from ever running.
tail -n 20 /var/log/containers/*.log
# Search logs
grep -r "error" /var/log/containers/
echo "日志管理完成"
4.3 自动化运维
#!/bin/bash
# Operations automation script.
#
# Installs four helper scripts into /usr/local/bin (health check, backup,
# update, dashboard) plus a systemd service and timer that run the health
# check every five minutes. Must run as root.
# All here-documents use a quoted delimiter, so the generated files are
# written literally with no expansion at install time.

echo "=== 自动化运维 ==="

# 1. Health check
echo "1. 创建健康检查..."
cat > /usr/local/bin/container-health-check.sh << 'EOF'
#!/bin/bash
# Health check for the container "my-container".
# Exits 0 when the container runs and stays below the CPU and memory
# thresholds; exits 1 otherwise.
container="my-container"
cpu_limit=80
mem_limit=80

# Succeeds when $1 is numerically greater than $2. awk handles the decimal
# percentages docker prints, without needing bc to be installed.
exceeds() {
  awk -v value="$1" -v limit="$2" 'BEGIN { exit !(value + 0 > limit + 0) }'
}

# Match the exact name; a substring match would also accept containers such
# as "my-container-2".
if ! docker ps --format '{{.Names}}' | grep -qx -- "$container"; then
  echo "Container is not running"
  exit 1
fi

# Resource usage as bare numbers (the trailing % is stripped)
CPU_USAGE=$(docker stats --no-stream --format "{{.CPUPerc}}" "$container" | sed 's/%//')
MEMORY_USAGE=$(docker stats --no-stream --format "{{.MemPerc}}" "$container" | sed 's/%//')

# An empty value means docker stats failed; report that instead of comparing
# nothing against the threshold.
if [[ -z "$CPU_USAGE" || -z "$MEMORY_USAGE" ]]; then
  echo "Unable to read container stats"
  exit 1
fi

if exceeds "$CPU_USAGE" "$cpu_limit"; then
  echo "High CPU usage: $CPU_USAGE%"
  exit 1
fi
if exceeds "$MEMORY_USAGE" "$mem_limit"; then
  echo "High memory usage: $MEMORY_USAGE%"
  exit 1
fi
echo "Container is healthy"
exit 0
EOF
chmod +x /usr/local/bin/container-health-check.sh

# 2. Scheduled monitoring
echo "2. 创建自动重启..."
# The unit only runs the health check; it does not restart the container
# itself.
cat > /etc/systemd/system/container-monitor.service << 'EOF'
[Unit]
Description=Container Health Monitor
After=docker.service

[Service]
Type=oneshot
ExecStart=/usr/local/bin/container-health-check.sh
User=root

[Install]
WantedBy=multi-user.target
EOF
# A timer activates the service of the same name by default. A Requires= on
# the service would additionally start it the moment the timer is started,
# so it is left out.
cat > /etc/systemd/system/container-monitor.timer << 'EOF'
[Unit]
Description=Run container health check every 5 minutes

[Timer]
OnCalendar=*:0/5
Persistent=true

[Install]
WantedBy=timers.target
EOF
# Make systemd re-read the unit files before enabling and starting the timer.
systemctl daemon-reload
systemctl enable container-monitor.timer
systemctl start container-monitor.timer

# 3. Backup
echo "3. 创建自动备份..."
cat > /usr/local/bin/container-backup.sh << 'EOF'
#!/bin/bash
# Back up the image my-image:latest and the volume my-container-data, then
# delete archives older than seven days.
# Stop at the first failure (including inside pipelines) so a broken backup
# is never reported as completed.
set -euo pipefail

BACKUP_DIR="/backup/containers"
DATE=$(date +%Y%m%d_%H%M%S)

mkdir -p "$BACKUP_DIR"

# Image
docker save my-image:latest | gzip > "${BACKUP_DIR}/my-image_${DATE}.tar.gz"

# Volume data, archived from inside a throwaway container
docker run --rm \
  -v my-container-data:/data \
  -v "${BACKUP_DIR}:/backup" \
  alpine tar czf "/backup/container-data_${DATE}.tar.gz" -C /data .

# Remove backups older than seven days
find "$BACKUP_DIR" -name "*.tar.gz" -mtime +7 -delete

echo "Backup completed: $DATE"
EOF
chmod +x /usr/local/bin/container-backup.sh

# 4. Update
echo "4. 创建自动更新..."
cat > /usr/local/bin/container-update.sh << 'EOF'
#!/bin/bash
# Replace my-container with one started from the newest my-image:latest.
# Abort on the first failure: if the pull fails, the running container is
# left untouched instead of being stopped and removed.
set -euo pipefail

# Pull the newest image first
docker pull my-image:latest

# Stop and remove the old container only when it exists
if docker container inspect my-container > /dev/null 2>&1; then
  docker stop my-container
  docker rm my-container
fi

# Start the replacement
docker run -d --name my-container my-image:latest
echo "Container updated successfully"
EOF
chmod +x /usr/local/bin/container-update.sh

# 5. Dashboard
echo "5. 创建监控面板..."
cat > /usr/local/bin/container-dashboard.sh << 'EOF'
#!/bin/bash
# Print a one-shot overview of containers, resources, disk, networks and
# volumes.
echo "=== Container Dashboard ==="
echo "Date: $(date)"
echo ""
echo "=== Container Status ==="
docker ps -a
echo ""
echo "=== Resource Usage ==="
docker stats --no-stream
echo ""
echo "=== Disk Usage ==="
docker system df
echo ""
echo "=== Network Status ==="
docker network ls
echo ""
echo "=== Volume Status ==="
docker volume ls
echo ""
EOF
chmod +x /usr/local/bin/container-dashboard.sh
echo "自动化运维完成"
五、验证检查清单
基础功能
- [ ] 能够进行容器性能优化
- [ ] 能够进行容器安全加固
- [ ] 能够进行容器监控
- [ ] 能够进行自动化运维
高级功能
- [ ] 能够进行多容器管理
- [ ] 能够进行容器编排
- [ ] 能够进行容器网络优化
- [ ] 能够进行容器存储优化
生产实践
- [ ] 能够在生产环境中落实容器安全加固
- [ ] 能够在生产环境中搭建容器监控与告警
- [ ] 能够在生产环境中实现自动化运维(健康检查、备份、更新)
- [ ] 能够进行故障排查
相关链接
- 15-OCI规范与标准化 - 标准化实现
- 17-环境准备与依赖 - 环境配置
- 18-参考资源与扩展阅读 - 扩展阅读
下一步:让我们学习环境准备与依赖,这是容器技术学习的基础!