
Part 9: Lab Environment Quick Setup Guide

One-click deployment of a complete Kubernetes lab environment

Contents

  • Choosing an Environment Option
  • Single-Node Environment Setup
  • Multi-Node Cluster Setup
  • Quick Tool Stack Deployment
  • Environment Verification and Testing

Choosing an Environment Option

Option comparison

Option   | Typical use case   | Pros                             | Cons                   | Resources
kind     | Local dev / CI     | Fast, lightweight, multi-cluster | Limited networking     | 2 CPU / 4 GB
k3s      | Edge / IoT         | Lightweight, production-grade    | Simplified feature set | 1 CPU / 2 GB
minikube | Learning / testing | Easy to use, rich addons         | Modest performance     | 2 CPU / 4 GB
kubeadm  | Production         | Complete, customizable           | Complex configuration  | 4 CPU / 8 GB

Recommended configurations

The lists below give minimum host specs for each purpose; a quick host pre-check sketch follows them.

For learning and experiments:

  • CPU: 4 cores
  • Memory: 8 GB
  • Disk: 50 GB
  • Option: kind or minikube

For performance testing:

  • CPU: 8 cores
  • Memory: 16 GB
  • Disk: 100 GB
  • Option: multi-node kubeadm
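
Before committing to an option, it is worth checking that the host actually meets the figures above. A minimal pre-check sketch (the script name and thresholds are just examples; adjust them to the profile you picked):

#!/bin/bash
# check-host.sh - quick host resource pre-check (example helper, not part of the toolchain above)
MIN_CPU=4        # cores, matching the "learning" profile
MIN_MEM_GB=8
MIN_DISK_GB=50

CPU=$(nproc)
MEM_GB=$(free -g | awk '/^Mem:/{print $2}')
DISK_GB=$(df -BG --output=avail / | tail -1 | tr -dc '0-9')

[ "$CPU" -ge "$MIN_CPU" ]         || echo "CPU: have $CPU cores, want >= $MIN_CPU"
[ "$MEM_GB" -ge "$MIN_MEM_GB" ]   || echo "Memory: have ${MEM_GB} GB, want >= ${MIN_MEM_GB} GB"
[ "$DISK_GB" -ge "$MIN_DISK_GB" ] || echo "Disk: have ${DISK_GB} GB free, want >= ${MIN_DISK_GB} GB"
echo "Host check done"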

Single-Node Environment Setup

Option 1: kind (recommended)

Install kind

#!/bin/bash
# Install kind - Kubernetes in Docker

set -e

echo "=== Installing kind ==="

# Detect OS and CPU architecture
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
ARCH=$(uname -m)

case $ARCH in
  x86_64) ARCH="amd64" ;;
  aarch64) ARCH="arm64" ;;
esac

# Download the kind binary
VERSION="v0.20.0"
curl -Lo ./kind "https://kind.sigs.k8s.io/dl/${VERSION}/kind-${OS}-${ARCH}"
chmod +x ./kind
sudo mv ./kind /usr/local/bin/kind

# Verify the installation
kind version

echo "✅ kind installed"

Create a cluster

#!/bin/bash
# Create a kind cluster

cat <<EOF | kind create cluster --name lab --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
  kubeadmConfigPatches:
  - |
    kind: InitConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        node-labels: "ingress-ready=true"
  extraPortMappings:
  - containerPort: 80
    hostPort: 80
    protocol: TCP
  - containerPort: 443
    hostPort: 443
    protocol: TCP
- role: worker
- role: worker
EOF

# Verify access (kind writes the kubeconfig context automatically)
kubectl cluster-info --context kind-lab

echo "✅ kind cluster created"

Option 2: k3s

#!/bin/bash
# Install k3s

set -e

echo "=== Installing k3s ==="

# Install the k3s server
curl -sfL https://get.k3s.io | sh -

# Wait for the node to become ready
echo "Waiting for the node to become ready..."
sleep 10

# Configure kubectl
mkdir -p ~/.kube
sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config
sudo chown $(whoami):$(whoami) ~/.kube/config

# Verify
kubectl get nodes

echo "✅ k3s installed"

Option 3: minikube

#!/bin/bash
# Install minikube

set -e

echo "=== Installing minikube ==="

# Download and install the minikube binary
curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
sudo install minikube-linux-amd64 /usr/local/bin/minikube

# Start the cluster
minikube start \
  --cpus=4 \
  --memory=8192 \
  --disk-size=50g \
  --driver=docker \
  --kubernetes-version=v1.28.0

# Enable addons
minikube addons enable ingress
minikube addons enable metrics-server
minikube addons enable dashboard

# Verify
kubectl get nodes

echo "✅ minikube installed"
echo "Run 'minikube dashboard' to open the dashboard"

Multi-Node Cluster Setup

Complete kubeadm deployment scripts

1. Prepare all nodes

#!/bin/bash
# prepare-nodes.sh - node preparation script

set -e

echo "=== Preparing node environment ==="

# 1. Disable swap
sudo swapoff -a
sudo sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab

# 2. Load the required kernel modules
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter

# 3. Set kernel parameters
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF

sudo sysctl --system

# 4. Install containerd
sudo apt-get update
sudo apt-get install -y containerd

# Configure containerd to use the systemd cgroup driver
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
sudo systemctl restart containerd
sudo systemctl enable containerd

# 5. Install kubeadm, kubelet and kubectl
sudo apt-get update
sudo apt-get install -y apt-transport-https ca-certificates curl

sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.28/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg

echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.28/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list

sudo apt-get update
sudo apt-get install -y kubelet kubeadm kubectl
sudo apt-mark hold kubelet kubeadm kubectl

echo "✅ Node preparation complete"

2. Initialize the control plane (master node)

#!/bin/bash
# init-master.sh - initialize the master node

set -e

echo "=== Initializing the control plane ==="

# Initialize the cluster
sudo kubeadm init \
  --pod-network-cidr=10.244.0.0/16 \
  --service-cidr=10.96.0.0/12 \
  --kubernetes-version=v1.28.0

# Configure kubectl for the current user
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

# Verify
kubectl get nodes

echo "✅ Control plane initialized"
echo ""
echo "Save the following command to join worker nodes:"
sudo kubeadm token create --print-join-command

3. Join worker nodes

#!/bin/bash
# join-worker.sh - join a worker node to the cluster

# Run the join command printed by the master node
sudo kubeadm join <master-ip>:6443 \
  --token <token> \
  --discovery-token-ca-cert-hash sha256:<hash>
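
The bootstrap token embedded in the join command expires after 24 hours by default, and the discovery hash is derived from the cluster CA certificate. If the saved command no longer works, both pieces can be regenerated on the master; a sketch:

#!/bin/bash
# Run on the master node

# Print a fresh, complete join command (creates a new token)
sudo kubeadm token create --print-join-command

# Or recompute just the CA certificate hash
openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt \
  | openssl rsa -pubin -outform der 2>/dev/null \
  | openssl dgst -sha256 -hex | sed 's/^.* //'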

4. One-click deployment script

#!/bin/bash
# deploy-k8s-cluster.sh - deploy the full cluster in one go

set -e

echo "=== One-click Kubernetes cluster deployment ==="

# Configuration
MASTER_NODE="192.168.1.10"
WORKER_NODES=("192.168.1.11" "192.168.1.12")
SSH_USER="ubuntu"

# 1. Prepare all nodes
echo "1. Preparing nodes..."
for node in $MASTER_NODE "${WORKER_NODES[@]}"; do
  echo "Preparing node: $node"
  scp prepare-nodes.sh $SSH_USER@$node:/tmp/
  ssh $SSH_USER@$node "bash /tmp/prepare-nodes.sh"
done

# 2. Initialize the master
echo "2. Initializing the master node..."
scp init-master.sh $SSH_USER@$MASTER_NODE:/tmp/
ssh $SSH_USER@$MASTER_NODE "bash /tmp/init-master.sh"
# Capture only the join command itself, not the whole init output
ssh $SSH_USER@$MASTER_NODE "sudo kubeadm token create --print-join-command" > /tmp/join-command.sh

# 3. Join worker nodes
echo "3. Joining worker nodes..."
for node in "${WORKER_NODES[@]}"; do
  echo "Joining node: $node"
  scp /tmp/join-command.sh $SSH_USER@$node:/tmp/
  ssh $SSH_USER@$node "sudo bash /tmp/join-command.sh"
done

# 4. Verify the cluster
echo "4. Verifying cluster status..."
ssh $SSH_USER@$MASTER_NODE "kubectl get nodes"

echo "✅ Cluster deployment complete"

Quick Tool Stack Deployment

1. CNI network plugins

Flannel

#!/bin/bash
# Install Flannel

kubectl apply -f https://github.com/flannel-io/flannel/releases/latest/download/kube-flannel.yml

# Verify
kubectl get pods -n kube-flannel
kubectl get nodes

Calico

#!/bin/bash
# Install Calico via the Tigera operator

kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.0/manifests/tigera-operator.yaml

kubectl create -f - <<EOF
apiVersion: operator.tigera.io/v1
kind: Installation
metadata:
  name: default
spec:
  calicoNetwork:
    ipPools:
    - blockSize: 26
      cidr: 10.244.0.0/16
      encapsulation: VXLANCrossSubnet
      natOutgoing: Enabled
      nodeSelector: all()
EOF

# Verify
kubectl get pods -n calico-system

Cilium

#!/bin/bash
# Install Cilium

# Install the Cilium CLI
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/master/stable.txt)
CLI_ARCH=amd64
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
sha256sum --check cilium-linux-${CLI_ARCH}.tar.gz.sha256sum
sudo tar xzvfC cilium-linux-${CLI_ARCH}.tar.gz /usr/local/bin
rm cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}

# Install Cilium into the cluster
cilium install --version 1.14.0

# Verify
cilium status
kubectl get pods -n kube-system -l k8s-app=cilium
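
The Cilium CLI also ships an end-to-end connectivity test that deploys probe workloads and exercises pod-to-pod, pod-to-service and egress paths; it makes a convenient smoke test right after installation:

# Run Cilium's built-in connectivity test (creates and later removes a cilium-test namespace)
cilium connectivity test

# Re-check the overall agent/operator status afterwards
cilium status --wait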

2. Ingress Controller

#!/bin/bash
# Install the NGINX Ingress Controller

kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.8.1/deploy/static/provider/cloud/deploy.yaml

# Wait until the controller is ready
kubectl wait --namespace ingress-nginx \
  --for=condition=ready pod \
  --selector=app.kubernetes.io/component=controller \
  --timeout=120s

# Verify
kubectl get pods -n ingress-nginx
kubectl get svc -n ingress-nginx
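
To confirm that the controller actually routes traffic, a throwaway Deployment, Service and Ingress can be applied and curled; the example below assumes the kind port mapping (80 -> 80) from earlier, otherwise substitute the controller's external address:

#!/bin/bash
# Minimal Ingress smoke test (example resources only, removed afterwards)
kubectl create deployment web --image=nginx
kubectl expose deployment web --port=80

cat <<EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: web
spec:
  ingressClassName: nginx
  rules:
  - http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: web
            port:
              number: 80
EOF

# Should return the nginx welcome page through the ingress controller
curl -s http://localhost/ | head -5

# Clean up
kubectl delete ingress web
kubectl delete svc web
kubectl delete deployment web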

3. Monitoring stack (Prometheus + Grafana)

#!/bin/bash
# Deploy Prometheus + Grafana

set -e

echo "=== Deploying the monitoring stack ==="

# 1. Add the Helm repository
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update

# 2. Install kube-prometheus-stack
helm install prometheus prometheus-community/kube-prometheus-stack \
  --namespace monitoring \
  --create-namespace \
  --set prometheus.prometheusSpec.retention=7d \
  --set prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.resources.requests.storage=50Gi \
  --set grafana.adminPassword=admin123 \
  --set grafana.persistence.enabled=true \
  --set grafana.persistence.size=10Gi

# 3. Wait for the deployment to finish
echo "Waiting for pods to become ready..."
kubectl wait --for=condition=Ready pods --all -n monitoring --timeout=300s

# 4. Expose services
echo "Exposing the Grafana service..."
kubectl port-forward -n monitoring svc/prometheus-grafana 3000:80 &

echo "✅ Monitoring stack deployed"
echo "Grafana: http://localhost:3000 (admin/admin123)"
echo "Prometheus: kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090"

4. Storage backends

NFS Provisioner

#!/bin/bash
# Deploy the NFS provisioner

set -e

# 1. Install an NFS server (on one node)
sudo apt-get install -y nfs-kernel-server
sudo mkdir -p /nfs/data
sudo chown nobody:nogroup /nfs/data
sudo chmod 777 /nfs/data

# Configure the NFS export
echo "/nfs/data *(rw,sync,no_subtree_check,no_root_squash)" | sudo tee -a /etc/exports
sudo exportfs -ra
sudo systemctl restart nfs-kernel-server

# 2. Install the NFS client on all nodes
sudo apt-get install -y nfs-common

# 3. Deploy the NFS subdir external provisioner
helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/
helm install nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
  --set nfs.server=<NFS_SERVER_IP> \
  --set nfs.path=/nfs/data \
  --set storageClass.name=nfs \
  --namespace kube-system

# 4. Verify
kubectl get sc
kubectl get pods -n kube-system -l app=nfs-subdir-external-provisioner

echo "✅ NFS provisioner deployed"

Local Path Provisioner

#!/bin/bash
# Deploy the Local Path Provisioner

kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.24/deploy/local-path-storage.yaml

# Make it the default StorageClass
kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'

# Verify
kubectl get sc
kubectl get pods -n local-path-storage
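
One caveat: the local-path StorageClass uses the WaitForFirstConsumer binding mode, so a bare PVC stays Pending until a pod actually mounts it; keep this in mind when a verification script checks for a Bound phase. A quick way to see it bind (example resources):

#!/bin/bash
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: lp-test
spec:
  accessModes: ["ReadWriteOnce"]
  resources:
    requests:
      storage: 1Gi
---
apiVersion: v1
kind: Pod
metadata:
  name: lp-writer
spec:
  restartPolicy: Never
  containers:
  - name: writer
    image: busybox
    command: ["sh", "-c", "echo ok > /data/ok && sleep 5"]
    volumeMounts:
    - name: data
      mountPath: /data
  volumes:
  - name: data
    persistentVolumeClaim:
      claimName: lp-test
EOF

# The claim moves from Pending to Bound once the pod is scheduled
kubectl get pvc lp-test

# Clean up
kubectl delete pod lp-writer
kubectl delete pvc lp-test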

5. Benchmarking toolset

#!/bin/bash
# Deploy a benchmarking tools pod

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: bench-tools
  labels:
    app: bench-tools
spec:
  containers:
  - name: tools
    image: nicolaka/netshoot
    command: ["sleep", "infinity"]
    resources:
      requests:
        cpu: 1
        memory: 1Gi
  restartPolicy: Never
EOF

# Wait for the pod before installing additional tools into it
kubectl wait --for=condition=Ready pod bench-tools --timeout=120s
kubectl exec -it bench-tools -- sh -c "
  apk add --no-cache \
    wrk \
    iperf3 \
    fio \
    stress-ng \
    sysbench
"

echo "✅ Benchmark tools pod deployed"
echo "Usage: kubectl exec -it bench-tools -- bash"

Environment Verification and Testing

Full verification script

#!/bin/bash
# verify-cluster.sh - cluster verification script

set -e

echo "=== Kubernetes cluster verification ==="

# 1. Check nodes
echo "1. Checking node status..."
kubectl get nodes
NOT_READY=$(kubectl get nodes --no-headers | grep -cw NotReady || true)
if [ "$NOT_READY" -gt 0 ]; then
  echo "❌ Some nodes are not ready"
  exit 1
fi
echo "✅ All nodes are ready"

# 2. Check system pods
echo "2. Checking system pods..."
kubectl get pods -A
NOT_RUNNING=$(kubectl get pods -A --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
if [ "$NOT_RUNNING" -gt 0 ]; then
  echo "❌ Some pods are not running"
  kubectl get pods -A --field-selector=status.phase!=Running,status.phase!=Succeeded
  exit 1
fi
echo "✅ All pods are running"

# 3. Test DNS
echo "3. Testing DNS resolution..."
kubectl run test-dns --image=busybox --rm -it --restart=Never -- nslookup kubernetes.default
echo "✅ DNS resolution works"

# 4. Test pod-to-pod networking
echo "4. Testing pod-to-pod networking..."
kubectl create deployment nginx --image=nginx --replicas=2
kubectl wait --for=condition=Ready pod -l app=nginx --timeout=60s
kubectl expose deployment nginx --port=80
kubectl run test-net --image=busybox --rm -it --restart=Never -- wget -O- http://nginx
kubectl delete deployment nginx
kubectl delete svc nginx
echo "✅ Networking works"

# 5. Test storage
echo "5. Testing storage..."
# Note: assumes the default StorageClass binds immediately; with a
# WaitForFirstConsumer class (e.g. local-path) the PVC stays Pending until a pod mounts it.
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
EOF

sleep 5
PVC_STATUS=$(kubectl get pvc test-pvc -o jsonpath='{.status.phase}')
if [ "$PVC_STATUS" != "Bound" ]; then
  echo "❌ PVC is not bound"
  kubectl get pvc test-pvc
  exit 1
fi
kubectl delete pvc test-pvc
echo "✅ Storage works"

# 6. Performance baseline
echo "6. Running a baseline performance test..."
kubectl run test-perf --image=nginx --restart=Never
kubectl wait --for=condition=Ready pod test-perf --timeout=60s
kubectl expose pod test-perf --port=80
kubectl run bench --image=williamyeh/wrk --rm -it --restart=Never -- \
  -t4 -c100 -d10s http://test-perf
kubectl delete pod test-perf
kubectl delete svc test-perf
echo "✅ Performance test finished"

echo ""
echo "==============================================="
echo "✅ All cluster checks passed"
echo "==============================================="
echo ""
echo "Cluster info:"
kubectl cluster-info
echo ""
echo "Resource usage:"
kubectl top nodes 2>/dev/null || echo "Metrics Server is not installed"

Quick test script

#!/bin/bash
# quick-test.sh - quick functional test

# Create a test namespace
kubectl create ns test

# Deploy a test application and wait for it
kubectl create deployment nginx --image=nginx -n test
kubectl expose deployment nginx --port=80 -n test
kubectl wait --for=condition=Available deployment/nginx -n test --timeout=60s

# Test access
kubectl run curl --image=curlimages/curl -i --rm --restart=Never -n test -- \
  curl -s http://nginx

# Clean up
kubectl delete ns test

echo "✅ Quick test complete"

One-click cleanup script

#!/bin/bash
# cleanup.sh - clean up the environment

echo "⚠️  About to clean up the Kubernetes environment"
read -p "Continue? (yes/no) " -r
echo
if [[ ! $REPLY =~ ^yes$ ]]; then
  exit 1
fi

# kind
command -v kind >/dev/null && kind delete cluster --name lab

# k3s
[ -x /usr/local/bin/k3s-uninstall.sh ] && /usr/local/bin/k3s-uninstall.sh

# minikube
command -v minikube >/dev/null && minikube delete

# kubeadm
command -v kubeadm >/dev/null && sudo kubeadm reset -f
sudo rm -rf ~/.kube
sudo rm -rf /etc/cni/net.d
sudo rm -rf /var/lib/etcd

echo "✅ Cleanup complete"
