Part 9: Lab Environment Quick-Setup Guide
Deploy a complete Kubernetes lab environment with one command
Choosing an Environment Option
Option Comparison
| Option | Use case | Pros | Cons | Resources |
|---|---|---|---|---|
| kind | Local dev / CI | Fast, lightweight, multi-cluster | Limited networking | 2C/4G |
| k3s | Edge / IoT | Lightweight, production-grade | Trimmed feature set | 1C/2G |
| minikube | Learning and testing | Easy to use, rich addons | Mediocre performance | 2C/4G |
| kubeadm | Production | Complete, customizable | Complex to configure | 4C/8G |
Recommended Specs
Learning / experiments:
- CPU: 4 cores
- Memory: 8 GB
- Disk: 50 GB
- Option: kind or minikube
Performance testing:
- CPU: 8 cores
- Memory: 16 GB
- Disk: 100 GB
- Option: kubeadm multi-node
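Before committing to an option, check that the host actually has the headroom. A quick sketch, assuming a Linux host with coreutils and (optionally) Docker:
#!/bin/bash
# check-resources.sh - quick host resource check (illustrative helper)
echo "CPU cores : $(nproc)"
echo "Memory    : $(free -h | awk '/^Mem:/{print $2}')"
echo "Free disk : $(df -h / | awk 'NR==2{print $4}')"
# kind and the minikube docker driver both need a working Docker daemon
docker info >/dev/null 2>&1 && echo "Docker    : OK" || echo "Docker    : not available"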
Single-Node Setup
Option 1: kind (recommended)
Install kind
#!/bin/bash
# Install kind - Kubernetes in Docker
set -e
echo "=== 安装 kind ==="
# 检测操作系统
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
ARCH=$(uname -m)
case $ARCH in
x86_64) ARCH="amd64" ;;
aarch64) ARCH="arm64" ;;
esac
# Download the kind binary
VERSION="v0.20.0"
curl -Lo ./kind "https://kind.sigs.k8s.io/dl/${VERSION}/kind-${OS}-${ARCH}"
chmod +x ./kind
sudo mv ./kind /usr/local/bin/kind
# Verify the installation
kind version
echo "kind installation complete"
Create a cluster
#!/bin/bash
# Create a kind cluster
cat <<EOF | kind create cluster --name lab --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
kubeadmConfigPatches:
- |
kind: InitConfiguration
nodeRegistration:
kubeletExtraArgs:
node-labels: "ingress-ready=true"
extraPortMappings:
- containerPort: 80
hostPort: 80
protocol: TCP
- containerPort: 443
hostPort: 443
protocol: TCP
- role: worker
- role: worker
EOF
# kind merges the new cluster into your kubeconfig automatically; verify access
kubectl cluster-info --context kind-lab
echo "kind cluster created"
Option 2: k3s
#!/bin/bash
# Install k3s
set -e
echo "=== Installing k3s ==="
# Install the k3s server
curl -sfL https://get.k3s.io | sh -
# Wait for the node to become ready
echo "Waiting for the node to become ready..."
sleep 10
# Configure kubectl (k3s writes its kubeconfig to /etc/rancher/k3s/k3s.yaml)
mkdir -p ~/.kube
sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config
sudo chown $(whoami):$(whoami) ~/.kube/config
# Verify
kubectl get nodes
echo "k3s installation complete"
Option 3: minikube
#!/bin/bash
# Install minikube
set -e
echo "=== Installing minikube ==="
# Download and install the minikube binary (amd64)
curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
sudo install minikube-linux-amd64 /usr/local/bin/minikube
# Start the cluster
minikube start \
--cpus=4 \
--memory=8192 \
--disk-size=50g \
--driver=docker \
--kubernetes-version=v1.28.0
# Enable addons
minikube addons enable ingress
minikube addons enable metrics-server
minikube addons enable dashboard
# Verify
kubectl get nodes
echo "minikube installation complete"
echo "Run 'minikube dashboard' to open the dashboard"
Multi-Node Cluster Setup
Complete kubeadm Deployment Scripts
1. Preparation (all nodes)
#!/bin/bash
# prepare-nodes.sh - node preparation script
set -e
echo "=== 准备节点环境 ==="
# 1. 禁用 swap
sudo swapoff -a
sudo sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
# 2. Load kernel modules
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
# 3. Set required kernel parameters
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
sudo sysctl --system
# 4. Install containerd
sudo apt-get update
sudo apt-get install -y containerd
# Configure containerd to use the systemd cgroup driver
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
sudo systemctl restart containerd
sudo systemctl enable containerd
# 5. Install kubeadm, kubelet, kubectl
sudo apt-get update
sudo apt-get install -y apt-transport-https ca-certificates curl
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.28/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.28/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
sudo apt-get update
sudo apt-get install -y kubelet kubeadm kubectl
sudo apt-mark hold kubelet kubeadm kubectl
echo " 节点准备完成"
2. Initialize the Control Plane (master node)
#!/bin/bash
# init-master.sh - initialize the master node
set -e
echo "=== Initializing control plane ==="
# Initialize the cluster
sudo kubeadm init \
--pod-network-cidr=10.244.0.0/16 \
--service-cidr=10.96.0.0/12 \
--kubernetes-version=v1.28.0
# Configure kubectl
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# Verify
kubectl get nodes
echo "Control plane initialized"
echo ""
echo "Save the following command to join worker nodes:"
sudo kubeadm token create --print-join-command
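For a single-machine lab where the control plane should also run workloads, remove the control-plane taint (on v1.28 the taint key is node-role.kubernetes.io/control-plane):
# Allow regular pods to schedule on the control-plane node
kubectl taint nodes --all node-role.kubernetes.io/control-plane-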
3. Join Worker Nodes
#!/bin/bash
# join-worker.sh - join a worker node to the cluster
# Use the join command printed on the master node
sudo kubeadm join <master-ip>:6443 \
--token <token> \
--discovery-token-ca-cert-hash sha256:<hash>
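Join tokens expire after 24 hours by default; if the saved command no longer works, generate a fresh one on the master:
# Print a new, complete join command (run on the master)
sudo kubeadm token create --print-join-command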
4. One-Click Deployment Script
#!/bin/bash
# deploy-k8s-cluster.sh - deploy a complete cluster in one step
set -e
echo "=== Kubernetes cluster one-click deployment ==="
# Configuration
MASTER_NODE="192.168.1.10"
WORKER_NODES=("192.168.1.11" "192.168.1.12")
SSH_USER="ubuntu"
# 1. Prepare all nodes
echo "1. Preparing nodes..."
for node in $MASTER_NODE "${WORKER_NODES[@]}"; do
  echo "Preparing node: $node"
  scp prepare-nodes.sh $SSH_USER@$node:/tmp/
  ssh $SSH_USER@$node "bash /tmp/prepare-nodes.sh"
done
# 2. Initialize the master
echo "2. Initializing master node..."
scp init-master.sh $SSH_USER@$MASTER_NODE:/tmp/
ssh $SSH_USER@$MASTER_NODE "bash /tmp/init-master.sh"
# Capture only the join command; the init script's full output is not a valid script
ssh $SSH_USER@$MASTER_NODE "sudo kubeadm token create --print-join-command" > /tmp/join-command.sh
# 3. Join worker nodes
echo "3. Joining worker nodes..."
for node in "${WORKER_NODES[@]}"; do
  echo "Joining node: $node"
  scp /tmp/join-command.sh $SSH_USER@$node:/tmp/
  ssh $SSH_USER@$node "sudo bash /tmp/join-command.sh"
done
# 4. Verify the cluster
echo "4. Verifying cluster state..."
ssh $SSH_USER@$MASTER_NODE "kubectl get nodes"
echo "Cluster deployment complete"
Quick Tool-Stack Deployment
1. CNI Network Plugins
Flannel
#!/bin/bash
# Install Flannel
kubectl apply -f https://github.com/flannel-io/flannel/releases/latest/download/kube-flannel.yml
# Verify
kubectl get pods -n kube-flannel
kubectl get nodes
Calico
#!/bin/bash
# Install Calico via the Tigera operator
kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.0/manifests/tigera-operator.yaml
kubectl create -f - <<EOF
apiVersion: operator.tigera.io/v1
kind: Installation
metadata:
name: default
spec:
calicoNetwork:
ipPools:
- blockSize: 26
cidr: 10.244.0.0/16
encapsulation: VXLANCrossSubnet
natOutgoing: Enabled
nodeSelector: all()
EOF
# Verify
kubectl get pods -n calico-system
Cilium
#!/bin/bash
# Install Cilium
# Install the Cilium CLI
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/master/stable.txt)
CLI_ARCH=amd64
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
sha256sum --check cilium-linux-${CLI_ARCH}.tar.gz.sha256sum
sudo tar xzvfC cilium-linux-${CLI_ARCH}.tar.gz /usr/local/bin
rm cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
# Install Cilium into the cluster
cilium install --version 1.14.0
# Verify
cilium status
kubectl get pods -n kube-system -l k8s-app=cilium
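The Cilium CLI also ships an end-to-end connectivity test that deploys probe pods and exercises pod-to-pod, pod-to-service, and egress paths:
# Run the built-in connectivity test (takes a few minutes)
cilium connectivity test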
2. Ingress Controller
#!/bin/bash
# Install the NGINX Ingress Controller
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.8.1/deploy/static/provider/cloud/deploy.yaml
# Wait for the controller to become ready
kubectl wait --namespace ingress-nginx \
--for=condition=ready pod \
--selector=app.kubernetes.io/component=controller \
--timeout=120s
# Verify
kubectl get pods -n ingress-nginx
kubectl get svc -n ingress-nginx
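To confirm the controller actually routes traffic, deploy a throwaway backend and an Ingress in front of it. A minimal sketch: the demo name and host are illustrative, and the final curl falls back to localhost when no LoadBalancer IP is assigned (which matches the kind port mappings earlier in this guide):
#!/bin/bash
# Smoke-test the ingress controller with a demo backend
kubectl create deployment demo --image=nginx
kubectl expose deployment demo --port=80
cat <<EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: demo
spec:
  ingressClassName: nginx
  rules:
  - host: demo.local
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: demo
            port:
              number: 80
EOF
# Send a request through the controller, overriding the Host header
INGRESS_IP=$(kubectl get svc -n ingress-nginx ingress-nginx-controller -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
curl -s -H "Host: demo.local" http://${INGRESS_IP:-localhost}/
# Clean up
kubectl delete ingress demo && kubectl delete svc demo && kubectl delete deployment demo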
3. Monitoring Stack (Prometheus + Grafana)
#!/bin/bash
# Deploy Prometheus + Grafana
set -e
echo "=== 部署监控栈 ==="
# 1. 添加 Helm 仓库
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
# 2. Install kube-prometheus-stack
helm install prometheus prometheus-community/kube-prometheus-stack \
--namespace monitoring \
--create-namespace \
--set prometheus.prometheusSpec.retention=7d \
--set prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.resources.requests.storage=50Gi \
--set grafana.adminPassword=admin123 \
--set grafana.persistence.enabled=true \
--set grafana.persistence.size=10Gi
# 3. Wait for the deployment to finish
echo "Waiting for pods to become ready..."
kubectl wait --for=condition=Ready pods --all -n monitoring --timeout=300s
# 4. Expose services
echo "Exposing the Grafana service..."
kubectl port-forward -n monitoring svc/prometheus-grafana 3000:80 &
echo " 监控栈部署完成"
echo "Grafana: http://localhost:3000 (admin/admin123)"
echo "Prometheus: kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090"
4. Storage Backends
NFS Provisioner
#!/bin/bash
# Deploy the NFS provisioner
set -e
# 1. Install an NFS server (on one node)
sudo apt-get install -y nfs-kernel-server
sudo mkdir -p /nfs/data
sudo chown nobody:nogroup /nfs/data
sudo chmod 777 /nfs/data
# Configure the NFS export
echo "/nfs/data *(rw,sync,no_subtree_check,no_root_squash)" | sudo tee -a /etc/exports
sudo exportfs -ra
sudo systemctl restart nfs-kernel-server
# 2. Install the NFS client on all nodes
sudo apt-get install -y nfs-common
# 3. Deploy the NFS provisioner
helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/
helm install nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
--set nfs.server=<NFS_SERVER_IP> \
--set nfs.path=/nfs/data \
--set storageClass.name=nfs \
--namespace kube-system
# 4. Verify
kubectl get sc
kubectl get pods -n kube-system -l app=nfs-subdir-external-provisioner
echo " NFS Provisioner 部署完成"
Local Path Provisioner
#!/bin/bash
# Deploy the Local Path Provisioner
kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.24/deploy/local-path-storage.yaml
# Set it as the default StorageClass
kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
# Verify
kubectl get sc
kubectl get pods -n local-path-storage
5. Benchmarking Toolset
#!/bin/bash
# Deploy a Pod with benchmarking tools
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: bench-tools
labels:
app: bench-tools
spec:
containers:
- name: tools
image: nicolaka/netshoot
command: ["sleep", "infinity"]
resources:
requests:
cpu: 1
memory: 1Gi
restartPolicy: Never
EOF
# Install tools into the Pod (the netshoot image is Alpine-based)
kubectl exec -it bench-tools -- sh -c "
apk add --no-cache \
wrk \
iperf3 \
fio \
stress-ng \
sysbench
"
echo " 压测工具 Pod 部署完成"
echo "使用: kubectl exec -it bench-tools -- bash"
Environment Verification and Testing
Full Verification Script
#!/bin/bash
# verify-cluster.sh - cluster verification script
set -e
echo "=== Kubernetes 集群验证 ==="
# 1. 检查节点
echo "1. 检查节点状态..."
kubectl get nodes
ALL_READY=$(kubectl get nodes | grep -v NotReady | wc -l)
if [ $ALL_READY -eq 0 ]; then
echo " 有节点未就绪"
exit 1
fi
echo " 所有节点就绪"
# 2. 检查系统 Pod
echo "2. 检查系统 Pod..."
kubectl get pods -A
NOT_RUNNING=$(kubectl get pods -A --field-selector=status.phase!=Running,status.phase!=Succeeded | grep -v NAME | wc -l)
if [ $NOT_RUNNING -gt 0 ]; then
echo " 有 Pod 未运行"
kubectl get pods -A --field-selector=status.phase!=Running,status.phase!=Succeeded
exit 1
fi
echo " 所有 Pod 运行正常"
# 3. Test DNS
echo "3. Testing DNS resolution..."
kubectl run test-dns --image=busybox --rm -it --restart=Never -- nslookup kubernetes.default
echo "DNS resolution OK"
# 4. Test pod-to-pod networking
echo "4. Testing pod-to-pod networking..."
kubectl create deployment nginx --image=nginx --replicas=2
kubectl wait --for=condition=Ready pod -l app=nginx --timeout=60s
kubectl expose deployment nginx --port=80
kubectl run test-net --image=busybox --rm -it --restart=Never -- wget -O- http://nginx
kubectl delete deployment nginx
kubectl delete svc nginx
echo "Networking OK"
# 5. Test storage
echo "5. Testing storage..."
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: test-pvc
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
EOF
# Note: a StorageClass with volumeBindingMode WaitForFirstConsumer stays Pending until a pod mounts the PVC
sleep 5
PVC_STATUS=$(kubectl get pvc test-pvc -o jsonpath='{.status.phase}')
if [ "$PVC_STATUS" != "Bound" ]; then
  echo "PVC not bound"
  kubectl get pvc test-pvc
  exit 1
fi
kubectl delete pvc test-pvc
echo "Storage OK"
# 6. Performance baseline
echo "6. Running a performance baseline..."
kubectl run test-perf --image=nginx --restart=Never
kubectl wait --for=condition=Ready pod test-perf --timeout=60s
kubectl expose pod test-perf --port=80
kubectl run bench --image=williamyeh/wrk --rm -it --restart=Never -- \
-t4 -c100 -d10s http://test-perf
kubectl delete pod test-perf
kubectl delete svc test-perf
echo " 性能测试完成"
echo ""
echo "==============================================="
echo " 集群验证全部通过"
echo "==============================================="
echo ""
echo "集群信息:"
kubectl cluster-info
echo ""
echo "资源使用情况:"
kubectl top nodes 2>/dev/null || echo "Metrics Server 未安装"
Quick Test Script
#!/bin/bash
# quick-test.sh - quick functional test
# Create a test namespace
kubectl create ns test
# Deploy a test application
kubectl create deployment nginx --image=nginx -n test
kubectl expose deployment nginx --port=80 -n test
kubectl wait --for=condition=Available deployment/nginx -n test --timeout=60s
# Test access
kubectl run curl --image=curlimages/curl -i --rm --restart=Never -n test -- \
  curl -s http://nginx
# Clean up
kubectl delete ns test
echo "Quick test complete"
One-Click Cleanup Script
#!/bin/bash
# cleanup.sh - tear down the environment
echo "⚠️  About to tear down the Kubernetes environment"
read -p "Continue? (yes/no) " -r
echo
if [[ ! $REPLY =~ ^yes$ ]]; then
  exit 1
fi
# Each step is best-effort: errors are expected for tools that are not installed
# kind
kind delete cluster --name lab
# k3s
/usr/local/bin/k3s-uninstall.sh
# minikube
minikube delete
# kubeadm
sudo kubeadm reset -f
sudo rm -rf ~/.kube
sudo rm -rf /etc/cni/net.d
sudo rm -rf /var/lib/etcd
echo " 清理完成"