Files
xgj/setup-env/examples/troubleshooting.yml

133 lines
4.6 KiB
YAML

# Troubleshooting and debugging example.
# Started manually (workflow_dispatch); the optional boolean input
# `debug_mode` lets the caller request debug logging for a single run.
name: Troubleshooting Example

on:
  workflow_dispatch:
    inputs:
      debug_mode:
        description: '启用调试模式'
        required: false
        default: false
        type: boolean
jobs:
  # Single diagnostic job: runs the environment setup action, then inspects
  # tool versions, network reachability and permissions, collects logs, and
  # writes a failure report to the step summary when something went wrong.
  debug-setup:
    runs-on: ubuntu-latest
    steps:
      - name: 检出代码
        uses: actions/checkout@v4

      - name: 配置环境(调试模式)
        id: setup
        uses: actions/xgj/setup-env@v1
        with:
          docker-password: ${{ secrets.DOCKER_PASSWORD }}
          kube-config: ${{ secrets.KUBE_CONFIG }}
          cache-key: 'debug-env'
        env:
          # Request verbose logging when the debug_mode input is true.
          # NOTE(review): GitHub documents these two names as repository
          # secrets/variables; confirm the action actually reads them from
          # the step environment.
          ACTIONS_STEP_DEBUG: ${{ inputs.debug_mode }}
          ACTIONS_RUNNER_DEBUG: ${{ inputs.debug_mode }}

      - name: 环境信息检查
        env:
          # Step outputs reach the script through the environment rather
          # than being expanded into the script text, so unexpected
          # characters in a value cannot change the shell code.
          DOCKER_VERSION: ${{ steps.setup.outputs.docker-version }}
          KUBECTL_VERSION: ${{ steps.setup.outputs.kubectl-version }}
          CACHE_HIT: ${{ steps.setup.outputs.cache-hit }}
        run: |
          echo "=== 系统信息 ==="
          uname -a
          cat /etc/os-release
          echo -e "\n=== 已安装工具版本 ==="
          echo "Docker: ${DOCKER_VERSION}"
          echo "kubectl: ${KUBECTL_VERSION}"
          echo "Git: $(git --version)"
          echo -e "\n=== 缓存状态 ==="
          echo "Cache Hit: ${CACHE_HIT}"
          echo -e "\n=== Docker 状态 ==="
          docker version || echo "Docker 命令失败"
          docker info || echo "Docker info 失败"
          echo -e "\n=== kubectl 状态 ==="
          kubectl version --client || echo "kubectl 客户端版本获取失败"
          kubectl config current-context || echo "kubectl 上下文获取失败"

      - name: 网络连接测试
        env:
          # Only a "true"/"false" flag is exposed to the script. Expanding
          # the kubeconfig secret itself into the script would embed the
          # secret in the shell code and break on quotes or newlines.
          HAS_KUBE_CONFIG: ${{ secrets.KUBE_CONFIG != '' }}
        run: |
          echo "=== 网络连接测试 ==="
          # Bound every HTTP probe so an unreachable host cannot stall the job.
          curl_opts=(-I --connect-timeout 10 --max-time 30)
          # Probe the Docker registry.
          echo "测试 Docker 仓库连接..."
          curl "${curl_opts[@]}" https://docker-registry.bjxgj.com/v2/ || echo "Docker 仓库连接失败"
          # Probe the Kubernetes API only when a kubeconfig secret is set.
          if [[ "${HAS_KUBE_CONFIG}" == "true" ]]; then
            echo "测试 Kubernetes API 连接..."
            kubectl cluster-info --request-timeout=10s || echo "Kubernetes API 连接失败"
          fi
          # Probe external connectivity.
          echo "测试外部网络连接..."
          curl "${curl_opts[@]}" https://github.com || echo "GitHub 连接失败"
          curl "${curl_opts[@]}" https://mirrors.aliyun.com || echo "阿里云镜像连接失败"

      - name: 权限检查
        run: |
          echo "=== 权限检查 ==="
          # Inspect the tool configuration directories.
          ls -la ~/.kube/ || echo "kubectl 配置目录不存在"
          ls -la ~/.docker/ || echo "Docker 配置目录不存在"
          # Show the current user and its groups.
          id
          groups
          # Check for passwordless sudo (-n never prompts).
          if sudo -n true; then
            echo "具有 sudo 权限"
          else
            echo "无 sudo 权限"
          fi

      - name: 清理和重试(失败时)
        if: failure()
        run: |
          echo "=== 清理环境 ==="
          # Remove the Docker client configuration.
          rm -rf ~/.docker/config.json || true
          # Remove the kubectl configuration.
          rm -rf ~/.kube/config || true
          # Remove temporary setup-env files.
          rm -rf /tmp/setup-env-* || true
          echo "环境已清理,建议重新运行"

      - name: 收集日志
        if: always()
        run: |
          # Without pipefail the status of `sudo dmesg | tail` is tail's, so
          # the fallback message below could never be printed.
          set -o pipefail
          echo "=== 收集系统日志 ==="
          # Kernel log (last 50 lines).
          sudo dmesg | tail -50 || echo "无法获取系统日志"
          # Docker service log.
          sudo journalctl -u docker --no-pager --lines=20 || echo "无法获取 Docker 日志"
          # Network interfaces and listening sockets.
          ip addr show || echo "无法获取网络信息"
          ss -tuln || echo "无法获取端口信息"

      - name: 生成故障报告
        if: failure()
        env:
          # Outputs may be empty when the setup step itself failed.
          DOCKER_VERSION: ${{ steps.setup.outputs.docker-version }}
          KUBECTL_VERSION: ${{ steps.setup.outputs.kubectl-version }}
          CACHE_HIT: ${{ steps.setup.outputs.cache-hit }}
        run: |
          # One grouped, quoted redirect appends the whole report to the
          # job summary file.
          {
            echo "## 🚨 故障报告"
            echo "### 环境信息"
            echo "- **运行器**: ${{ runner.os }}"
            echo "- **缓存命中**: ${CACHE_HIT}"
            echo "- **Docker 版本**: ${DOCKER_VERSION}"
            echo "- **kubectl 版本**: ${KUBECTL_VERSION}"
            echo ""
            echo "### 建议解决方案"
            echo "1. 检查网络连接"
            echo "2. 验证 secrets 配置"
            echo "3. 清除缓存重试"
            echo "4. 联系运维团队"
          } >> "$GITHUB_STEP_SUMMARY"