# Troubleshooting and debugging example.
#
# Manually triggered workflow: it runs the xgj/actions/setup-env action and
# then executes a series of best-effort diagnostics (tool versions, network
# reachability, file permissions, system logs). Most diagnostic commands are
# guarded with `|| echo ...` so a single failing probe does not abort the job.
name: Troubleshooting Example

on:
  workflow_dispatch:
    inputs:
      debug_mode:
        description: '启用调试模式'
        required: false
        default: false
        type: boolean

jobs:
  debug-setup:
    runs-on: ubuntu-latest
    steps:
      - name: 检出代码
        uses: actions/checkout@v4

      # Exposes the outputs docker-version, kubectl-version and cache-hit,
      # which the later steps read through `steps.setup.outputs.*`.
      - name: 配置环境(调试模式)
        id: setup
        uses: xgj/actions/setup-env@v1
        with:
          docker-password: ${{ secrets.DOCKER_PASSWORD }}
          kube-config: ${{ secrets.KUBE_CONFIG }}
          cache-key: 'debug-env'
        env:
          # Enable verbose logging.
          # NOTE(review): GitHub documents enabling step/runner debug logging
          # via a repository secret or variable with these names; as step-level
          # env they are only visible to this action's process. Confirm that
          # setup-env actually reads them.
          ACTIONS_STEP_DEBUG: ${{ inputs.debug_mode }}
          ACTIONS_RUNNER_DEBUG: ${{ inputs.debug_mode }}

      - name: 环境信息检查
        env:
          # Step outputs are passed through the environment instead of being
          # pasted into the script text, so unexpected characters in them
          # cannot alter the shell code.
          DOCKER_VERSION: ${{ steps.setup.outputs.docker-version }}
          KUBECTL_VERSION: ${{ steps.setup.outputs.kubectl-version }}
          CACHE_HIT: ${{ steps.setup.outputs.cache-hit }}
        run: |
          echo "=== 系统信息 ==="
          uname -a
          cat /etc/os-release

          echo -e "\n=== 已安装工具版本 ==="
          echo "Docker: $DOCKER_VERSION"
          echo "kubectl: $KUBECTL_VERSION"
          echo "Git: $(git --version)"

          echo -e "\n=== 缓存状态 ==="
          echo "Cache Hit: $CACHE_HIT"

          echo -e "\n=== Docker 状态 ==="
          docker version || echo "Docker 命令失败"
          docker info || echo "Docker info 失败"

          echo -e "\n=== kubectl 状态 ==="
          kubectl version --client || echo "kubectl 客户端版本获取失败"
          kubectl config current-context || echo "kubectl 上下文获取失败"

      - name: 网络连接测试
        env:
          # Only a true/false flag is exported; the kubeconfig secret itself
          # never appears in the script text or in this step's environment.
          HAS_KUBE_CONFIG: ${{ secrets.KUBE_CONFIG != '' }}
        run: |
          echo "=== 网络连接测试 ==="

          # Test connectivity to the Docker registry.
          # Timeouts keep an unreachable host from stalling the job.
          echo "测试 Docker 仓库连接..."
          curl -I --connect-timeout 10 --max-time 30 \
            https://docker-registry.bjxgj.com/v2/ || echo "Docker 仓库连接失败"

          # Test the Kubernetes API (only when a kubeconfig secret is set).
          if [[ "$HAS_KUBE_CONFIG" == "true" ]]; then
            echo "测试 Kubernetes API 连接..."
            kubectl cluster-info --request-timeout=10s || echo "Kubernetes API 连接失败"
          fi

          # Test external network access.
          echo "测试外部网络连接..."
          curl -I --connect-timeout 10 --max-time 30 \
            https://github.com || echo "GitHub 连接失败"
          curl -I --connect-timeout 10 --max-time 30 \
            https://mirrors.aliyun.com || echo "阿里云镜像连接失败"

      - name: 权限检查
        run: |
          echo "=== 权限检查 ==="

          # Check file permissions of the tool configuration directories.
          ls -la ~/.kube/ || echo "kubectl 配置目录不存在"
          ls -la ~/.docker/ || echo "Docker 配置目录不存在"

          # Show the current user and its groups.
          id
          groups

          # Check for passwordless sudo (-n: never prompt).
          if sudo -n true; then
            echo "具有 sudo 权限"
          else
            echo "无 sudo 权限"
          fi

      # Runs only when an earlier step has failed.
      - name: 清理和重试(失败时)
        if: failure()
        run: |
          echo "=== 清理环境 ==="

          # Remove the Docker client configuration.
          rm -rf ~/.docker/config.json || true

          # Remove the kubectl configuration.
          rm -rf ~/.kube/config || true

          # Remove cached files.
          # NOTE(review): presumably temp files left by setup-env; verify.
          rm -rf /tmp/setup-env-* || true

          echo "环境已清理,建议重新运行"

      # Runs regardless of the outcome of earlier steps.
      - name: 收集日志
        if: always()
        run: |
          echo "=== 收集系统日志 ==="

          # Collect the tail of the kernel log.
          sudo dmesg | tail -50 || echo "无法获取系统日志"

          # Collect recent Docker service logs.
          sudo journalctl -u docker --no-pager --lines=20 || echo "无法获取 Docker 日志"

          # Collect network interface and listening-port information.
          ip addr show || echo "无法获取网络信息"
          ss -tuln || echo "无法获取端口信息"

      # Appends a Markdown failure report to the job summary.
      - name: 生成故障报告
        if: failure()
        env:
          # Passed via env so output values are never interpreted as shell code.
          DOCKER_VERSION: ${{ steps.setup.outputs.docker-version }}
          KUBECTL_VERSION: ${{ steps.setup.outputs.kubectl-version }}
          CACHE_HIT: ${{ steps.setup.outputs.cache-hit }}
        run: |
          # One grouped, quoted redirect instead of repeating it on each line.
          {
            echo "## 🚨 故障报告"
            echo "### 环境信息"
            echo "- **运行器**: ${{ runner.os }}"
            echo "- **缓存命中**: $CACHE_HIT"
            echo "- **Docker 版本**: $DOCKER_VERSION"
            echo "- **kubectl 版本**: $KUBECTL_VERSION"
            echo ""
            echo "### 建议解决方案"
            echo "1. 检查网络连接"
            echo "2. 验证 secrets 配置"
            echo "3. 清除缓存重试"
            echo "4. 联系运维团队"
          } >> "$GITHUB_STEP_SUMMARY"