name: Auto-Retry Flaky Tests

# This example demonstrates using structured outputs to detect flaky test failures
# and automatically retry them, reducing noise from intermittent failures.
#
# Use case: When CI fails, automatically determine if it's likely flaky and retry if so.
#
# Hardening notes:
# - Values taken from the triggering event or from the model's output are handed
#   to shell steps through `env:` rather than being expanded by the expression
#   syntax inside `run:` scripts, so a crafted branch name or summary cannot
#   inject shell commands.
# - A retry re-runs only the failed jobs of the original run and is capped via
#   `run_attempt`, so a persistently failing run cannot be retried forever.

on:
  workflow_run:
    workflows: ["CI"]
    types: [completed]

permissions:
  contents: read
  actions: write  # required to re-run the failed workflow run
  pull-requests: write  # required by `gh pr list` / `gh pr comment`

jobs:
  detect-flaky:
    runs-on: ubuntu-latest
    # Only analyse runs of the watched workflow that actually failed.
    if: ${{ github.event.workflow_run.conclusion == 'failure' }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Asks the model to classify the failure; the JSON schema below forces a
      # machine-readable answer exposed as `steps.detect.outputs.structured_output`.
      # NOTE(review): `@main` is a moving ref - consider pinning to a release tag
      # or commit SHA once the desired version is confirmed.
      - name: Detect flaky test failures
        id: detect
        uses: anthropics/claude-code-action@main
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          prompt: |
            The CI workflow failed: ${{ github.event.workflow_run.html_url }}

            Check the logs: gh run view ${{ github.event.workflow_run.id }} --log-failed

            Determine if this looks like a flaky test failure by checking for:
            - Timeout errors
            - Race conditions
            - Network errors
            - "Expected X but got Y" intermittent failures
            - Tests that passed in previous commits

            Return:
            - is_flaky: true if likely flaky, false if real bug
            - confidence: number 0-1 indicating confidence level
            - summary: brief one-sentence explanation
          claude_args: |
            --json-schema '{"type":"object","properties":{"is_flaky":{"type":"boolean","description":"Whether this appears to be a flaky test failure"},"confidence":{"type":"number","minimum":0,"maximum":1,"description":"Confidence level in the determination"},"summary":{"type":"string","description":"One-sentence explanation of the failure"}},"required":["is_flaky","confidence","summary"]}'

      # Auto-retry only if flaky AND high confidence (>= 0.7), and only while the
      # failed run has been attempted fewer than 3 times (bounds the retry loop,
      # since every re-run that fails triggers this workflow again).
      - name: Retry flaky tests
        id: retry
        if: |
          fromJSON(steps.detect.outputs.structured_output).is_flaky == true &&
          fromJSON(steps.detect.outputs.structured_output).confidence >= 0.7 &&
          github.event.workflow_run.run_attempt < 3
        env:
          GH_TOKEN: ${{ github.token }}
          OUTPUT: ${{ steps.detect.outputs.structured_output }}
          RUN_ID: ${{ github.event.workflow_run.id }}
        run: |
          CONFIDENCE=$(echo "$OUTPUT" | jq -r '.confidence')
          SUMMARY=$(echo "$OUTPUT" | jq -r '.summary')

          echo "🔄 Flaky test detected (confidence: $CONFIDENCE)"
          echo "Summary: $SUMMARY"
          echo ""
          echo "Triggering automatic retry..."

          # Re-run just the failed jobs of the original run. Unlike dispatching
          # the workflow by name, this needs no workflow_dispatch trigger and
          # keeps the result attached to the same commit / pull request.
          gh run rerun "$RUN_ID" --failed

      # Low confidence flaky detection - skip retry
      - name: Low confidence detection
        if: |
          fromJSON(steps.detect.outputs.structured_output).is_flaky == true &&
          fromJSON(steps.detect.outputs.structured_output).confidence < 0.7
        env:
          OUTPUT: ${{ steps.detect.outputs.structured_output }}
        run: |
          CONFIDENCE=$(echo "$OUTPUT" | jq -r '.confidence')

          echo "⚠️ Possible flaky test but confidence too low ($CONFIDENCE)"
          echo "Not retrying automatically - manual review recommended"

      # Comment on PR if this was a PR build.
      # RETRIED is "true" only when the retry step above actually ran and
      # succeeded, so the comment never claims a retry that did not happen.
      # NOTE(review): this step's script continues past the visible portion of
      # the file; the trailing `gh pr comment` command is kept exactly as found.
      - name: Comment on PR
        if: github.event.workflow_run.event == 'pull_request'
        env:
          GH_TOKEN: ${{ github.token }}
          OUTPUT: ${{ steps.detect.outputs.structured_output }}
          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
          RETRIED: ${{ steps.retry.outcome == 'success' }}
        run: |
          IS_FLAKY=$(echo "$OUTPUT" | jq -r '.is_flaky')
          CONFIDENCE=$(echo "$OUTPUT" | jq -r '.confidence')
          SUMMARY=$(echo "$OUTPUT" | jq -r '.summary')

          pr_number=$(gh pr list --head "$HEAD_BRANCH" --json number --jq '.[0].number')

          if [ -n "$pr_number" ]; then
            if [ "$IS_FLAKY" = "true" ] && [ "$RETRIED" = "true" ]; then
              TITLE="🔄 Flaky Test Detected"
              ACTION="✅ Automatically retrying the workflow"
            elif [ "$IS_FLAKY" = "true" ]; then
              TITLE="🔄 Flaky Test Detected"
              ACTION="⚠️ Not retried automatically - manual review recommended"
            else
              TITLE="❌ Test Failure"
              ACTION="⚠️ This appears to be a real bug - manual intervention needed"
            fi

            gh pr comment "$pr_number" --body "$(cat <