feat: add structured output support via --json-schema argument (#687)

* feat: add structured output support

  Add support for Agent SDK structured outputs.

  New input: json_schema
  Output: structured_output (JSON string)
  Access: fromJSON(steps.id.outputs.structured_output).field

  Docs: https://docs.claude.com/en/docs/agent-sdk/structured-outputs

* rm unused

* refactor: simplify structured outputs to use claude_args

  Remove json_schema input in favor of passing --json-schema flag directly
  in claude_args. This simplifies the interface by treating structured
  outputs like other CLI flags (--model, --max-turns, etc.) instead of as
  a special input that gets injected.

  Users now specify: claude_args: '--json-schema {...}'
  Instead of separate: json_schema: {...}

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <noreply@anthropic.com>

* chore: remove unused json-schema util and revert version

  - Remove src/utils/json-schema.ts (no longer used after refactor)
  - Revert Claude Code version from 2.0.45 back to 2.0.42

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
2026-01-22 14:24:13 +08:00 · 2025-11-18 17:18:05 -08:00
parent e45f28fae7
commit 6902c227aa
9 changed files with 730 additions and 2 deletions
--- a/examples/test-failure-analysis.yml
+++ b/examples/test-failure-analysis.yml
@@ -0,0 +1,114 @@
+name: Auto-Retry Flaky Tests
+
+# Example: use structured outputs to classify a failed CI run as flaky or
+# genuine, and automatically retry it when it looks flaky. This cuts down on
+# noise from intermittent failures.
+#
+# Use case: when CI fails, decide whether the failure is likely flaky and,
+# if so, retry.
+
+on:
+  # Runs each time a run of the workflow named "CI" completes.
+  workflow_run:
+    workflows:
+      - CI
+    types:
+      - completed
+
+# Token scopes for github.token. Scopes not listed here are not granted, so
+# every scope a step relies on must be spelled out.
+permissions:
+  contents: read
+  # Needed to re-trigger the failed workflow.
+  actions: write
+  # Needed by the "Comment on PR" step (gh pr list / gh pr comment).
+  pull-requests: write
+
+jobs:
+  detect-flaky:
+    # Skip this job unless the triggering run concluded with 'failure'.
+    if: github.event.workflow_run.conclusion == 'failure'
+    runs-on: ubuntu-latest
+    steps:
+      # Check out the repository into the runner workspace.
+      - uses: actions/checkout@v4
+        name: Checkout repository
+
+      # Ask Claude to classify the failed run. The prompt points it at the run
+      # URL and the failed-job logs; the --json-schema flag in claude_args
+      # requests a JSON object with three required fields: is_flaky (boolean),
+      # confidence (number between 0 and 1) and summary (string). Later steps
+      # read the result from steps.detect.outputs.structured_output.
+      - name: Detect flaky test failures
+        id: detect
+        uses: anthropics/claude-code-action@main
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          prompt: |
+            The CI workflow failed: ${{ github.event.workflow_run.html_url }}
+
+            Check the logs: gh run view ${{ github.event.workflow_run.id }} --log-failed
+
+            Determine if this looks like a flaky test failure by checking for:
+            - Timeout errors
+            - Race conditions
+            - Network errors
+            - "Expected X but got Y" intermittent failures
+            - Tests that passed in previous commits
+
+            Return:
+            - is_flaky: true if likely flaky, false if real bug
+            - confidence: number 0-1 indicating confidence level
+            - summary: brief one-sentence explanation
+          claude_args: |
+            --json-schema '{"type":"object","properties":{"is_flaky":{"type":"boolean","description":"Whether this appears to be a flaky test failure"},"confidence":{"type":"number","minimum":0,"maximum":1,"description":"Confidence level in the determination"},"summary":{"type":"string","description":"One-sentence explanation of the failure"}},"required":["is_flaky","confidence","summary"]}'
+
+      # Auto-retry only if flaky AND high confidence (>= 0.7)
+      #
+      # Values reach the script through env vars rather than being pasted into
+      # it with ${{ }}: the structured output contains model-written text, and
+      # a stray quote there would otherwise break (or inject into) the script.
+      #
+      # NOTE(review): nothing caps the number of retries. Consider also gating
+      # on github.event.workflow_run.run_attempt to avoid a rerun loop.
+      - name: Retry flaky tests
+        if: |
+          fromJSON(steps.detect.outputs.structured_output).is_flaky == true &&
+          fromJSON(steps.detect.outputs.structured_output).confidence >= 0.7
+        env:
+          GH_TOKEN: ${{ github.token }}
+          GH_REPO: ${{ github.repository }}
+          STRUCTURED_OUTPUT: ${{ steps.detect.outputs.structured_output }}
+          FAILED_RUN_ID: ${{ github.event.workflow_run.id }}
+        run: |
+          CONFIDENCE=$(printf '%s' "$STRUCTURED_OUTPUT" | jq -r '.confidence')
+          SUMMARY=$(printf '%s' "$STRUCTURED_OUTPUT" | jq -r '.summary')
+
+          echo "🔄 Flaky test detected (confidence: $CONFIDENCE)"
+          echo "Summary: $SUMMARY"
+          echo ""
+          echo "Triggering automatic retry..."
+
+          # Re-run only the failed jobs of the run that triggered this
+          # workflow. Unlike `gh workflow run`, this needs no
+          # workflow_dispatch trigger on CI and retries the same commit.
+          gh run rerun "$FAILED_RUN_ID" --failed
+
+      # Low confidence flaky detection - skip retry
+      #
+      # The structured output is handed to the script via an env var instead
+      # of being interpolated into it, so quotes in the model-written summary
+      # cannot break or inject into the shell script.
+      - name: Low confidence detection
+        if: |
+          fromJSON(steps.detect.outputs.structured_output).is_flaky == true &&
+          fromJSON(steps.detect.outputs.structured_output).confidence < 0.7
+        env:
+          STRUCTURED_OUTPUT: ${{ steps.detect.outputs.structured_output }}
+        run: |
+          CONFIDENCE=$(printf '%s' "$STRUCTURED_OUTPUT" | jq -r '.confidence')
+
+          echo "⚠️ Possible flaky test but confidence too low ($CONFIDENCE)"
+          echo "Not retrying automatically - manual review recommended"
+
+      # Comment on PR if this was a PR build
+      #
+      # Event data and the model-written structured output are passed through
+      # env vars rather than interpolated into the script with ${{ }}: the
+      # branch name and the summary text are not trusted, and pasting them
+      # into the script would allow shell injection.
+      - name: Comment on PR
+        if: github.event.workflow_run.event == 'pull_request'
+        env:
+          GH_TOKEN: ${{ github.token }}
+          STRUCTURED_OUTPUT: ${{ steps.detect.outputs.structured_output }}
+          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
+          RUN_URL: ${{ github.event.workflow_run.html_url }}
+        run: |
+          IS_FLAKY=$(printf '%s' "$STRUCTURED_OUTPUT" | jq -r '.is_flaky')
+          CONFIDENCE=$(printf '%s' "$STRUCTURED_OUTPUT" | jq -r '.confidence')
+          SUMMARY=$(printf '%s' "$STRUCTURED_OUTPUT" | jq -r '.summary')
+          # Same rule the retry step uses: flaky AND confidence >= 0.7.
+          WILL_RETRY=$(printf '%s' "$STRUCTURED_OUTPUT" | jq -r '.is_flaky == true and .confidence >= 0.7')
+
+          # "// empty" yields an empty string (not "null") when no PR matches.
+          pr_number=$(gh pr list --head "$HEAD_BRANCH" --json number --jq '.[0].number // empty')
+
+          if [ -n "$pr_number" ]; then
+            if [ "$WILL_RETRY" = "true" ]; then
+              TITLE="🔄 Flaky Test Detected"
+              ACTION="✅ Automatically retrying the workflow"
+            elif [ "$IS_FLAKY" = "true" ]; then
+              # Flaky verdict below the retry threshold: no retry happens,
+              # so do not claim one.
+              TITLE="🔄 Possible Flaky Test"
+              ACTION="⚠️ Confidence too low to retry automatically - manual review recommended"
+            else
+              TITLE="❌ Test Failure"
+              ACTION="⚠️ This appears to be a real bug - manual intervention needed"
+            fi
+
+            gh pr comment "$pr_number" --body "$(cat <<EOF
+          ## $TITLE
+
+          **Analysis**: $SUMMARY
+          **Confidence**: $CONFIDENCE
+
+          $ACTION
+
+          [View workflow run]($RUN_URL)
+          EOF
+          )"
+          fi