From 6902c227aaa9536481b99d56f3014bbbad6c6da8 Mon Sep 17 00:00:00 2001 From: bogini Date: Tue, 18 Nov 2025 17:18:05 -0800 Subject: [PATCH] feat: add structured output support via --json-schema argument (#687) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add structured output support Add support for Agent SDK structured outputs. New input: json_schema Output: structured_output (JSON string) Access: fromJSON(steps.id.outputs.structured_output).field Docs: https://docs.claude.com/en/docs/agent-sdk/structured-outputs * rm unused * refactor: simplify structured outputs to use claude_args Remove json_schema input in favor of passing --json-schema flag directly in claude_args. This simplifies the interface by treating structured outputs like other CLI flags (--model, --max-turns, etc.) instead of as a special input that gets injected. Users now specify: claude_args: '--json-schema {...}' Instead of separate: json_schema: {...} 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * chore: remove unused json-schema util and revert version - Remove src/utils/json-schema.ts (no longer used after refactor) - Revert Claude Code version from 2.0.45 back to 2.0.42 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --------- Co-authored-by: Claude --- .github/workflows/test-structured-output.yml | 307 +++++++++++++++++++ README.md | 1 + action.yml | 3 + base-action/action.yml | 3 + base-action/src/run-claude.ts | 64 +++- base-action/test/run-claude.test.ts | 14 + base-action/test/structured-output.test.ts | 158 ++++++++++ docs/usage.md | 68 ++++ examples/test-failure-analysis.yml | 114 +++++++ 9 files changed, 730 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/test-structured-output.yml create mode 100644 base-action/test/structured-output.test.ts create mode 100644 examples/test-failure-analysis.yml diff --git 
a/.github/workflows/test-structured-output.yml b/.github/workflows/test-structured-output.yml new file mode 100644 index 0000000..9b33360 --- /dev/null +++ b/.github/workflows/test-structured-output.yml @@ -0,0 +1,307 @@ +name: Test Structured Outputs + +on: + push: + branches: + - main + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + test-basic-types: + name: Test Basic Type Conversions + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + + - name: Test with explicit values + id: test + uses: ./base-action + with: + prompt: | + Run this command: echo "test" + + Then return EXACTLY these values: + - text_field: "hello" + - number_field: 42 + - boolean_true: true + - boolean_false: false + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + claude_args: | + --allowedTools Bash + --json-schema '{"type":"object","properties":{"text_field":{"type":"string"},"number_field":{"type":"number"},"boolean_true":{"type":"boolean"},"boolean_false":{"type":"boolean"}},"required":["text_field","number_field","boolean_true","boolean_false"]}' + + - name: Verify outputs + run: | + # Parse the structured_output JSON + OUTPUT='${{ steps.test.outputs.structured_output }}' + + # Test string pass-through + TEXT_FIELD=$(echo "$OUTPUT" | jq -r '.text_field') + if [ "$TEXT_FIELD" != "hello" ]; then + echo "❌ String: expected 'hello', got '$TEXT_FIELD'" + exit 1 + fi + + # Test number → string conversion + NUMBER_FIELD=$(echo "$OUTPUT" | jq -r '.number_field') + if [ "$NUMBER_FIELD" != "42" ]; then + echo "❌ Number: expected '42', got '$NUMBER_FIELD'" + exit 1 + fi + + # Test boolean → "true" conversion + BOOLEAN_TRUE=$(echo "$OUTPUT" | jq -r '.boolean_true') + if [ "$BOOLEAN_TRUE" != "true" ]; then + echo "❌ Boolean true: expected 'true', got '$BOOLEAN_TRUE'" + exit 1 + fi + + # Test boolean → "false" conversion + BOOLEAN_FALSE=$(echo "$OUTPUT" | jq -r '.boolean_false') + if [ 
"$BOOLEAN_FALSE" != "false" ]; then + echo "❌ Boolean false: expected 'false', got '$BOOLEAN_FALSE'" + exit 1 + fi + + echo "✅ All basic type conversions correct" + + test-complex-types: + name: Test Arrays and Objects + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + + - name: Test complex types + id: test + uses: ./base-action + with: + prompt: | + Run: echo "ready" + + Return EXACTLY: + - items: ["apple", "banana", "cherry"] + - config: {"key": "value", "count": 3} + - empty_array: [] + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + claude_args: | + --allowedTools Bash + --json-schema '{"type":"object","properties":{"items":{"type":"array","items":{"type":"string"}},"config":{"type":"object"},"empty_array":{"type":"array"}},"required":["items","config","empty_array"]}' + + - name: Verify JSON stringification + run: | + # Parse the structured_output JSON + OUTPUT='${{ steps.test.outputs.structured_output }}' + + # Arrays should be JSON stringified + if ! echo "$OUTPUT" | jq -e '.items | length == 3' > /dev/null; then + echo "❌ Array not properly formatted" + echo "$OUTPUT" | jq '.items' + exit 1 + fi + + # Objects should be JSON stringified + if ! echo "$OUTPUT" | jq -e '.config.key == "value"' > /dev/null; then + echo "❌ Object not properly formatted" + echo "$OUTPUT" | jq '.config' + exit 1 + fi + + # Empty arrays should work + if ! 
echo "$OUTPUT" | jq -e '.empty_array | length == 0' > /dev/null; then + echo "❌ Empty array not properly formatted" + echo "$OUTPUT" | jq '.empty_array' + exit 1 + fi + + echo "✅ All complex types handled correctly" + + test-edge-cases: + name: Test Edge Cases + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + + - name: Test edge cases + id: test + uses: ./base-action + with: + prompt: | + Run: echo "test" + + Return EXACTLY: + - zero: 0 + - empty_string: "" + - negative: -5 + - decimal: 3.14 + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + claude_args: | + --allowedTools Bash + --json-schema '{"type":"object","properties":{"zero":{"type":"number"},"empty_string":{"type":"string"},"negative":{"type":"number"},"decimal":{"type":"number"}},"required":["zero","empty_string","negative","decimal"]}' + + - name: Verify edge cases + run: | + # Parse the structured_output JSON + OUTPUT='${{ steps.test.outputs.structured_output }}' + + # Zero should be "0", not empty or falsy + ZERO=$(echo "$OUTPUT" | jq -r '.zero') + if [ "$ZERO" != "0" ]; then + echo "❌ Zero: expected '0', got '$ZERO'" + exit 1 + fi + + # Empty string should be empty (not "null" or missing) + EMPTY_STRING=$(echo "$OUTPUT" | jq -r '.empty_string') + if [ "$EMPTY_STRING" != "" ]; then + echo "❌ Empty string: expected '', got '$EMPTY_STRING'" + exit 1 + fi + + # Negative numbers should work + NEGATIVE=$(echo "$OUTPUT" | jq -r '.negative') + if [ "$NEGATIVE" != "-5" ]; then + echo "❌ Negative: expected '-5', got '$NEGATIVE'" + exit 1 + fi + + # Decimals should preserve precision + DECIMAL=$(echo "$OUTPUT" | jq -r '.decimal') + if [ "$DECIMAL" != "3.14" ]; then + echo "❌ Decimal: expected '3.14', got '$DECIMAL'" + exit 1 + fi + + echo "✅ All edge cases handled correctly" + + test-name-sanitization: + name: Test Output Name Sanitization + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + + - name: Test special characters in field names + id: test + uses: ./base-action + with: + prompt: | + Run: echo "test" + Return EXACTLY: {test-result: "passed", item_count: 10} + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + claude_args: | + --allowedTools Bash + --json-schema '{"type":"object","properties":{"test-result":{"type":"string"},"item_count":{"type":"number"}},"required":["test-result","item_count"]}' + + - name: Verify sanitized names work + run: | + # Parse the structured_output JSON + OUTPUT='${{ steps.test.outputs.structured_output }}' + + # Hyphens should be preserved in the JSON + TEST_RESULT=$(echo "$OUTPUT" | jq -r '.["test-result"]') + if [ "$TEST_RESULT" != "passed" ]; then + echo "❌ Hyphenated name failed: expected 'passed', got '$TEST_RESULT'" + exit 1 + fi + + # Underscores should work + ITEM_COUNT=$(echo "$OUTPUT" | jq -r '.item_count') + if [ "$ITEM_COUNT" != "10" ]; then + echo "❌ Underscore name failed: expected '10', got '$ITEM_COUNT'" + exit 1 + fi + + echo "✅ Name sanitization works" + + test-execution-file-structure: + name: Test Execution File Format + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + + - name: Run with structured output + id: test + uses: ./base-action + with: + prompt: "Run: echo 'complete'. Return: {done: true}" + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + claude_args: | + --allowedTools Bash + --json-schema '{"type":"object","properties":{"done":{"type":"boolean"}},"required":["done"]}' + + - name: Verify execution file contains structured_output + run: | + FILE="${{ steps.test.outputs.execution_file }}" + + # Check file exists + if [ ! -f "$FILE" ]; then + echo "❌ Execution file missing" + exit 1 + fi + + # Check for structured_output field + if ! 
jq -e '.[] | select(.type == "result") | .structured_output' "$FILE" > /dev/null; then + echo "❌ No structured_output in execution file" + cat "$FILE" + exit 1 + fi + + # Verify the actual value + DONE=$(jq -r '.[] | select(.type == "result") | .structured_output.done' "$FILE") + if [ "$DONE" != "true" ]; then + echo "❌ Wrong value in execution file" + exit 1 + fi + + echo "✅ Execution file format correct" + + test-summary: + name: Summary + runs-on: ubuntu-latest + needs: + - test-basic-types + - test-complex-types + - test-edge-cases + - test-name-sanitization + - test-execution-file-structure + if: always() + steps: + - name: Generate Summary + run: | + echo "# Structured Output Tests (Optimized)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "Fast, deterministic tests using explicit prompts" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Test | Result |" >> $GITHUB_STEP_SUMMARY + echo "|------|--------|" >> $GITHUB_STEP_SUMMARY + echo "| Basic Types | ${{ needs.test-basic-types.result == 'success' && '✅ PASS' || '❌ FAIL' }} |" >> $GITHUB_STEP_SUMMARY + echo "| Complex Types | ${{ needs.test-complex-types.result == 'success' && '✅ PASS' || '❌ FAIL' }} |" >> $GITHUB_STEP_SUMMARY + echo "| Edge Cases | ${{ needs.test-edge-cases.result == 'success' && '✅ PASS' || '❌ FAIL' }} |" >> $GITHUB_STEP_SUMMARY + echo "| Name Sanitization | ${{ needs.test-name-sanitization.result == 'success' && '✅ PASS' || '❌ FAIL' }} |" >> $GITHUB_STEP_SUMMARY + echo "| Execution File | ${{ needs.test-execution-file-structure.result == 'success' && '✅ PASS' || '❌ FAIL' }} |" >> $GITHUB_STEP_SUMMARY + + # Check if all passed + ALL_PASSED=${{ + needs.test-basic-types.result == 'success' && + needs.test-complex-types.result == 'success' && + needs.test-edge-cases.result == 'success' && + needs.test-name-sanitization.result == 'success' && + needs.test-execution-file-structure.result == 'success' + }} + + if [ "$ALL_PASSED" = "true" ]; then + echo "" >> 
$GITHUB_STEP_SUMMARY + echo "## ✅ All Tests Passed" >> $GITHUB_STEP_SUMMARY + else + echo "" >> $GITHUB_STEP_SUMMARY + echo "## ❌ Some Tests Failed" >> $GITHUB_STEP_SUMMARY + exit 1 + fi diff --git a/README.md b/README.md index d93366f..b1c0f41 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ A general-purpose [Claude Code](https://claude.ai/code) action for GitHub PRs an - 💬 **PR/Issue Integration**: Works seamlessly with GitHub comments and PR reviews - 🛠️ **Flexible Tool Access**: Access to GitHub APIs and file operations (additional tools can be enabled via configuration) - 📋 **Progress Tracking**: Visual progress indicators with checkboxes that dynamically update as Claude completes tasks +- 📊 **Structured Outputs**: Get validated JSON results that automatically become GitHub Action outputs for complex automations - 🏃 **Runs on Your Infrastructure**: The action executes entirely on your own GitHub runner (Anthropic API calls go to your chosen provider) - ⚙️ **Simplified Configuration**: Unified `prompt` and `claude_args` inputs provide clean, powerful configuration aligned with Claude Code SDK diff --git a/action.yml b/action.yml index 1f5af04..a4c43b6 100644 --- a/action.yml +++ b/action.yml @@ -124,6 +124,9 @@ outputs: github_token: description: "The GitHub token used by the action (Claude App token if available)" value: ${{ steps.prepare.outputs.github_token }} + structured_output: + description: "JSON string containing all structured output fields when --json-schema is provided in claude_args. 
Use fromJSON() to parse: fromJSON(steps.id.outputs.structured_output).field_name" + value: ${{ steps.claude-code.outputs.structured_output }} runs: using: "composite" diff --git a/base-action/action.yml b/base-action/action.yml index b718245..62ada78 100644 --- a/base-action/action.yml +++ b/base-action/action.yml @@ -75,6 +75,9 @@ outputs: execution_file: description: "Path to the JSON file containing Claude Code execution log" value: ${{ steps.run_claude.outputs.execution_file }} + structured_output: + description: "JSON string containing all structured output fields when --json-schema is provided in claude_args (use fromJSON() or jq to parse)" + value: ${{ steps.run_claude.outputs.structured_output }} runs: using: "composite" diff --git a/base-action/src/run-claude.ts b/base-action/src/run-claude.ts index 2ffbc19..e330894 100644 --- a/base-action/src/run-claude.ts +++ b/base-action/src/run-claude.ts @@ -1,7 +1,7 @@ import * as core from "@actions/core"; import { exec } from "child_process"; import { promisify } from "util"; -import { unlink, writeFile, stat } from "fs/promises"; +import { unlink, writeFile, stat, readFile } from "fs/promises"; import { createWriteStream } from "fs"; import { spawn } from "child_process"; import { parse as parseShellArgs } from "shell-quote"; @@ -122,9 +122,54 @@ export function prepareRunConfig( }; } +/** + * Parses structured_output from execution file and sets GitHub Action outputs + * Only runs if --json-schema was explicitly provided in claude_args + * Exported for testing + */ +export async function parseAndSetStructuredOutputs( + executionFile: string, +): Promise { + try { + const content = await readFile(executionFile, "utf-8"); + const messages = JSON.parse(content) as { + type: string; + structured_output?: Record; + }[]; + + // Search backwards - result is typically last or second-to-last message + const result = messages.findLast( + (m) => m.type === "result" && m.structured_output, + ); + + if 
(!result?.structured_output) { + throw new Error( + `--json-schema was provided but Claude did not return structured_output.\n` + + `Found ${messages.length} messages. Result exists: ${messages.some((m) => m.type === "result")}\n`, + ); + } + + // Set the complete structured output as a single JSON string + // This works around GitHub Actions limitation that composite actions can't have dynamic outputs + const structuredOutputJson = JSON.stringify(result.structured_output); + core.setOutput("structured_output", structuredOutputJson); + core.info( + `Set structured_output with ${Object.keys(result.structured_output).length} field(s)`, + ); + } catch (error) { + if (error instanceof Error) { + throw error; // Preserve original error and stack trace + } + throw new Error(`Failed to parse structured outputs: ${error}`); + } +} + export async function runClaude(promptPath: string, options: ClaudeOptions) { const config = prepareRunConfig(promptPath, options); + // Detect if --json-schema is present in claude args + const hasJsonSchema = options.claudeArgs?.includes("--json-schema") ?? false; + // Create a named pipe try { await unlink(PIPE_PATH); @@ -308,8 +353,23 @@ export async function runClaude(promptPath: string, options: ClaudeOptions) { core.warning(`Failed to process output for execution metrics: ${e}`); } - core.setOutput("conclusion", "success"); core.setOutput("execution_file", EXECUTION_FILE); + + // Parse and set structured outputs only if user provided --json-schema in claude_args + if (hasJsonSchema) { + try { + await parseAndSetStructuredOutputs(EXECUTION_FILE); + } catch (error) { + const errorMessage = + error instanceof Error ? 
error.message : String(error); + core.setFailed(errorMessage); + core.setOutput("conclusion", "failure"); + process.exit(1); + } + } + + // Set conclusion to success if we reached here + core.setOutput("conclusion", "success"); } else { core.setOutput("conclusion", "failure"); diff --git a/base-action/test/run-claude.test.ts b/base-action/test/run-claude.test.ts index 1c7d131..10b385f 100644 --- a/base-action/test/run-claude.test.ts +++ b/base-action/test/run-claude.test.ts @@ -78,5 +78,19 @@ describe("prepareRunConfig", () => { "stream-json", ]); }); + + test("should include json-schema flag when provided", () => { + const options: ClaudeOptions = { + claudeArgs: + '--json-schema \'{"type":"object","properties":{"result":{"type":"boolean"}}}\'', + }; + + const prepared = prepareRunConfig("/tmp/test-prompt.txt", options); + + expect(prepared.claudeArgs).toContain("--json-schema"); + expect(prepared.claudeArgs).toContain( + '{"type":"object","properties":{"result":{"type":"boolean"}}}', + ); + }); }); }); diff --git a/base-action/test/structured-output.test.ts b/base-action/test/structured-output.test.ts new file mode 100644 index 0000000..dba8312 --- /dev/null +++ b/base-action/test/structured-output.test.ts @@ -0,0 +1,158 @@ +#!/usr/bin/env bun + +import { describe, test, expect, afterEach, beforeEach, spyOn } from "bun:test"; +import { writeFile, unlink } from "fs/promises"; +import { tmpdir } from "os"; +import { join } from "path"; +import { parseAndSetStructuredOutputs } from "../src/run-claude"; +import * as core from "@actions/core"; + +// Mock execution file path +const TEST_EXECUTION_FILE = join(tmpdir(), "test-execution-output.json"); + +// Helper to create mock execution file with structured output +async function createMockExecutionFile( + structuredOutput?: Record, + includeResult: boolean = true, +): Promise { + const messages: any[] = [ + { type: "system", subtype: "init" }, + { type: "turn", content: "test" }, + ]; + + if (includeResult) { + 
messages.push({ + type: "result", + cost_usd: 0.01, + duration_ms: 1000, + structured_output: structuredOutput, + }); + } + + await writeFile(TEST_EXECUTION_FILE, JSON.stringify(messages)); +} + +// Spy on core functions +let setOutputSpy: any; +let infoSpy: any; + +beforeEach(() => { + setOutputSpy = spyOn(core, "setOutput").mockImplementation(() => {}); + infoSpy = spyOn(core, "info").mockImplementation(() => {}); +}); + +describe("parseAndSetStructuredOutputs", () => { + afterEach(async () => { + setOutputSpy?.mockRestore(); + infoSpy?.mockRestore(); + try { + await unlink(TEST_EXECUTION_FILE); + } catch { + // Ignore if file doesn't exist + } + }); + + test("should set structured_output with valid data", async () => { + await createMockExecutionFile({ + is_flaky: true, + confidence: 0.85, + summary: "Test looks flaky", + }); + + await parseAndSetStructuredOutputs(TEST_EXECUTION_FILE); + + expect(setOutputSpy).toHaveBeenCalledWith( + "structured_output", + '{"is_flaky":true,"confidence":0.85,"summary":"Test looks flaky"}', + ); + expect(infoSpy).toHaveBeenCalledWith( + "Set structured_output with 3 field(s)", + ); + }); + + test("should handle arrays and nested objects", async () => { + await createMockExecutionFile({ + items: ["a", "b", "c"], + config: { key: "value", nested: { deep: true } }, + }); + + await parseAndSetStructuredOutputs(TEST_EXECUTION_FILE); + + const callArgs = setOutputSpy.mock.calls[0]; + expect(callArgs[0]).toBe("structured_output"); + const parsed = JSON.parse(callArgs[1]); + expect(parsed).toEqual({ + items: ["a", "b", "c"], + config: { key: "value", nested: { deep: true } }, + }); + }); + + test("should handle special characters in field names", async () => { + await createMockExecutionFile({ + "test-result": "passed", + "item.count": 10, + "user@email": "test", + }); + + await parseAndSetStructuredOutputs(TEST_EXECUTION_FILE); + + const callArgs = setOutputSpy.mock.calls[0]; + const parsed = JSON.parse(callArgs[1]); + 
expect(parsed["test-result"]).toBe("passed"); + expect(parsed["item.count"]).toBe(10); + expect(parsed["user@email"]).toBe("test"); + }); + + test("should throw error when result exists but structured_output is undefined", async () => { + const messages = [ + { type: "system", subtype: "init" }, + { type: "result", cost_usd: 0.01, duration_ms: 1000 }, + ]; + await writeFile(TEST_EXECUTION_FILE, JSON.stringify(messages)); + + await expect( + parseAndSetStructuredOutputs(TEST_EXECUTION_FILE), + ).rejects.toThrow( + "--json-schema was provided but Claude did not return structured_output", + ); + }); + + test("should throw error when no result message exists", async () => { + const messages = [ + { type: "system", subtype: "init" }, + { type: "turn", content: "test" }, + ]; + await writeFile(TEST_EXECUTION_FILE, JSON.stringify(messages)); + + await expect( + parseAndSetStructuredOutputs(TEST_EXECUTION_FILE), + ).rejects.toThrow( + "--json-schema was provided but Claude did not return structured_output", + ); + }); + + test("should throw error with malformed JSON", async () => { + await writeFile(TEST_EXECUTION_FILE, "{ invalid json"); + + await expect( + parseAndSetStructuredOutputs(TEST_EXECUTION_FILE), + ).rejects.toThrow(); + }); + + test("should throw error when file does not exist", async () => { + await expect( + parseAndSetStructuredOutputs("/nonexistent/file.json"), + ).rejects.toThrow(); + }); + + test("should handle empty structured_output object", async () => { + await createMockExecutionFile({}); + + await parseAndSetStructuredOutputs(TEST_EXECUTION_FILE); + + expect(setOutputSpy).toHaveBeenCalledWith("structured_output", "{}"); + expect(infoSpy).toHaveBeenCalledWith( + "Set structured_output with 0 field(s)", + ); + }); +}); diff --git a/docs/usage.md b/docs/usage.md index 818b0c8..aad6611 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -185,6 +185,77 @@ For a comprehensive guide on migrating from v0.x to v1.0, including step-by-step Focus on the changed 
files in this PR. ``` +## Structured Outputs + +Get validated JSON results from Claude, exposed as a single `structured_output` GitHub Action output. This enables building complex automation workflows where Claude analyzes data and subsequent steps use the results. + +### Basic Example + +```yaml +- name: Detect flaky tests + id: analyze + uses: anthropics/claude-code-action@v1 + with: + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + prompt: | + Check the CI logs and determine if this is a flaky test. + Return: is_flaky (boolean), confidence (0-1), summary (string) + claude_args: | + --json-schema '{"type":"object","properties":{"is_flaky":{"type":"boolean"},"confidence":{"type":"number"},"summary":{"type":"string"}},"required":["is_flaky"]}' + +- name: Retry if flaky + if: fromJSON(steps.analyze.outputs.structured_output).is_flaky == true + env: + GH_TOKEN: ${{ github.token }} + run: gh workflow run CI +``` + +### How It Works + +1. **Define Schema**: Provide a JSON schema via `--json-schema` flag in `claude_args` +2. **Claude Executes**: Claude uses tools to complete your task +3. **Validated Output**: Result is validated against your schema +4. **JSON Output**: All fields are returned in a single `structured_output` JSON string + +### Accessing Structured Outputs + +All structured output fields are available in the `structured_output` output as a JSON string: + +**In GitHub Actions expressions:** + +```yaml +if: fromJSON(steps.analyze.outputs.structured_output).is_flaky == true +run: | + CONFIDENCE=${{ fromJSON(steps.analyze.outputs.structured_output).confidence }} +``` + +**In bash with jq:** + +```yaml +- name: Process results + env: + STRUCTURED_OUTPUT: ${{ steps.analyze.outputs.structured_output }} + run: | + IS_FLAKY=$(echo "$STRUCTURED_OUTPUT" | jq -r '.is_flaky') + SUMMARY=$(echo "$STRUCTURED_OUTPUT" | jq -r '.summary') +``` + +**Note**: Due to GitHub Actions limitations, composite actions cannot expose dynamic outputs. All fields are bundled in the single `structured_output` JSON string. Pass that string to scripts through `env:` rather than interpolating it directly into `run:`, so quotes or shell metacharacters in the values cannot break or inject into the script. 
+ +### Complete Example + +See `examples/test-failure-analysis.yml` for a working example that: + +- Detects flaky test failures +- Uses confidence thresholds in conditionals +- Auto-retries workflows +- Comments on PRs + +### Documentation + +For complete details on JSON Schema syntax and Agent SDK structured outputs: +https://docs.claude.com/en/docs/agent-sdk/structured-outputs + ## Ways to Tag @claude These examples show how to interact with Claude using comments in PRs and issues. By default, Claude will be triggered anytime you mention `@claude`, but you can customize the exact trigger phrase using the `trigger_phrase` input in the workflow. diff --git a/examples/test-failure-analysis.yml b/examples/test-failure-analysis.yml new file mode 100644 index 0000000..85d63c6 --- /dev/null +++ b/examples/test-failure-analysis.yml @@ -0,0 +1,119 @@ +name: Auto-Retry Flaky Tests + +# This example demonstrates using structured outputs to detect flaky test failures +# and automatically retry them, reducing noise from intermittent failures. +# +# Use case: When CI fails, automatically determine if it's likely flaky and retry if so. 
+ +on: + workflow_run: + workflows: ["CI"] + types: [completed] + +permissions: + contents: read + actions: write + pull-requests: write + +jobs: + detect-flaky: + runs-on: ubuntu-latest + if: ${{ github.event.workflow_run.conclusion == 'failure' }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Detect flaky test failures + id: detect + uses: anthropics/claude-code-action@v1 + with: + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + prompt: | + The CI workflow failed: ${{ github.event.workflow_run.html_url }} + + Check the logs: gh run view ${{ github.event.workflow_run.id }} --log-failed + + Determine if this looks like a flaky test failure by checking for: + - Timeout errors + - Race conditions + - Network errors + - "Expected X but got Y" intermittent failures + - Tests that passed in previous commits + + Return: + - is_flaky: true if likely flaky, false if real bug + - confidence: number 0-1 indicating confidence level + - summary: brief one-sentence explanation + claude_args: | + --json-schema '{"type":"object","properties":{"is_flaky":{"type":"boolean","description":"Whether this appears to be a flaky test failure"},"confidence":{"type":"number","minimum":0,"maximum":1,"description":"Confidence level in the determination"},"summary":{"type":"string","description":"One-sentence explanation of the failure"}},"required":["is_flaky","confidence","summary"]}' + + # Auto-retry only if flaky AND high confidence (>= 0.7) + - name: Retry flaky tests + if: | + fromJSON(steps.detect.outputs.structured_output).is_flaky == true && + fromJSON(steps.detect.outputs.structured_output).confidence >= 0.7 + env: + # Untrusted values (model output, branch name) are passed via env, never inlined into the script + GH_TOKEN: ${{ github.token }} + STRUCTURED_OUTPUT: ${{ steps.detect.outputs.structured_output }} + HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }} + run: | + CONFIDENCE=$(echo "$STRUCTURED_OUTPUT" | jq -r '.confidence') + SUMMARY=$(echo "$STRUCTURED_OUTPUT" | jq -r '.summary') + + echo "🔄 Flaky test detected (confidence: $CONFIDENCE)" + echo "Summary: $SUMMARY" + echo "" + echo "Triggering automatic retry..." 
+ + gh workflow run "${{ github.event.workflow_run.name }}" \ + --ref "$HEAD_BRANCH" + + # Low confidence flaky detection - skip retry + - name: Low confidence detection + if: | + fromJSON(steps.detect.outputs.structured_output).is_flaky == true && + fromJSON(steps.detect.outputs.structured_output).confidence < 0.7 + env: + STRUCTURED_OUTPUT: ${{ steps.detect.outputs.structured_output }} + run: | + CONFIDENCE=$(echo "$STRUCTURED_OUTPUT" | jq -r '.confidence') + + echo "⚠️ Possible flaky test but confidence too low ($CONFIDENCE)" + echo "Not retrying automatically - manual review recommended" + + # Comment on PR if this was a PR build + - name: Comment on PR + if: github.event.workflow_run.event == 'pull_request' + env: + GH_TOKEN: ${{ github.token }} + STRUCTURED_OUTPUT: ${{ steps.detect.outputs.structured_output }} + HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }} + run: | + IS_FLAKY=$(echo "$STRUCTURED_OUTPUT" | jq -r '.is_flaky') + CONFIDENCE=$(echo "$STRUCTURED_OUTPUT" | jq -r '.confidence') + SUMMARY=$(echo "$STRUCTURED_OUTPUT" | jq -r '.summary') + + pr_number=$(gh pr list --head "$HEAD_BRANCH" --json number --jq '.[0].number') + + if [ -n "$pr_number" ]; then + if [ "$IS_FLAKY" = "true" ]; then + TITLE="🔄 Flaky Test Detected" + ACTION="✅ Automatically retrying the workflow" + else + TITLE="❌ Test Failure" + ACTION="⚠️ This appears to be a real bug - manual intervention needed" + fi + + gh pr comment "$pr_number" --body "$(cat <