mirror of
https://github.com/anthropics/claude-code-action.git
synced 2026-01-22 22:44:13 +08:00
feat: add structured output support via --json-schema argument (#687)
* feat: add structured output support Add support for Agent SDK structured outputs. New input: json_schema Output: structured_output (JSON string) Access: fromJSON(steps.id.outputs.structured_output).field Docs: https://docs.claude.com/en/docs/agent-sdk/structured-outputs * rm unused * refactor: simplify structured outputs to use claude_args Remove json_schema input in favor of passing --json-schema flag directly in claude_args. This simplifies the interface by treating structured outputs like other CLI flags (--model, --max-turns, etc.) instead of as a special input that gets injected. Users now specify: claude_args: '--json-schema {...}' Instead of separate: json_schema: {...} 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * chore: remove unused json-schema util and revert version - Remove src/utils/json-schema.ts (no longer used after refactor) - Revert Claude Code version from 2.0.45 back to 2.0.42 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> --------- Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
307
.github/workflows/test-structured-output.yml
vendored
Normal file
307
.github/workflows/test-structured-output.yml
vendored
Normal file
@@ -0,0 +1,307 @@
|
|||||||
|
# End-to-end checks for the `structured_output` action output.
name: Test Structured Outputs

# Runs on pushes to main, on every pull request, and on manual dispatch.
on:
  push:
    branches: [main]
  pull_request:
  workflow_dispatch:

# The jobs only need to read the repository contents.
permissions:
  contents: read

jobs:
|
||||||
|
# Verifies that string, number and boolean fields survive the round trip
# from Claude's structured output to the `structured_output` action output.
test-basic-types:
  name: Test Basic Type Conversions
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4

    - name: Test with explicit values
      id: test
      uses: ./base-action
      with:
        prompt: |
          Run this command: echo "test"

          Then return EXACTLY these values:
          - text_field: "hello"
          - number_field: 42
          - boolean_true: true
          - boolean_false: false
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        claude_args: |
          --allowedTools Bash
          --json-schema '{"type":"object","properties":{"text_field":{"type":"string"},"number_field":{"type":"number"},"boolean_true":{"type":"boolean"},"boolean_false":{"type":"boolean"}},"required":["text_field","number_field","boolean_true","boolean_false"]}'

    - name: Verify outputs
      env:
        # The JSON is model-generated, so it is handed to the script through
        # the environment instead of being interpolated into the shell source;
        # a quote character in the payload can then neither break nor inject
        # shell syntax.
        OUTPUT: ${{ steps.test.outputs.structured_output }}
      run: |
        # $OUTPUT holds the structured_output JSON string

        # String field must come back unchanged
        TEXT_FIELD=$(echo "$OUTPUT" | jq -r '.text_field')
        if [ "$TEXT_FIELD" != "hello" ]; then
          echo "❌ String: expected 'hello', got '$TEXT_FIELD'"
          exit 1
        fi

        # Number field must print as 42 via jq -r
        NUMBER_FIELD=$(echo "$OUTPUT" | jq -r '.number_field')
        if [ "$NUMBER_FIELD" != "42" ]; then
          echo "❌ Number: expected '42', got '$NUMBER_FIELD'"
          exit 1
        fi

        # Boolean true must print as true
        BOOLEAN_TRUE=$(echo "$OUTPUT" | jq -r '.boolean_true')
        if [ "$BOOLEAN_TRUE" != "true" ]; then
          echo "❌ Boolean true: expected 'true', got '$BOOLEAN_TRUE'"
          exit 1
        fi

        # Boolean false must print as false (not null or empty)
        BOOLEAN_FALSE=$(echo "$OUTPUT" | jq -r '.boolean_false')
        if [ "$BOOLEAN_FALSE" != "false" ]; then
          echo "❌ Boolean false: expected 'false', got '$BOOLEAN_FALSE'"
          exit 1
        fi

        echo "✅ All basic type conversions correct"
|
||||||
|
|
||||||
|
# Verifies that arrays, nested objects and empty arrays are preserved inside
# the `structured_output` JSON string.
test-complex-types:
  name: Test Arrays and Objects
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4

    - name: Test complex types
      id: test
      uses: ./base-action
      with:
        prompt: |
          Run: echo "ready"

          Return EXACTLY:
          - items: ["apple", "banana", "cherry"]
          - config: {"key": "value", "count": 3}
          - empty_array: []
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        claude_args: |
          --allowedTools Bash
          --json-schema '{"type":"object","properties":{"items":{"type":"array","items":{"type":"string"}},"config":{"type":"object"},"empty_array":{"type":"array"}},"required":["items","config","empty_array"]}'

    - name: Verify JSON stringification
      env:
        # The JSON is model-generated, so it is handed to the script through
        # the environment instead of being interpolated into the shell source;
        # a quote character in the payload can then neither break nor inject
        # shell syntax.
        OUTPUT: ${{ steps.test.outputs.structured_output }}
      run: |
        # $OUTPUT holds the structured_output JSON string

        # The array must be present with exactly three elements
        if ! echo "$OUTPUT" | jq -e '.items | length == 3' > /dev/null; then
          echo "❌ Array not properly formatted"
          echo "$OUTPUT" | jq '.items'
          exit 1
        fi

        # The nested object must keep its key/value pair
        if ! echo "$OUTPUT" | jq -e '.config.key == "value"' > /dev/null; then
          echo "❌ Object not properly formatted"
          echo "$OUTPUT" | jq '.config'
          exit 1
        fi

        # Empty arrays should work
        if ! echo "$OUTPUT" | jq -e '.empty_array | length == 0' > /dev/null; then
          echo "❌ Empty array not properly formatted"
          echo "$OUTPUT" | jq '.empty_array'
          exit 1
        fi

        echo "✅ All complex types handled correctly"
|
||||||
|
|
||||||
|
# Verifies falsy and unusual scalar values: zero, empty string, a negative
# number and a decimal.
test-edge-cases:
  name: Test Edge Cases
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4

    - name: Test edge cases
      id: test
      uses: ./base-action
      with:
        prompt: |
          Run: echo "test"

          Return EXACTLY:
          - zero: 0
          - empty_string: ""
          - negative: -5
          - decimal: 3.14
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        claude_args: |
          --allowedTools Bash
          --json-schema '{"type":"object","properties":{"zero":{"type":"number"},"empty_string":{"type":"string"},"negative":{"type":"number"},"decimal":{"type":"number"}},"required":["zero","empty_string","negative","decimal"]}'

    - name: Verify edge cases
      env:
        # The JSON is model-generated, so it is handed to the script through
        # the environment instead of being interpolated into the shell source;
        # a quote character in the payload can then neither break nor inject
        # shell syntax.
        OUTPUT: ${{ steps.test.outputs.structured_output }}
      run: |
        # $OUTPUT holds the structured_output JSON string

        # Zero should be "0", not empty or falsy
        ZERO=$(echo "$OUTPUT" | jq -r '.zero')
        if [ "$ZERO" != "0" ]; then
          echo "❌ Zero: expected '0', got '$ZERO'"
          exit 1
        fi

        # Empty string should be empty (jq -r prints "null" if it is missing)
        EMPTY_STRING=$(echo "$OUTPUT" | jq -r '.empty_string')
        if [ "$EMPTY_STRING" != "" ]; then
          echo "❌ Empty string: expected '', got '$EMPTY_STRING'"
          exit 1
        fi

        # Negative numbers should work
        NEGATIVE=$(echo "$OUTPUT" | jq -r '.negative')
        if [ "$NEGATIVE" != "-5" ]; then
          echo "❌ Negative: expected '-5', got '$NEGATIVE'"
          exit 1
        fi

        # Decimals should preserve precision
        DECIMAL=$(echo "$OUTPUT" | jq -r '.decimal')
        if [ "$DECIMAL" != "3.14" ]; then
          echo "❌ Decimal: expected '3.14', got '$DECIMAL'"
          exit 1
        fi

        echo "✅ All edge cases handled correctly"
|
||||||
|
|
||||||
|
# Verifies that field names containing hyphens and underscores are kept
# as-is inside the `structured_output` JSON string.
test-name-sanitization:
  name: Test Output Name Sanitization
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4

    - name: Test special characters in field names
      id: test
      uses: ./base-action
      with:
        prompt: |
          Run: echo "test"
          Return EXACTLY: {test-result: "passed", item_count: 10}
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        claude_args: |
          --allowedTools Bash
          --json-schema '{"type":"object","properties":{"test-result":{"type":"string"},"item_count":{"type":"number"}},"required":["test-result","item_count"]}'

    - name: Verify sanitized names work
      env:
        # The JSON is model-generated, so it is handed to the script through
        # the environment instead of being interpolated into the shell source;
        # a quote character in the payload can then neither break nor inject
        # shell syntax.
        OUTPUT: ${{ steps.test.outputs.structured_output }}
      run: |
        # $OUTPUT holds the structured_output JSON string

        # Hyphens should be preserved in the JSON
        TEST_RESULT=$(echo "$OUTPUT" | jq -r '.["test-result"]')
        if [ "$TEST_RESULT" != "passed" ]; then
          echo "❌ Hyphenated name failed: expected 'passed', got '$TEST_RESULT'"
          exit 1
        fi

        # Underscores should work
        ITEM_COUNT=$(echo "$OUTPUT" | jq -r '.item_count')
        if [ "$ITEM_COUNT" != "10" ]; then
          echo "❌ Underscore name failed: expected '10', got '$ITEM_COUNT'"
          exit 1
        fi

        echo "✅ Name sanitization works"
|
||||||
|
|
||||||
|
# Verifies that the execution log written by the action contains a "result"
# message carrying the structured_output object.
test-execution-file-structure:
  name: Test Execution File Format
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4

    - name: Run with structured output
      id: test
      uses: ./base-action
      with:
        prompt: "Run: echo 'complete'. Return: {done: true}"
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        claude_args: |
          --allowedTools Bash
          --json-schema '{"type":"object","properties":{"done":{"type":"boolean"}},"required":["done"]}'

    - name: Verify execution file contains structured_output
      env:
        # Step outputs are passed through the environment rather than being
        # interpolated into the shell source, matching the other verify steps.
        FILE: ${{ steps.test.outputs.execution_file }}
      run: |
        # Check file exists
        if [ ! -f "$FILE" ]; then
          echo "❌ Execution file missing"
          exit 1
        fi

        # Check for structured_output field on the result message
        if ! jq -e '.[] | select(.type == "result") | .structured_output' "$FILE" > /dev/null; then
          echo "❌ No structured_output in execution file"
          cat "$FILE"
          exit 1
        fi

        # Verify the actual value
        DONE=$(jq -r '.[] | select(.type == "result") | .structured_output.done' "$FILE")
        if [ "$DONE" != "true" ]; then
          echo "❌ Wrong value in execution file"
          exit 1
        fi

        echo "✅ Execution file format correct"
|
||||||
|
|
||||||
|
# Aggregates the results of every test job into the job summary and fails
# the workflow when any of them did not succeed. Runs even if a dependency
# failed so the table is always produced.
test-summary:
  name: Summary
  runs-on: ubuntu-latest
  needs:
    - test-basic-types
    - test-complex-types
    - test-edge-cases
    - test-name-sanitization
    - test-execution-file-structure
  if: always()
  steps:
    - name: Generate Summary
      run: |
        # Header and one table row per test job
        {
          echo "# Structured Output Tests (Optimized)"
          echo ""
          echo "Fast, deterministic tests using explicit prompts"
          echo ""
          echo "| Test | Result |"
          echo "|------|--------|"
          echo "| Basic Types | ${{ needs.test-basic-types.result == 'success' && '✅ PASS' || '❌ FAIL' }} |"
          echo "| Complex Types | ${{ needs.test-complex-types.result == 'success' && '✅ PASS' || '❌ FAIL' }} |"
          echo "| Edge Cases | ${{ needs.test-edge-cases.result == 'success' && '✅ PASS' || '❌ FAIL' }} |"
          echo "| Name Sanitization | ${{ needs.test-name-sanitization.result == 'success' && '✅ PASS' || '❌ FAIL' }} |"
          echo "| Execution File | ${{ needs.test-execution-file-structure.result == 'success' && '✅ PASS' || '❌ FAIL' }} |"
        } >> "$GITHUB_STEP_SUMMARY"

        # The expression is evaluated by Actions before the script runs and
        # expands to the literal text true or false
        ALL_PASSED=${{
          needs.test-basic-types.result == 'success' &&
          needs.test-complex-types.result == 'success' &&
          needs.test-edge-cases.result == 'success' &&
          needs.test-name-sanitization.result == 'success' &&
          needs.test-execution-file-structure.result == 'success'
        }}

        # Fail the job after recording the verdict when anything did not pass
        if [ "$ALL_PASSED" != "true" ]; then
          {
            echo ""
            echo "## ❌ Some Tests Failed"
          } >> "$GITHUB_STEP_SUMMARY"
          exit 1
        fi

        {
          echo ""
          echo "## ✅ All Tests Passed"
        } >> "$GITHUB_STEP_SUMMARY"
|
||||||
@@ -13,6 +13,7 @@ A general-purpose [Claude Code](https://claude.ai/code) action for GitHub PRs an
|
|||||||
- 💬 **PR/Issue Integration**: Works seamlessly with GitHub comments and PR reviews
|
- 💬 **PR/Issue Integration**: Works seamlessly with GitHub comments and PR reviews
|
||||||
- 🛠️ **Flexible Tool Access**: Access to GitHub APIs and file operations (additional tools can be enabled via configuration)
|
- 🛠️ **Flexible Tool Access**: Access to GitHub APIs and file operations (additional tools can be enabled via configuration)
|
||||||
- 📋 **Progress Tracking**: Visual progress indicators with checkboxes that dynamically update as Claude completes tasks
|
- 📋 **Progress Tracking**: Visual progress indicators with checkboxes that dynamically update as Claude completes tasks
|
||||||
|
- 📊 **Structured Outputs**: Get validated JSON results that automatically become GitHub Action outputs for complex automations
|
||||||
- 🏃 **Runs on Your Infrastructure**: The action executes entirely on your own GitHub runner (Anthropic API calls go to your chosen provider)
|
- 🏃 **Runs on Your Infrastructure**: The action executes entirely on your own GitHub runner (Anthropic API calls go to your chosen provider)
|
||||||
- ⚙️ **Simplified Configuration**: Unified `prompt` and `claude_args` inputs provide clean, powerful configuration aligned with Claude Code SDK
|
- ⚙️ **Simplified Configuration**: Unified `prompt` and `claude_args` inputs provide clean, powerful configuration aligned with Claude Code SDK
|
||||||
|
|
||||||
|
|||||||
@@ -124,6 +124,9 @@ outputs:
|
|||||||
github_token:
|
github_token:
|
||||||
description: "The GitHub token used by the action (Claude App token if available)"
|
description: "The GitHub token used by the action (Claude App token if available)"
|
||||||
value: ${{ steps.prepare.outputs.github_token }}
|
value: ${{ steps.prepare.outputs.github_token }}
|
||||||
|
structured_output:
|
||||||
|
description: "JSON string containing all structured output fields when --json-schema is provided in claude_args. Use fromJSON() to parse: fromJSON(steps.id.outputs.structured_output).field_name"
|
||||||
|
value: ${{ steps.claude-code.outputs.structured_output }}
|
||||||
|
|
||||||
runs:
|
runs:
|
||||||
using: "composite"
|
using: "composite"
|
||||||
|
|||||||
@@ -75,6 +75,9 @@ outputs:
|
|||||||
execution_file:
|
execution_file:
|
||||||
description: "Path to the JSON file containing Claude Code execution log"
|
description: "Path to the JSON file containing Claude Code execution log"
|
||||||
value: ${{ steps.run_claude.outputs.execution_file }}
|
value: ${{ steps.run_claude.outputs.execution_file }}
|
||||||
|
structured_output:
|
||||||
|
description: "JSON string containing all structured output fields when --json-schema is provided in claude_args (use fromJSON() or jq to parse)"
|
||||||
|
value: ${{ steps.run_claude.outputs.structured_output }}
|
||||||
|
|
||||||
runs:
|
runs:
|
||||||
using: "composite"
|
using: "composite"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import * as core from "@actions/core";
|
import * as core from "@actions/core";
|
||||||
import { exec } from "child_process";
|
import { exec } from "child_process";
|
||||||
import { promisify } from "util";
|
import { promisify } from "util";
|
||||||
import { unlink, writeFile, stat } from "fs/promises";
|
import { unlink, writeFile, stat, readFile } from "fs/promises";
|
||||||
import { createWriteStream } from "fs";
|
import { createWriteStream } from "fs";
|
||||||
import { spawn } from "child_process";
|
import { spawn } from "child_process";
|
||||||
import { parse as parseShellArgs } from "shell-quote";
|
import { parse as parseShellArgs } from "shell-quote";
|
||||||
@@ -122,9 +122,54 @@ export function prepareRunConfig(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads the execution file, extracts the structured_output of the last
 * "result" message that carries one, and publishes it as the
 * `structured_output` GitHub Action output (a single JSON string).
 *
 * Intended to run only when --json-schema was explicitly provided in
 * claude_args. Exported for testing.
 *
 * @param executionFile - Path to the execution log; its content is parsed as
 *   a JSON array of messages.
 * @throws Error when the file cannot be read or parsed, or when no "result"
 *   message contains structured_output.
 */
export async function parseAndSetStructuredOutputs(
  executionFile: string,
): Promise<void> {
  try {
    const content = await readFile(executionFile, "utf-8");
    const messages = JSON.parse(content) as {
      type: string;
      structured_output?: Record<string, unknown>;
    }[];

    // Search backwards - result is typically last or second-to-last message
    const result = messages.findLast(
      (m) => m.type === "result" && m.structured_output,
    );

    if (!result?.structured_output) {
      // `result` is only defined when it already carries structured_output,
      // so reporting `!!result` here would always print "false". Check for
      // any result message separately so the diagnostic distinguishes
      // "no result at all" from "result without structured_output".
      const resultExists = messages.some((m) => m.type === "result");
      throw new Error(
        `--json-schema was provided but Claude did not return structured_output.\n` +
          `Found ${messages.length} messages. Result exists: ${resultExists}\n`,
      );
    }

    // Set the complete structured output as a single JSON string
    // This works around GitHub Actions limitation that composite actions can't have dynamic outputs
    const structuredOutputJson = JSON.stringify(result.structured_output);
    core.setOutput("structured_output", structuredOutputJson);
    core.info(
      `Set structured_output with ${Object.keys(result.structured_output).length} field(s)`,
    );
  } catch (error) {
    if (error instanceof Error) {
      throw error; // Preserve original error and stack trace
    }
    // Non-Error throwables are wrapped so callers always receive an Error
    throw new Error(`Failed to parse structured outputs: ${error}`);
  }
}
|
||||||
|
|
||||||
export async function runClaude(promptPath: string, options: ClaudeOptions) {
|
export async function runClaude(promptPath: string, options: ClaudeOptions) {
|
||||||
const config = prepareRunConfig(promptPath, options);
|
const config = prepareRunConfig(promptPath, options);
|
||||||
|
|
||||||
|
// Detect if --json-schema is present in claude args
|
||||||
|
const hasJsonSchema = options.claudeArgs?.includes("--json-schema") ?? false;
|
||||||
|
|
||||||
// Create a named pipe
|
// Create a named pipe
|
||||||
try {
|
try {
|
||||||
await unlink(PIPE_PATH);
|
await unlink(PIPE_PATH);
|
||||||
@@ -308,8 +353,23 @@ export async function runClaude(promptPath: string, options: ClaudeOptions) {
|
|||||||
core.warning(`Failed to process output for execution metrics: ${e}`);
|
core.warning(`Failed to process output for execution metrics: ${e}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
core.setOutput("conclusion", "success");
|
|
||||||
core.setOutput("execution_file", EXECUTION_FILE);
|
core.setOutput("execution_file", EXECUTION_FILE);
|
||||||
|
|
||||||
|
// Parse and set structured outputs only if user provided --json-schema in claude_args
|
||||||
|
if (hasJsonSchema) {
|
||||||
|
try {
|
||||||
|
await parseAndSetStructuredOutputs(EXECUTION_FILE);
|
||||||
|
} catch (error) {
|
||||||
|
const errorMessage =
|
||||||
|
error instanceof Error ? error.message : String(error);
|
||||||
|
core.setFailed(errorMessage);
|
||||||
|
core.setOutput("conclusion", "failure");
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set conclusion to success if we reached here
|
||||||
|
core.setOutput("conclusion", "success");
|
||||||
} else {
|
} else {
|
||||||
core.setOutput("conclusion", "failure");
|
core.setOutput("conclusion", "failure");
|
||||||
|
|
||||||
|
|||||||
@@ -78,5 +78,19 @@ describe("prepareRunConfig", () => {
|
|||||||
"stream-json",
|
"stream-json",
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// A --json-schema flag and its single-quoted JSON value given in claudeArgs
// must both show up as separate entries in the prepared argument list.
test("should include json-schema flag when provided", () => {
  const schema = '{"type":"object","properties":{"result":{"type":"boolean"}}}';
  const options: ClaudeOptions = {
    claudeArgs: `--json-schema '${schema}'`,
  };

  const { claudeArgs } = prepareRunConfig("/tmp/test-prompt.txt", options);

  expect(claudeArgs).toContain("--json-schema");
  expect(claudeArgs).toContain(schema);
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
158
base-action/test/structured-output.test.ts
Normal file
158
base-action/test/structured-output.test.ts
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
#!/usr/bin/env bun
|
||||||
|
|
||||||
|
import { describe, test, expect, afterEach, beforeEach, spyOn } from "bun:test";
|
||||||
|
import { writeFile, unlink } from "fs/promises";
|
||||||
|
import { tmpdir } from "os";
|
||||||
|
import { join } from "path";
|
||||||
|
import { parseAndSetStructuredOutputs } from "../src/run-claude";
|
||||||
|
import * as core from "@actions/core";
|
||||||
|
|
||||||
|
// Path of the throwaway execution file shared by the tests in this file
const TEST_EXECUTION_FILE = join(tmpdir(), "test-execution-output.json");

/**
 * Writes a mock execution file to TEST_EXECUTION_FILE.
 *
 * The file always contains a "system" and a "turn" message; a trailing
 * "result" message carrying `structuredOutput` is appended unless
 * `includeResult` is false.
 *
 * @param structuredOutput - Value stored under structured_output on the
 *   result message (omitted from the JSON when undefined).
 * @param includeResult - Whether to append the result message at all.
 */
async function createMockExecutionFile(
  structuredOutput?: Record<string, unknown>,
  includeResult: boolean = true,
): Promise<void> {
  const resultMessage = {
    type: "result",
    cost_usd: 0.01,
    duration_ms: 1000,
    structured_output: structuredOutput,
  };

  const messages: any[] = [
    { type: "system", subtype: "init" },
    { type: "turn", content: "test" },
    ...(includeResult ? [resultMessage] : []),
  ];

  await writeFile(TEST_EXECUTION_FILE, JSON.stringify(messages));
}
|
||||||
|
|
||||||
|
// Spies on @actions/core, re-installed before every test so nothing is
// actually written to the Actions output or log.
let setOutputSpy: any;
let infoSpy: any;

beforeEach(() => {
  infoSpy = spyOn(core, "info").mockImplementation(() => {});
  setOutputSpy = spyOn(core, "setOutput").mockImplementation(() => {});
});

describe("parseAndSetStructuredOutputs", () => {
  // Leading part of the error raised when no structured_output is found
  const MISSING_OUTPUT_MESSAGE =
    "--json-schema was provided but Claude did not return structured_output";

  // Writes an arbitrary message list as the execution file
  const writeMessages = (messages: unknown[]): Promise<void> =>
    writeFile(TEST_EXECUTION_FILE, JSON.stringify(messages));

  afterEach(async () => {
    setOutputSpy?.mockRestore();
    infoSpy?.mockRestore();
    await unlink(TEST_EXECUTION_FILE).catch(() => {
      // Ignore if file doesn't exist
    });
  });

  test("should set structured_output with valid data", async () => {
    const structuredOutput = {
      is_flaky: true,
      confidence: 0.85,
      summary: "Test looks flaky",
    };
    await createMockExecutionFile(structuredOutput);

    await parseAndSetStructuredOutputs(TEST_EXECUTION_FILE);

    expect(setOutputSpy).toHaveBeenCalledWith(
      "structured_output",
      '{"is_flaky":true,"confidence":0.85,"summary":"Test looks flaky"}',
    );
    expect(infoSpy).toHaveBeenCalledWith(
      "Set structured_output with 3 field(s)",
    );
  });

  test("should handle arrays and nested objects", async () => {
    await createMockExecutionFile({
      items: ["a", "b", "c"],
      config: { key: "value", nested: { deep: true } },
    });

    await parseAndSetStructuredOutputs(TEST_EXECUTION_FILE);

    const [outputName, outputJson] = setOutputSpy.mock.calls[0];
    expect(outputName).toBe("structured_output");
    expect(JSON.parse(outputJson)).toEqual({
      items: ["a", "b", "c"],
      config: { key: "value", nested: { deep: true } },
    });
  });

  test("should handle special characters in field names", async () => {
    await createMockExecutionFile({
      "test-result": "passed",
      "item.count": 10,
      "user@email": "test",
    });

    await parseAndSetStructuredOutputs(TEST_EXECUTION_FILE);

    const outputJson = setOutputSpy.mock.calls[0][1];
    const fields = JSON.parse(outputJson);
    expect(fields["test-result"]).toBe("passed");
    expect(fields["item.count"]).toBe(10);
    expect(fields["user@email"]).toBe("test");
  });

  test("should throw error when result exists but structured_output is undefined", async () => {
    await writeMessages([
      { type: "system", subtype: "init" },
      { type: "result", cost_usd: 0.01, duration_ms: 1000 },
    ]);

    const outcome = parseAndSetStructuredOutputs(TEST_EXECUTION_FILE);

    await expect(outcome).rejects.toThrow(MISSING_OUTPUT_MESSAGE);
  });

  test("should throw error when no result message exists", async () => {
    await writeMessages([
      { type: "system", subtype: "init" },
      { type: "turn", content: "test" },
    ]);

    const outcome = parseAndSetStructuredOutputs(TEST_EXECUTION_FILE);

    await expect(outcome).rejects.toThrow(MISSING_OUTPUT_MESSAGE);
  });

  test("should throw error with malformed JSON", async () => {
    await writeFile(TEST_EXECUTION_FILE, "{ invalid json");

    const outcome = parseAndSetStructuredOutputs(TEST_EXECUTION_FILE);

    await expect(outcome).rejects.toThrow();
  });

  test("should throw error when file does not exist", async () => {
    const outcome = parseAndSetStructuredOutputs("/nonexistent/file.json");

    await expect(outcome).rejects.toThrow();
  });

  test("should handle empty structured_output object", async () => {
    await createMockExecutionFile({});

    await parseAndSetStructuredOutputs(TEST_EXECUTION_FILE);

    expect(setOutputSpy).toHaveBeenCalledWith("structured_output", "{}");
    expect(infoSpy).toHaveBeenCalledWith(
      "Set structured_output with 0 field(s)",
    );
  });
});
|
||||||
@@ -185,6 +185,74 @@ For a comprehensive guide on migrating from v0.x to v1.0, including step-by-step
|
|||||||
Focus on the changed files in this PR.
|
Focus on the changed files in this PR.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Structured Outputs
|
||||||
|
|
||||||
|
Get validated JSON results from Claude that automatically become GitHub Action outputs. This enables building complex automation workflows where Claude analyzes data and subsequent steps use the results.
|
||||||
|
|
||||||
|
### Basic Example
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- name: Detect flaky tests
|
||||||
|
id: analyze
|
||||||
|
uses: anthropics/claude-code-action@v1
|
||||||
|
with:
|
||||||
|
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||||
|
prompt: |
|
||||||
|
Check the CI logs and determine if this is a flaky test.
|
||||||
|
Return: is_flaky (boolean), confidence (0-1), summary (string)
|
||||||
|
claude_args: |
|
||||||
|
--json-schema '{"type":"object","properties":{"is_flaky":{"type":"boolean"},"confidence":{"type":"number"},"summary":{"type":"string"}},"required":["is_flaky"]}'
|
||||||
|
|
||||||
|
- name: Retry if flaky
|
||||||
|
if: fromJSON(steps.analyze.outputs.structured_output).is_flaky == true
|
||||||
|
run: gh workflow run CI
|
||||||
|
```
|
||||||
|
|
||||||
|
### How It Works
|
||||||
|
|
||||||
|
1. **Define Schema**: Provide a JSON schema via `--json-schema` flag in `claude_args`
|
||||||
|
2. **Claude Executes**: Claude uses tools to complete your task
|
||||||
|
3. **Validated Output**: Result is validated against your schema
|
||||||
|
4. **JSON Output**: All fields are returned in a single `structured_output` JSON string
|
||||||
|
|
||||||
|
### Accessing Structured Outputs
|
||||||
|
|
||||||
|
All structured output fields are available in the `structured_output` output as a JSON string:
|
||||||
|
|
||||||
|
**In GitHub Actions expressions:**
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
if: fromJSON(steps.analyze.outputs.structured_output).is_flaky == true
|
||||||
|
run: |
|
||||||
|
CONFIDENCE=${{ fromJSON(steps.analyze.outputs.structured_output).confidence }}
|
||||||
|
```
|
||||||
|
|
||||||
|
**In bash with jq:**
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- name: Process results
|
||||||
|
run: |
|
||||||
|
OUTPUT='${{ steps.analyze.outputs.structured_output }}'
|
||||||
|
IS_FLAKY=$(echo "$OUTPUT" | jq -r '.is_flaky')
|
||||||
|
SUMMARY=$(echo "$OUTPUT" | jq -r '.summary')
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**: Due to GitHub Actions limitations, composite actions cannot expose dynamic outputs. All fields are bundled in the single `structured_output` JSON string.
|
||||||
|
|
||||||
|
### Complete Example
|
||||||
|
|
||||||
|
See `examples/test-failure-analysis.yml` for a working example that:
|
||||||
|
|
||||||
|
- Detects flaky test failures
|
||||||
|
- Uses confidence thresholds in conditionals
|
||||||
|
- Auto-retries workflows
|
||||||
|
- Comments on PRs
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
For complete details on JSON Schema syntax and Agent SDK structured outputs:
|
||||||
|
https://docs.claude.com/en/docs/agent-sdk/structured-outputs
|
||||||
|
|
||||||
## Ways to Tag @claude
|
## Ways to Tag @claude
|
||||||
|
|
||||||
These examples show how to interact with Claude using comments in PRs and issues. By default, Claude will be triggered anytime you mention `@claude`, but you can customize the exact trigger phrase using the `trigger_phrase` input in the workflow.
|
These examples show how to interact with Claude using comments in PRs and issues. By default, Claude will be triggered anytime you mention `@claude`, but you can customize the exact trigger phrase using the `trigger_phrase` input in the workflow.
|
||||||
|
|||||||
114
examples/test-failure-analysis.yml
Normal file
114
examples/test-failure-analysis.yml
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
name: Auto-Retry Flaky Tests

# This example demonstrates using structured outputs to detect flaky test
# failures and automatically retry them, reducing noise from intermittent
# failures.
#
# Use case: When CI fails, automatically determine if it's likely flaky and
# retry if so.
#
# Values that are not under this workflow's control (the model's JSON output,
# branch and workflow names) are passed to shell steps through `env:` rather
# than interpolated with ${{ }} inside `run:`, so quotes or shell
# metacharacters in them cannot break or inject into the scripts.

on:
  workflow_run:
    workflows: ["CI"]
    types: [completed]

permissions:
  contents: read
  actions: write
  # Needed by the "Comment on PR" step (`gh pr list` / `gh pr comment`).
  pull-requests: write

jobs:
  detect-flaky:
    runs-on: ubuntu-latest
    if: ${{ github.event.workflow_run.conclusion == 'failure' }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Detect flaky test failures
        id: detect
        uses: anthropics/claude-code-action@main
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          prompt: |
            The CI workflow failed: ${{ github.event.workflow_run.html_url }}

            Check the logs: gh run view ${{ github.event.workflow_run.id }} --log-failed

            Determine if this looks like a flaky test failure by checking for:
            - Timeout errors
            - Race conditions
            - Network errors
            - "Expected X but got Y" intermittent failures
            - Tests that passed in previous commits

            Return:
            - is_flaky: true if likely flaky, false if real bug
            - confidence: number 0-1 indicating confidence level
            - summary: brief one-sentence explanation
          claude_args: |
            --json-schema '{"type":"object","properties":{"is_flaky":{"type":"boolean","description":"Whether this appears to be a flaky test failure"},"confidence":{"type":"number","minimum":0,"maximum":1,"description":"Confidence level in the determination"},"summary":{"type":"string","description":"One-sentence explanation of the failure"}},"required":["is_flaky","confidence","summary"]}'

      # Auto-retry only if flaky AND high confidence (>= 0.7)
      - name: Retry flaky tests
        if: |
          fromJSON(steps.detect.outputs.structured_output).is_flaky == true &&
          fromJSON(steps.detect.outputs.structured_output).confidence >= 0.7
        env:
          GH_TOKEN: ${{ github.token }}
          OUTPUT: ${{ steps.detect.outputs.structured_output }}
          WORKFLOW_NAME: ${{ github.event.workflow_run.name }}
          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
        run: |
          CONFIDENCE=$(echo "$OUTPUT" | jq -r '.confidence')
          SUMMARY=$(echo "$OUTPUT" | jq -r '.summary')

          echo "🔄 Flaky test detected (confidence: $CONFIDENCE)"
          echo "Summary: $SUMMARY"
          echo ""
          echo "Triggering automatic retry..."

          # `gh workflow run` creates a workflow_dispatch event, so the target
          # workflow must declare an `on: workflow_dispatch` trigger.
          # NOTE(review): a retried run that fails again completes the same
          # workflow and may re-trigger this one - consider capping retries.
          gh workflow run "$WORKFLOW_NAME" \
            --ref "$HEAD_BRANCH"

      # Low confidence flaky detection - skip retry
      - name: Low confidence detection
        if: |
          fromJSON(steps.detect.outputs.structured_output).is_flaky == true &&
          fromJSON(steps.detect.outputs.structured_output).confidence < 0.7
        env:
          OUTPUT: ${{ steps.detect.outputs.structured_output }}
        run: |
          CONFIDENCE=$(echo "$OUTPUT" | jq -r '.confidence')

          echo "⚠️ Possible flaky test but confidence too low ($CONFIDENCE)"
          echo "Not retrying automatically - manual review recommended"

      # Comment on PR if this was a PR build
      - name: Comment on PR
        if: github.event.workflow_run.event == 'pull_request'
        env:
          GH_TOKEN: ${{ github.token }}
          OUTPUT: ${{ steps.detect.outputs.structured_output }}
          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
          RUN_URL: ${{ github.event.workflow_run.html_url }}
        run: |
          IS_FLAKY=$(echo "$OUTPUT" | jq -r '.is_flaky')
          CONFIDENCE=$(echo "$OUTPUT" | jq -r '.confidence')
          SUMMARY=$(echo "$OUTPUT" | jq -r '.summary')
          # Mirrors the condition of the "Retry flaky tests" step so the
          # comment only announces a retry when one was actually triggered.
          WILL_RETRY=$(echo "$OUTPUT" | jq -r '.is_flaky and (.confidence >= 0.7)')

          pr_number=$(gh pr list --head "$HEAD_BRANCH" --json number --jq '.[0].number')

          if [ -n "$pr_number" ]; then
            if [ "$WILL_RETRY" = "true" ]; then
              TITLE="🔄 Flaky Test Detected"
              ACTION="✅ Automatically retrying the workflow"
            elif [ "$IS_FLAKY" = "true" ]; then
              TITLE="🔄 Flaky Test Detected"
              ACTION="⚠️ Confidence too low to retry automatically - manual review recommended"
            else
              TITLE="❌ Test Failure"
              ACTION="⚠️ This appears to be a real bug - manual intervention needed"
            fi

            # printf keeps the Markdown layout without a heredoc, whose
            # terminator would depend on the YAML block's indentation.
            BODY=$(printf '## %s\n\n**Analysis**: %s\n**Confidence**: %s\n\n%s\n\n[View workflow run](%s)\n' \
              "$TITLE" "$SUMMARY" "$CONFIDENCE" "$ACTION" "$RUN_URL")

            gh pr comment "$pr_number" --body "$BODY"
          fi
|
||||||
Reference in New Issue
Block a user