feat: add structured output support

Add support for Agent SDK structured outputs feature.

New input: json_schema - accepts JSON schema for validated outputs
Auto-sets GitHub Action outputs for each field in the structured result

Docs: https://docs.claude.com/en/docs/agent-sdk/structured-outputs

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
inigo
2025-11-18 09:35:05 -08:00
parent 08f88abe2b
commit c102f7cd09
9 changed files with 932 additions and 1 deletions

View File

@@ -0,0 +1,335 @@
name: Test Structured Outputs (Optimized)
# Every prompt in this workflow spells out the exact values Claude must return.
# That keeps runs fast and deterministic, and exercises the action's own output
# handling rather than Claude's reasoning.
#
# NOTE: Automatic triggers stay disabled until the Agent SDK structured outputs
# feature is released; the --json-schema flag is not yet available in public
# Claude Code releases.
on:
  # Disabled - uncomment when feature is released
  # push:
  #   branches: [main]
  # pull_request:
  workflow_dispatch:
permissions:
  contents: read
jobs:
test-basic-types:
  name: Test Basic Type Conversions
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4
    - name: Test with explicit values
      id: test
      uses: ./base-action
      with:
        # EXPLICIT: Tell Claude exactly what to return - no reasoning needed
        prompt: |
          Run this command: echo "test"
          Then return EXACTLY these values:
          - text_field: "hello"
          - number_field: 42
          - boolean_true: true
          - boolean_false: false
        json_schema: |
          {
            "type": "object",
            "properties": {
              "text_field": {"type": "string"},
              "number_field": {"type": "number"},
              "boolean_true": {"type": "boolean"},
              "boolean_false": {"type": "boolean"}
            },
            "required": ["text_field", "number_field", "boolean_true", "boolean_false"]
          }
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        allowed_tools: "Bash"
    - name: Verify outputs
      # The outputs are model-generated text. They are handed to the script as
      # environment variables instead of being expanded into the script source,
      # so quotes or shell metacharacters in a value cannot alter the script.
      env:
        TEXT_FIELD: ${{ steps.test.outputs.text_field }}
        NUMBER_FIELD: ${{ steps.test.outputs.number_field }}
        BOOLEAN_TRUE: ${{ steps.test.outputs.boolean_true }}
        BOOLEAN_FALSE: ${{ steps.test.outputs.boolean_false }}
      run: |
        # Test string pass-through
        if [ "$TEXT_FIELD" != "hello" ]; then
          echo "❌ String: expected 'hello', got '$TEXT_FIELD'"
          exit 1
        fi
        # Test number → string conversion
        if [ "$NUMBER_FIELD" != "42" ]; then
          echo "❌ Number: expected '42', got '$NUMBER_FIELD'"
          exit 1
        fi
        # Test boolean → "true" conversion
        if [ "$BOOLEAN_TRUE" != "true" ]; then
          echo "❌ Boolean true: expected 'true', got '$BOOLEAN_TRUE'"
          exit 1
        fi
        # Test boolean → "false" conversion
        if [ "$BOOLEAN_FALSE" != "false" ]; then
          echo "❌ Boolean false: expected 'false', got '$BOOLEAN_FALSE'"
          exit 1
        fi
        echo "✅ All basic type conversions correct"
test-complex-types:
  name: Test Arrays and Objects
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4
    - name: Test complex types
      id: test
      uses: ./base-action
      with:
        # EXPLICIT: No file reading, no analysis
        prompt: |
          Run: echo "ready"
          Return EXACTLY:
          - items: ["apple", "banana", "cherry"]
          - config: {"key": "value", "count": 3}
          - empty_array: []
        json_schema: |
          {
            "type": "object",
            "properties": {
              "items": {
                "type": "array",
                "items": {"type": "string"}
              },
              "config": {"type": "object"},
              "empty_array": {"type": "array"}
            },
            "required": ["items", "config", "empty_array"]
          }
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        allowed_tools: "Bash"
    - name: Verify JSON stringification
      # JSON values are passed through the environment. Expanding them inside
      # single quotes in the script would break on any apostrophe in the data
      # and would let model output inject shell code.
      env:
        ITEMS: ${{ steps.test.outputs.items }}
        CONFIG: ${{ steps.test.outputs.config }}
        EMPTY: ${{ steps.test.outputs.empty_array }}
      run: |
        # Arrays should be JSON stringified
        if ! echo "$ITEMS" | jq -e '. | length == 3' > /dev/null; then
          echo "❌ Array not properly stringified: $ITEMS"
          exit 1
        fi
        # Objects should be JSON stringified
        if ! echo "$CONFIG" | jq -e '.key == "value"' > /dev/null; then
          echo "❌ Object not properly stringified: $CONFIG"
          exit 1
        fi
        # Empty arrays should work
        if ! echo "$EMPTY" | jq -e '. | length == 0' > /dev/null; then
          echo "❌ Empty array not properly stringified: $EMPTY"
          exit 1
        fi
        echo "✅ All complex types JSON stringified correctly"
test-edge-cases:
  name: Test Edge Cases
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4
    - name: Test edge cases
      id: test
      uses: ./base-action
      with:
        prompt: |
          Run: echo "test"
          Return EXACTLY:
          - zero: 0
          - empty_string: ""
          - negative: -5
          - decimal: 3.14
        json_schema: |
          {
            "type": "object",
            "properties": {
              "zero": {"type": "number"},
              "empty_string": {"type": "string"},
              "negative": {"type": "number"},
              "decimal": {"type": "number"}
            },
            "required": ["zero", "empty_string", "negative", "decimal"]
          }
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        allowed_tools: "Bash"
    - name: Verify edge cases
      # Model-generated values are read from the environment rather than
      # expanded into the script text, so their content cannot change the script.
      env:
        ZERO: ${{ steps.test.outputs.zero }}
        EMPTY_STRING: ${{ steps.test.outputs.empty_string }}
        NEGATIVE: ${{ steps.test.outputs.negative }}
        DECIMAL: ${{ steps.test.outputs.decimal }}
      run: |
        # Zero should be "0", not empty or falsy
        if [ "$ZERO" != "0" ]; then
          echo "❌ Zero: expected '0', got '$ZERO'"
          exit 1
        fi
        # Empty string should be empty (not "null").
        # NOTE: an output that was never set also expands to an empty string,
        # so this check cannot tell "empty" apart from "missing".
        if [ "$EMPTY_STRING" != "" ]; then
          echo "❌ Empty string: expected '', got '$EMPTY_STRING'"
          exit 1
        fi
        # Negative numbers should work
        if [ "$NEGATIVE" != "-5" ]; then
          echo "❌ Negative: expected '-5', got '$NEGATIVE'"
          exit 1
        fi
        # Decimals should preserve precision
        if [ "$DECIMAL" != "3.14" ]; then
          echo "❌ Decimal: expected '3.14', got '$DECIMAL'"
          exit 1
        fi
        echo "✅ All edge cases handled correctly"
test-name-sanitization:
  name: Test Output Name Sanitization
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4
    - name: Test special characters in field names
      id: test
      uses: ./base-action
      with:
        prompt: |
          Run: echo "test"
          Return EXACTLY: {test-result: "passed", item_count: 10}
        json_schema: |
          {
            "type": "object",
            "properties": {
              "test-result": {"type": "string"},
              "item_count": {"type": "number"}
            },
            "required": ["test-result", "item_count"]
          }
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        allowed_tools: "Bash"
    - name: Verify sanitized names work
      # Outputs are mapped to environment variables so the model-generated
      # values are never expanded into the script source.
      env:
        TEST_RESULT: ${{ steps.test.outputs.test-result }}
        ITEM_COUNT: ${{ steps.test.outputs.item_count }}
      run: |
        # Hyphens should be preserved (GitHub Actions allows them)
        if [ "$TEST_RESULT" != "passed" ]; then
          echo "❌ Hyphenated name failed"
          exit 1
        fi
        # Underscores should work
        if [ "$ITEM_COUNT" != "10" ]; then
          echo "❌ Underscore name failed"
          exit 1
        fi
        echo "✅ Name sanitization works"
test-execution-file-structure:
  name: Test Execution File Format
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4
    - name: Run with structured output
      id: test
      uses: ./base-action
      with:
        prompt: "Run: echo 'complete'. Return: {done: true}"
        json_schema: |
          {
            "type": "object",
            "properties": {
              "done": {"type": "boolean"}
            },
            "required": ["done"]
          }
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        allowed_tools: "Bash"
    - name: Verify execution file contains structured_output
      # The path is passed via env, like the step outputs in the other jobs,
      # so no expression is expanded inside the script itself.
      env:
        FILE: ${{ steps.test.outputs.execution_file }}
      run: |
        # Check file exists
        if [ ! -f "$FILE" ]; then
          echo "❌ Execution file missing"
          exit 1
        fi
        # Check for structured_output field
        if ! jq -e '.[] | select(.type == "result") | .structured_output' "$FILE" > /dev/null; then
          echo "❌ No structured_output in execution file"
          cat "$FILE"
          exit 1
        fi
        # Verify the actual value
        DONE=$(jq -r '.[] | select(.type == "result") | .structured_output.done' "$FILE")
        if [ "$DONE" != "true" ]; then
          echo "❌ Wrong value in execution file"
          exit 1
        fi
        echo "✅ Execution file format correct"
test-summary:
  name: Summary
  runs-on: ubuntu-latest
  needs:
    - test-basic-types
    - test-complex-types
    - test-edge-cases
    - test-name-sanitization
    - test-execution-file-structure
  # Run even when a test job failed, so the summary is always produced
  if: always()
  steps:
    - name: Generate Summary
      run: |
        # Results table: one row per test job, appended to the step summary
        {
          echo "# Structured Output Tests (Optimized)"
          echo ""
          echo "Fast, deterministic tests using explicit prompts"
          echo ""
          echo "| Test | Result |"
          echo "|------|--------|"
          echo "| Basic Types | ${{ needs.test-basic-types.result == 'success' && '✅ PASS' || '❌ FAIL' }} |"
          echo "| Complex Types | ${{ needs.test-complex-types.result == 'success' && '✅ PASS' || '❌ FAIL' }} |"
          echo "| Edge Cases | ${{ needs.test-edge-cases.result == 'success' && '✅ PASS' || '❌ FAIL' }} |"
          echo "| Name Sanitization | ${{ needs.test-name-sanitization.result == 'success' && '✅ PASS' || '❌ FAIL' }} |"
          echo "| Execution File | ${{ needs.test-execution-file-structure.result == 'success' && '✅ PASS' || '❌ FAIL' }} |"
        } >> $GITHUB_STEP_SUMMARY
        # Overall verdict: true only when every test job succeeded
        ALL_PASSED=${{
          needs.test-basic-types.result == 'success' &&
          needs.test-complex-types.result == 'success' &&
          needs.test-edge-cases.result == 'success' &&
          needs.test-name-sanitization.result == 'success' &&
          needs.test-execution-file-structure.result == 'success'
        }}
        echo "" >> $GITHUB_STEP_SUMMARY
        if [ "$ALL_PASSED" != "true" ]; then
          echo "## ❌ Some Tests Failed" >> $GITHUB_STEP_SUMMARY
          exit 1
        fi
        echo "## ✅ All Tests Passed" >> $GITHUB_STEP_SUMMARY

View File

@@ -113,6 +113,10 @@ inputs:
description: "Newline-separated list of Claude Code plugin marketplace Git URLs to install from (e.g., 'https://github.com/user/marketplace1.git\nhttps://github.com/user/marketplace2.git')" description: "Newline-separated list of Claude Code plugin marketplace Git URLs to install from (e.g., 'https://github.com/user/marketplace1.git\nhttps://github.com/user/marketplace2.git')"
required: false required: false
default: "" default: ""
json_schema:
  description: "JSON schema for structured output validation. When provided, Claude will return validated JSON matching this schema, and the action will automatically set GitHub Action outputs for each field (e.g., access via steps.id.outputs.field_name)"
  required: false
  default: ""
outputs: outputs:
execution_file: execution_file:
@@ -174,6 +178,7 @@ runs:
TRACK_PROGRESS: ${{ inputs.track_progress }} TRACK_PROGRESS: ${{ inputs.track_progress }}
ADDITIONAL_PERMISSIONS: ${{ inputs.additional_permissions }} ADDITIONAL_PERMISSIONS: ${{ inputs.additional_permissions }}
CLAUDE_ARGS: ${{ inputs.claude_args }} CLAUDE_ARGS: ${{ inputs.claude_args }}
JSON_SCHEMA: ${{ inputs.json_schema }}
ALL_INPUTS: ${{ toJson(inputs) }} ALL_INPUTS: ${{ toJson(inputs) }}
- name: Install Base Action Dependencies - name: Install Base Action Dependencies
@@ -228,6 +233,7 @@ runs:
INPUT_SHOW_FULL_OUTPUT: ${{ inputs.show_full_output }} INPUT_SHOW_FULL_OUTPUT: ${{ inputs.show_full_output }}
INPUT_PLUGINS: ${{ inputs.plugins }} INPUT_PLUGINS: ${{ inputs.plugins }}
INPUT_PLUGIN_MARKETPLACES: ${{ inputs.plugin_marketplaces }} INPUT_PLUGIN_MARKETPLACES: ${{ inputs.plugin_marketplaces }}
JSON_SCHEMA: ${{ inputs.json_schema }}
# Model configuration # Model configuration
GITHUB_TOKEN: ${{ steps.prepare.outputs.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ steps.prepare.outputs.GITHUB_TOKEN }}

View File

@@ -67,6 +67,10 @@ inputs:
description: "Newline-separated list of Claude Code plugin marketplace Git URLs to install from (e.g., 'https://github.com/user/marketplace1.git\nhttps://github.com/user/marketplace2.git')" description: "Newline-separated list of Claude Code plugin marketplace Git URLs to install from (e.g., 'https://github.com/user/marketplace1.git\nhttps://github.com/user/marketplace2.git')"
required: false required: false
default: "" default: ""
json_schema:
  # Folded scalar: the lines below are joined with single spaces into one string
  description: >-
    JSON schema for structured output validation. When provided, Claude will
    return validated JSON matching this schema, and the action will
    automatically set GitHub Action outputs for each field (e.g., access via
    steps.id.outputs.field_name)
  required: false
  default: ""
outputs: outputs:
conclusion: conclusion:

View File

@@ -1,7 +1,7 @@
import * as core from "@actions/core"; import * as core from "@actions/core";
import { exec } from "child_process"; import { exec } from "child_process";
import { promisify } from "util"; import { promisify } from "util";
import { unlink, writeFile, stat } from "fs/promises"; import { unlink, writeFile, stat, readFile } from "fs/promises";
import { createWriteStream } from "fs"; import { createWriteStream } from "fs";
import { spawn } from "child_process"; import { spawn } from "child_process";
import { parse as parseShellArgs } from "shell-quote"; import { parse as parseShellArgs } from "shell-quote";
@@ -12,6 +12,11 @@ const PIPE_PATH = `${process.env.RUNNER_TEMP}/claude_prompt_pipe`;
const EXECUTION_FILE = `${process.env.RUNNER_TEMP}/claude-execution-output.json`; const EXECUTION_FILE = `${process.env.RUNNER_TEMP}/claude-execution-output.json`;
const BASE_ARGS = ["--verbose", "--output-format", "stream-json"]; const BASE_ARGS = ["--verbose", "--output-format", "stream-json"];
/**
 * Shape of one entry in the parsed execution file, limited to the two fields
 * that the structured-output handling in this file reads.
 */
type ExecutionMessage = {
// Message kind; the structured-output lookup matches on "result"
type: string;
// Structured payload whose top-level fields become action outputs, when present
structured_output?: Record<string, unknown>;
};
/** /**
* Sanitizes JSON output to remove sensitive information when full output is disabled * Sanitizes JSON output to remove sensitive information when full output is disabled
* Returns a safe summary message or null if the message should be completely suppressed * Returns a safe summary message or null if the message should be completely suppressed
@@ -122,6 +127,88 @@ export function prepareRunConfig(
}; };
} }
/**
 * Normalizes a structured-output field name for use as a GitHub Actions
 * output name.
 *
 * Anything other than an ASCII letter, digit, underscore, or hyphen is
 * replaced with an underscore; allowed characters are left untouched.
 *
 * @param name - Raw field name from the structured output object.
 * @returns The name with disallowed characters replaced by "_".
 */
function sanitizeOutputName(name: string): string {
  const disallowedChars = /[^a-zA-Z0-9_-]/g;
  const replacement = "_";
  return name.replace(disallowedChars, replacement);
}
/**
 * Renders a structured-output value as the string stored in a GitHub Actions
 * output (outputs are always strings).
 *
 * - strings are returned unchanged
 * - null and undefined become the empty string
 * - other objects (including arrays) are JSON-stringified
 * - everything else (booleans, numbers, symbols, functions, ...) goes through String()
 *
 * @param value - Any value found in the structured output.
 * @returns The string form of the value.
 */
function convertToString(value: unknown): string {
  if (typeof value === "string") {
    return value;
  }
  // typeof null is "object", so null must be handled before the object branch
  if (value === null || value === undefined) {
    return "";
  }
  if (typeof value === "object") {
    return JSON.stringify(value);
  }
  return String(value);
}
/**
 * Reads the execution file, finds the "result" message that carries
 * structured_output, and sets one GitHub Action output per top-level field.
 * The caller invokes this only when json_schema was provided by the user.
 *
 * Field names are sanitized with sanitizeOutputName and values are converted
 * with convertToString before being set.
 *
 * @param executionFile - Path to the JSON file holding the array of execution messages.
 * @throws Error (after marking the action failed exactly once) when the file
 *   cannot be read or parsed, is not a JSON array, or contains no result
 *   message with structured_output.
 */
async function parseAndSetStructuredOutputs(
  executionFile: string,
): Promise<void> {
  try {
    const content = await readFile(executionFile, "utf-8");
    const parsed: unknown = JSON.parse(content);
    if (!Array.isArray(parsed)) {
      throw new Error(
        "execution file does not contain a JSON array of messages",
      );
    }
    const messages = parsed as ExecutionMessage[];
    const result = messages.find(
      (m) => m?.type === "result" && m.structured_output,
    );
    if (!result?.structured_output) {
      // Failure is reported once, by the catch block below; calling
      // core.setFailed here as well would log the same error twice.
      throw new Error(
        "json_schema was provided but Claude did not return structured_output. " +
          "The schema may be invalid or Claude failed to call the StructuredOutput tool.",
      );
    }
    // Set GitHub Action output for each field
    const entries = Object.entries(result.structured_output);
    core.info(`Setting ${entries.length} structured output(s)`);
    // Tracks which original field produced each output name, so that two
    // fields collapsing to the same sanitized name are reported.
    const fieldByOutputName = new Map<string, string>();
    for (const [key, value] of entries) {
      const sanitizedKey = sanitizeOutputName(key);
      if (!sanitizedKey) {
        core.warning(`Skipping invalid output key: "${key}"`);
        continue;
      }
      const earlierField = fieldByOutputName.get(sanitizedKey);
      if (earlierField !== undefined) {
        core.warning(
          `Output "${sanitizedKey}" from field "${key}" overwrites the value set from field "${earlierField}"`,
        );
      }
      fieldByOutputName.set(sanitizedKey, key);
      const stringValue = convertToString(value);
      // Truncate long values in logs for readability
      const displayValue =
        stringValue.length > 100
          ? `${stringValue.slice(0, 97)}...`
          : stringValue;
      core.setOutput(sanitizedKey, stringValue);
      core.info(`${sanitizedKey}=${displayValue}`);
    }
  } catch (error) {
    // Use the bare message so the report does not read "...: Error: ..."
    const reason = error instanceof Error ? error.message : String(error);
    const errorMsg = `Failed to parse structured outputs: ${reason}`;
    core.setFailed(errorMsg);
    throw new Error(errorMsg);
  }
}
export async function runClaude(promptPath: string, options: ClaudeOptions) { export async function runClaude(promptPath: string, options: ClaudeOptions) {
const config = prepareRunConfig(promptPath, options); const config = prepareRunConfig(promptPath, options);
@@ -310,6 +397,11 @@ export async function runClaude(promptPath: string, options: ClaudeOptions) {
core.setOutput("conclusion", "success"); core.setOutput("conclusion", "success");
core.setOutput("execution_file", EXECUTION_FILE); core.setOutput("execution_file", EXECUTION_FILE);
// Parse and set structured outputs only if user provided json_schema
if (process.env.JSON_SCHEMA) {
await parseAndSetStructuredOutputs(EXECUTION_FILE);
}
} else { } else {
core.setOutput("conclusion", "failure"); core.setOutput("conclusion", "failure");

View File

@@ -78,5 +78,19 @@ describe("prepareRunConfig", () => {
"stream-json", "stream-json",
]); ]);
}); });
test("should include json-schema flag when provided", () => {
  // The schema is single-quoted inside claudeArgs; the prepared args are
  // expected to contain the flag and the bare JSON text.
  const schema = '{"type":"object","properties":{"result":{"type":"boolean"}}}';
  const options: ClaudeOptions = {
    claudeArgs: `--json-schema '${schema}'`,
  };
  const { claudeArgs } = prepareRunConfig("/tmp/test-prompt.txt", options);
  expect(claudeArgs).toContain("--json-schema");
  expect(claudeArgs).toContain(schema);
});
}); });
}); });

View File

@@ -0,0 +1,341 @@
#!/usr/bin/env bun
import { describe, test, expect, afterEach } from "bun:test";
import { writeFile, unlink } from "fs/promises";
import { tmpdir } from "os";
import { join } from "path";
// Local declaration of the message shape used by these tests. Nothing is
// imported from the implementation module; the type is redeclared here.
type ExecutionMessage = {
type: string;
structured_output?: Record<string, unknown>;
};
// Path of the mock execution file, placed in the OS temp directory
const TEST_EXECUTION_FILE = join(tmpdir(), "test-execution-output.json");
/**
 * Writes a mock execution file to TEST_EXECUTION_FILE.
 *
 * The file always starts with a "system" and a "turn" message. When
 * includeResult is true, a "result" message carrying structuredOutput is
 * appended. An undefined structuredOutput is dropped by JSON.stringify, so
 * the result message is then written without that field.
 *
 * @param structuredOutput - Payload to store under structured_output.
 * @param includeResult - Whether to append the result message (default true).
 */
async function createMockExecutionFile(
  structuredOutput?: Record<string, unknown>,
  includeResult: boolean = true,
): Promise<void> {
  const resultMessage = {
    type: "result",
    cost_usd: 0.01,
    duration_ms: 1000,
    structured_output: structuredOutput,
  } as any;
  const messages: ExecutionMessage[] = [
    { type: "system", subtype: "init" } as any,
    { type: "turn", content: "test" } as any,
    ...(includeResult ? [resultMessage] : []),
  ];
  await writeFile(TEST_EXECUTION_FILE, JSON.stringify(messages));
}
describe("Structured Output - Pure Functions", () => {
afterEach(async () => {
  // Best-effort cleanup: not every test creates the file, so a failed
  // unlink is deliberately ignored.
  await unlink(TEST_EXECUTION_FILE).catch(() => {});
});
describe("sanitizeOutputName", () => {
  // Inline copy of the sanitization rule under test, shared by every case
  const sanitize = (name: string) => name.replace(/[^a-zA-Z0-9_-]/g, "_");

  // Asserts each [input, expected] pair in order
  const expectSanitized = (cases: Array<[string, string]>) => {
    for (const [input, expected] of cases) {
      expect(sanitize(input)).toBe(expected);
    }
  };

  test("should keep valid characters", () => {
    expectSanitized([["valid_name-123", "valid_name-123"]]);
  });
  test("should replace invalid characters with underscores", () => {
    expectSanitized([
      ["invalid@name!", "invalid_name_"],
      ["has spaces", "has_spaces"],
      ["has.dots", "has_dots"],
    ]);
  });
  test("should handle special characters", () => {
    expectSanitized([
      ["$field%name&", "_field_name_"],
      ["field[0]", "field_0_"],
    ]);
  });
});
describe("convertToString", () => {
  // Inline copy of the conversion rule under test. The "undefined" and
  // default branches always return a string; JSON.stringify(undefined) would
  // return undefined and violate the declared string return type.
  const convertToString = (value: unknown): string => {
    switch (typeof value) {
      case "string":
        return value;
      case "boolean":
      case "number":
        return String(value);
      case "object":
        return value === null ? "" : JSON.stringify(value);
      case "undefined":
        return "";
      default:
        // Handle Symbol, Function, etc.
        return String(value);
    }
  };
  test("should keep strings as-is", () => {
    expect(convertToString("hello")).toBe("hello");
    expect(convertToString("")).toBe("");
  });
  test("should convert booleans to strings", () => {
    expect(convertToString(true)).toBe("true");
    expect(convertToString(false)).toBe("false");
  });
  test("should convert numbers to strings", () => {
    expect(convertToString(42)).toBe("42");
    expect(convertToString(3.14)).toBe("3.14");
    expect(convertToString(0)).toBe("0");
  });
  test("should convert null to empty string", () => {
    expect(convertToString(null)).toBe("");
  });
  test("should convert undefined to empty string", () => {
    expect(convertToString(undefined)).toBe("");
  });
  test("should JSON stringify objects", () => {
    expect(convertToString({ foo: "bar" })).toBe('{"foo":"bar"}');
  });
  test("should JSON stringify arrays", () => {
    expect(convertToString([1, 2, 3])).toBe("[1,2,3]");
    expect(convertToString(["a", "b"])).toBe('["a","b"]');
  });
  test("should handle nested structures", () => {
    const nested = { items: [{ id: 1, name: "test" }] };
    expect(convertToString(nested)).toBe(
      '{"items":[{"id":1,"name":"test"}]}',
    );
  });
});
describe("parseAndSetStructuredOutputs integration", () => {
  // These tests do not call parseAndSetStructuredOutputs itself. They read
  // the mock file back and apply the same lookup it performs: the first
  // "result" message that carries a truthy structured_output.
  const findStructuredResult = async () => {
    const content = await Bun.file(TEST_EXECUTION_FILE).text();
    const messages = JSON.parse(content) as ExecutionMessage[];
    return messages.find((m) => m.type === "result" && m.structured_output);
  };

  test("should parse and set simple structured outputs", async () => {
    const payload = {
      is_antonly: true,
      confidence: 0.95,
      risk: "low",
    };
    await createMockExecutionFile(payload);
    const result = await findStructuredResult();
    expect(result?.structured_output).toEqual({
      is_antonly: true,
      confidence: 0.95,
      risk: "low",
    });
  });
  test("should handle array outputs", async () => {
    await createMockExecutionFile({
      affected_areas: ["auth", "database", "api"],
      severity: "high",
    });
    const result = await findStructuredResult();
    expect(result?.structured_output?.affected_areas).toEqual([
      "auth",
      "database",
      "api",
    ]);
  });
  test("should handle nested objects", async () => {
    await createMockExecutionFile({
      analysis: {
        category: "test",
        details: { count: 5, passed: true },
      },
    });
    const result = await findStructuredResult();
    expect(result?.structured_output?.analysis).toEqual({
      category: "test",
      details: { count: 5, passed: true },
    });
  });
  test("should handle missing structured_output", async () => {
    // A result message is written, but without a structured_output field
    await createMockExecutionFile(undefined, true);
    const result = await findStructuredResult();
    expect(result).toBeUndefined();
  });
  test("should handle empty structured_output", async () => {
    // An empty object is truthy, so the result message is still found
    await createMockExecutionFile({});
    const result = await findStructuredResult();
    expect(result?.structured_output).toEqual({});
  });
  test("should handle all supported types", async () => {
    await createMockExecutionFile({
      string_field: "hello",
      number_field: 42,
      boolean_field: true,
      null_field: null,
      array_field: [1, 2, 3],
      object_field: { nested: "value" },
    });
    const result = await findStructuredResult();
    expect(result?.structured_output).toMatchObject({
      string_field: "hello",
      number_field: 42,
      boolean_field: true,
      null_field: null,
      array_field: [1, 2, 3],
      object_field: { nested: "value" },
    });
  });
});
describe("output naming with prefix", () => {
  // NOTE(review): the parseAndSetStructuredOutputs shown in this commit sets
  // outputs under the sanitized key with no prefix, so these cases exercise
  // only the local composition below - confirm whether a prefix option is planned.
  const buildOutputName = (prefix: string, key: string) =>
    prefix + key.replace(/[^a-zA-Z0-9_-]/g, "_");

  test("should apply prefix correctly", () => {
    expect(buildOutputName("CLAUDE_", "is_antonly")).toBe("CLAUDE_is_antonly");
  });
  test("should handle empty prefix", () => {
    expect(buildOutputName("", "result")).toBe("result");
  });
  test("should sanitize and prefix invalid keys", () => {
    expect(buildOutputName("OUT_", "invalid@key!")).toBe("OUT_invalid_key_");
  });
});
describe("error scenarios", () => {
  // Reads the raw text of the mock execution file
  const readExecutionFile = () => Bun.file(TEST_EXECUTION_FILE).text();
  // Same lookup predicate as the implementation: a "result" message that
  // carries a truthy structured_output
  const hasStructuredOutput = (m: ExecutionMessage) =>
    m.type === "result" && m.structured_output;

  test("should handle malformed JSON", async () => {
    await writeFile(TEST_EXECUTION_FILE, "invalid json {");
    // Capture whatever reading or parsing throws; undefined means no error
    const error: Error | undefined = await readExecutionFile()
      .then((content) => JSON.parse(content))
      .then(
        () => undefined,
        (e) => e as Error,
      );
    expect(error).toBeDefined();
    expect(error?.message).toContain("JSON");
  });
  test("should handle empty execution file", async () => {
    await writeFile(TEST_EXECUTION_FILE, "[]");
    const messages = JSON.parse(
      await readExecutionFile(),
    ) as ExecutionMessage[];
    expect(messages.find(hasStructuredOutput)).toBeUndefined();
  });
  test("should handle missing result message", async () => {
    const withoutResult = [
      { type: "system", subtype: "init" },
      { type: "turn", content: "test" },
    ];
    await writeFile(TEST_EXECUTION_FILE, JSON.stringify(withoutResult));
    const parsed = JSON.parse(await readExecutionFile()) as ExecutionMessage[];
    expect(parsed.find(hasStructuredOutput)).toBeUndefined();
  });
});
describe("value truncation in logs", () => {
  // Inline copy of the log display rule: values longer than 100 characters
  // are cut to their first 97 characters plus "...", shorter ones are unchanged.
  const toDisplayValue = (value: string) =>
    value.length > 100 ? `${value.slice(0, 97)}...` : value;

  test("should truncate long string values for display", () => {
    const displayValue = toDisplayValue("a".repeat(150));
    expect(displayValue).toBe("a".repeat(97) + "...");
    expect(displayValue.length).toBe(100);
  });
  test("should not truncate short values", () => {
    expect(toDisplayValue("short")).toBe("short");
  });
  // Boundary: exactly 100 characters is still within the limit
  test("should not truncate exactly 100 character values", () => {
    const value = "a".repeat(100);
    expect(toDisplayValue(value)).toBe(value);
  });
  // Boundary: 101 characters is the first length that gets truncated
  test("should truncate 101 character values", () => {
    const displayValue = toDisplayValue("a".repeat(101));
    expect(displayValue).toBe("a".repeat(97) + "...");
    expect(displayValue.length).toBe(100);
  });
});
});

View File

@@ -0,0 +1,113 @@
name: Auto-Retry Flaky Tests
# This example demonstrates using structured outputs to detect flaky test failures
# and automatically retry them, reducing noise from intermittent failures.
#
# Use case: When CI fails, automatically determine if it's likely flaky and retry if so.
on:
  workflow_run:
    workflows: ["CI"]
    types: [completed]
permissions:
  contents: read
  # Needed to trigger the retry with the gh CLI
  actions: write
  # Needed by the "Comment on PR" step (gh pr list / gh pr comment)
  pull-requests: write
jobs:
detect-flaky:
  runs-on: ubuntu-latest
  # Only analyze runs of the watched workflow that ended in failure
  if: ${{ github.event.workflow_run.conclusion == 'failure' }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
    - name: Detect flaky test failures
      id: detect
      uses: anthropics/claude-code-action@main
      with:
        anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
        prompt: |
          The CI workflow failed: ${{ github.event.workflow_run.html_url }}
          Check the logs: gh run view ${{ github.event.workflow_run.id }} --log-failed
          Determine if this looks like a flaky test failure by checking for:
          - Timeout errors
          - Race conditions
          - Network errors
          - "Expected X but got Y" intermittent failures
          - Tests that passed in previous commits
          Return:
          - is_flaky: true if likely flaky, false if real bug
          - confidence: number 0-1 indicating confidence level
          - summary: brief one-sentence explanation
        json_schema: |
          {
            "type": "object",
            "properties": {
              "is_flaky": {
                "type": "boolean",
                "description": "Whether this appears to be a flaky test failure"
              },
              "confidence": {
                "type": "number",
                "minimum": 0,
                "maximum": 1,
                "description": "Confidence level in the determination"
              },
              "summary": {
                "type": "string",
                "description": "One-sentence explanation of the failure"
              }
            },
            "required": ["is_flaky", "confidence", "summary"]
          }
    # Auto-retry only if flaky AND high confidence (>= 0.7).
    # Step outputs are strings; fromJSON turns the confidence into a number so
    # the comparison is numeric rather than character-by-character.
    - name: Retry flaky tests
      if: |
        steps.detect.outputs.is_flaky == 'true' &&
        fromJSON(steps.detect.outputs.confidence) >= 0.7
      # Branch names and model output are untrusted text: they are passed via
      # env and never expanded into the script source.
      env:
        GH_TOKEN: ${{ github.token }}
        CONFIDENCE: ${{ steps.detect.outputs.confidence }}
        SUMMARY: ${{ steps.detect.outputs.summary }}
        WORKFLOW_NAME: ${{ github.event.workflow_run.name }}
        HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
      run: |
        echo "🔄 Flaky test detected (confidence: $CONFIDENCE)"
        echo "Summary: $SUMMARY"
        echo ""
        echo "Triggering automatic retry..."
        # NOTE(review): `gh workflow run` dispatches a new run, which presumably
        # requires the target workflow to declare a workflow_dispatch trigger - confirm.
        gh workflow run "$WORKFLOW_NAME" \
          --ref "$HEAD_BRANCH"
    # Low confidence flaky detection - skip retry
    - name: Low confidence detection
      if: |
        steps.detect.outputs.is_flaky == 'true' &&
        fromJSON(steps.detect.outputs.confidence) < 0.7
      env:
        CONFIDENCE: ${{ steps.detect.outputs.confidence }}
      run: |
        echo "⚠️ Possible flaky test but confidence too low ($CONFIDENCE)"
        echo "Not retrying automatically - manual review recommended"
    # Comment on PR if this was a PR build
    - name: Comment on PR
      if: github.event.workflow_run.event == 'pull_request'
      env:
        GH_TOKEN: ${{ github.token }}
        HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
        RUN_URL: ${{ github.event.workflow_run.html_url }}
        IS_FLAKY: ${{ steps.detect.outputs.is_flaky }}
        # Same condition as the "Retry flaky tests" step, so the comment only
        # claims a retry when one was actually triggered
        WILL_RETRY: ${{ steps.detect.outputs.is_flaky == 'true' && fromJSON(steps.detect.outputs.confidence) >= 0.7 }}
        SUMMARY: ${{ steps.detect.outputs.summary }}
        CONFIDENCE: ${{ steps.detect.outputs.confidence }}
      run: |
        pr_number=$(gh pr list --head "$HEAD_BRANCH" --json number --jq '.[0].number')
        if [ -n "$pr_number" ]; then
          if [ "$IS_FLAKY" = "true" ]; then
            heading='🔄 Flaky Test Detected'
          else
            heading='❌ Test Failure'
          fi
          if [ "$WILL_RETRY" = "true" ]; then
            verdict='✅ Automatically retrying the workflow'
          elif [ "$IS_FLAKY" = "true" ]; then
            verdict='⚠️ Possibly flaky, but confidence is too low to retry automatically - manual review recommended'
          else
            verdict='⚠️ This appears to be a real bug - manual intervention needed'
          fi
          # printf inserts the values as data; unlike an unquoted heredoc, the
          # text of the summary is never subject to shell expansion
          body=$(printf '## %s\n**Analysis**: %s\n**Confidence**: %s\n%s\n[View workflow run](%s)\n' \
            "$heading" "$SUMMARY" "$CONFIDENCE" "$verdict" "$RUN_URL")
          gh pr comment "$pr_number" --body "$body"
        fi

View File

@@ -149,6 +149,19 @@ export const agentMode: Mode = {
claudeArgs = `--mcp-config '${escapedOurConfig}'`; claudeArgs = `--mcp-config '${escapedOurConfig}'`;
} }
// Add JSON schema if provided
const jsonSchema = process.env.JSON_SCHEMA || "";
if (jsonSchema) {
// Validate it's valid JSON
try {
JSON.parse(jsonSchema);
} catch (e) {
throw new Error(`Invalid JSON schema provided: ${e}`);
}
const escapedSchema = jsonSchema.replace(/'/g, "'\\''");
claudeArgs += ` --json-schema '${escapedSchema}'`;
}
// Append user's claude_args (which may have more --mcp-config flags) // Append user's claude_args (which may have more --mcp-config flags)
claudeArgs = `${claudeArgs} ${userClaudeArgs}`.trim(); claudeArgs = `${claudeArgs} ${userClaudeArgs}`.trim();

View File

@@ -177,6 +177,19 @@ export const tagMode: Mode = {
// Add required tools for tag mode // Add required tools for tag mode
claudeArgs += ` --allowedTools "${tagModeTools.join(",")}"`; claudeArgs += ` --allowedTools "${tagModeTools.join(",")}"`;
// Add JSON schema if provided
const jsonSchema = process.env.JSON_SCHEMA || "";
if (jsonSchema) {
// Validate it's valid JSON
try {
JSON.parse(jsonSchema);
} catch (e) {
throw new Error(`Invalid JSON schema provided: ${e}`);
}
const escapedSchema = jsonSchema.replace(/'/g, "'\\''");
claudeArgs += ` --json-schema '${escapedSchema}'`;
}
// Append user's claude_args (which may have more --mcp-config flags) // Append user's claude_args (which may have more --mcp-config flags)
if (userClaudeArgs) { if (userClaudeArgs) {
claudeArgs += ` ${userClaudeArgs}`; claudeArgs += ` ${userClaudeArgs}`;