mirror of
https://github.com/anthropics/claude-code-action.git
synced 2026-01-23 06:54:13 +08:00
Refactor tests to remove redundancy and improve structure
- Remove redundant 'mixed input patterns' test from sanitizer.test.ts
- Consolidate integration tests into 2 focused real-world scenarios
- Add HTML comment stripping to sanitizeContent function
- Update test expectations to match sanitization behavior
- Maintain full coverage with fewer, more focused tests
This commit is contained in:
@@ -52,6 +52,7 @@ export function normalizeHtmlEntities(content: string): string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function sanitizeContent(content: string): string {
|
export function sanitizeContent(content: string): string {
|
||||||
|
content = stripHtmlComments(content);
|
||||||
content = stripInvisibleCharacters(content);
|
content = stripInvisibleCharacters(content);
|
||||||
content = stripMarkdownImageAltText(content);
|
content = stripMarkdownImageAltText(content);
|
||||||
content = stripMarkdownLinkTitles(content);
|
content = stripMarkdownLinkTitles(content);
|
||||||
|
|||||||
@@ -2,50 +2,47 @@ import { describe, expect, it } from "bun:test";
|
|||||||
import { formatBody, formatComments } from "../src/github/data/formatter";
|
import { formatBody, formatComments } from "../src/github/data/formatter";
|
||||||
import type { GitHubComment } from "../src/github/types";
|
import type { GitHubComment } from "../src/github/types";
|
||||||
|
|
||||||
describe("Integration: Text Sanitization", () => {
|
describe("Sanitization Integration", () => {
|
||||||
it("should sanitize text in issue body", () => {
|
it("should sanitize complete issue/PR body with various hidden content patterns", () => {
|
||||||
const body = `
|
const issueBody = `
|
||||||
# Title text
|
# Feature Request: Add user dashboard
|
||||||
|
|
||||||
Some content here.
|
## Description
|
||||||
|
We need a new dashboard for users to track their activity.
|
||||||
|
|
||||||
Here's an image: <img alt="some alt text" src="image.jpg">
|
<!-- HTML comment that should be removed -->
|
||||||
|
|
||||||
And a markdown image: 
|
## Technical Details
|
||||||
|
The dashboard should display:
|
||||||
|
- User statistics 
|
||||||
|
- Activity graphs <img alt="example graph description" src="graph.jpg">
|
||||||
|
- Recent actions
|
||||||
|
|
||||||
Check this link: [Click here](https://example.com "link title")
|
## Implementation Notes
|
||||||
|
See [documentation](https://docs.example.com "internal docs title") for API details.
|
||||||
|
|
||||||
Text with hiddencharacters
|
<div data-instruction="example instruction" aria-label="dashboard label" title="hover text">
|
||||||
|
The implementation should follow our standard patterns.
|
||||||
<div data-prompt="test data" aria-label="label text" title="title text">
|
|
||||||
Content with attributes
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
Entity-encoded: HELLO
|
Additional notes: Textwithsofthyphens and Hidden encoded content.
|
||||||
|
|
||||||
Direction: reversed text
|
<input placeholder="search placeholder" type="text" />
|
||||||
|
|
||||||
<input placeholder="placeholder text" type="text">
|
Direction override test: reversed text should be normalized.`;
|
||||||
|
|
||||||
Textwithsofthyphens
|
|
||||||
|
|
||||||
More text: withzerowidthcharacters`;
|
|
||||||
|
|
||||||
const imageUrlMap = new Map<string, string>();
|
const imageUrlMap = new Map<string, string>();
|
||||||
const result = formatBody(body, imageUrlMap);
|
const result = formatBody(issueBody, imageUrlMap);
|
||||||
|
|
||||||
expect(result).not.toContain("some alt text");
|
// Verify hidden content is removed
|
||||||
expect(result).not.toContain("image text");
|
expect(result).not.toContain("<!-- HTML comment");
|
||||||
expect(result).not.toContain("link title");
|
expect(result).not.toContain("hiddentext");
|
||||||
expect(result).not.toContain("test data");
|
expect(result).not.toContain("example graph description");
|
||||||
expect(result).not.toContain("label text");
|
expect(result).not.toContain("internal docs title");
|
||||||
expect(result).not.toContain("title text");
|
expect(result).not.toContain("example instruction");
|
||||||
expect(result).not.toContain("placeholder text");
|
expect(result).not.toContain("dashboard label");
|
||||||
expect(result).not.toContain('alt="');
|
expect(result).not.toContain("hover text");
|
||||||
expect(result).not.toContain('title="');
|
expect(result).not.toContain("search placeholder");
|
||||||
expect(result).not.toContain('aria-label="');
|
|
||||||
expect(result).not.toContain('data-prompt="');
|
|
||||||
expect(result).not.toContain('placeholder="');
|
|
||||||
expect(result).not.toContain("\u200B");
|
expect(result).not.toContain("\u200B");
|
||||||
expect(result).not.toContain("\u200C");
|
expect(result).not.toContain("\u200C");
|
||||||
expect(result).not.toContain("\u200D");
|
expect(result).not.toContain("\u200D");
|
||||||
@@ -53,104 +50,79 @@ More text: withzerowidthcharacters`;
|
|||||||
expect(result).not.toContain("\u202E");
|
expect(result).not.toContain("\u202E");
|
||||||
expect(result).not.toContain("H");
|
expect(result).not.toContain("H");
|
||||||
|
|
||||||
expect(result).toContain("# Title text");
|
// Verify legitimate content is preserved
|
||||||
expect(result).toContain("Some content here.");
|
expect(result).toContain("# Feature Request: Add user dashboard");
|
||||||
expect(result).toContain("Here's an image:");
|
expect(result).toContain("## Description");
|
||||||
expect(result).toContain('<img src="image.jpg">');
|
expect(result).toContain("We need a new dashboard");
|
||||||
expect(result).toContain("");
|
expect(result).toContain("User statistics");
|
||||||
expect(result).toContain("[Click here](https://example.com)");
|
expect(result).toContain("");
|
||||||
expect(result).toContain("Content with attributes");
|
expect(result).toContain('<img src="graph.jpg">');
|
||||||
expect(result).toContain("HELLO");
|
expect(result).toContain("[documentation](https://docs.example.com)");
|
||||||
expect(result).toContain('<input type="text">');
|
expect(result).toContain("The implementation should follow our standard patterns");
|
||||||
|
expect(result).toContain("Hidden encoded content");
|
||||||
|
expect(result).toContain('<input type="text" />');
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should sanitize text in comments", () => {
|
it("should sanitize GitHub comments preserving discussion flow", () => {
|
||||||
const comments: GitHubComment[] = [
|
const comments: GitHubComment[] = [
|
||||||
{
|
{
|
||||||
id: "1",
|
id: "1",
|
||||||
databaseId: "100001",
|
databaseId: "100001",
|
||||||
body: `Comment text
|
body: `Great idea! Here are my thoughts:
|
||||||
|
|
||||||
Check this: 
|
1. We should consider the performance impact
|
||||||
[Documentation](https://docs.com "doc title")
|
2. The UI mockup looks good: 
|
||||||
|
3. Check the [API docs](https://api.example.com "api reference") for rate limits
|
||||||
|
|
||||||
Textwith characters
|
<div aria-label="comment metadata" data-comment-type="review">
|
||||||
|
This change would affect multiple systems.
|
||||||
|
</div>
|
||||||
|
|
||||||
<span aria-label="span label" data-cmd="data value">Visible text</span>`,
|
Note: Implementationshouldfollowbestpractices.`,
|
||||||
author: { login: "user1" },
|
author: { login: "reviewer1" },
|
||||||
createdAt: "2023-01-01T00:00:00Z",
|
createdAt: "2023-01-01T10:00:00Z",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: "2",
|
||||||
|
databaseId: "100002",
|
||||||
|
body: `Thanks for the feedback!
|
||||||
|
|
||||||
|
<!-- Internal note: discussed with team -->
|
||||||
|
|
||||||
|
I've updated the proposal based on your suggestions.
|
||||||
|
|
||||||
|
Test note: All systems checked.
|
||||||
|
|
||||||
|
<span title="status update" data-status="approved">Ready for implementation</span>`,
|
||||||
|
author: { login: "author1" },
|
||||||
|
createdAt: "2023-01-01T12:00:00Z",
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
const result = formatComments(comments);
|
const result = formatComments(comments);
|
||||||
|
|
||||||
expect(result).not.toContain("description text");
|
// Verify hidden content is removed
|
||||||
expect(result).not.toContain("doc title");
|
expect(result).not.toContain("<!-- Internal note");
|
||||||
expect(result).not.toContain("span label");
|
expect(result).not.toContain("api reference");
|
||||||
expect(result).not.toContain("data value");
|
expect(result).not.toContain("comment metadata");
|
||||||
expect(result).not.toContain('aria-label="');
|
expect(result).not.toContain("data-comment-type=\"review\"");
|
||||||
expect(result).not.toContain('data-cmd="');
|
expect(result).not.toContain("status update");
|
||||||
|
expect(result).not.toContain("data-status=\"approved\"");
|
||||||
expect(result).not.toContain("\u200B");
|
expect(result).not.toContain("\u200B");
|
||||||
expect(result).not.toContain("\u200C");
|
expect(result).not.toContain("T");
|
||||||
expect(result).not.toContain("\u200D");
|
|
||||||
|
|
||||||
expect(result).toContain("Comment text");
|
// Verify discussion flow is preserved
|
||||||
expect(result).toContain("");
|
expect(result).toContain("Great idea! Here are my thoughts:");
|
||||||
expect(result).toContain("[Documentation](https://docs.com)");
|
expect(result).toContain("1. We should consider the performance impact");
|
||||||
expect(result).toContain("Visible text");
|
expect(result).toContain("2. The UI mockup looks good: ");
|
||||||
expect(result).toContain("Textwith characters");
|
expect(result).toContain("3. Check the [API docs](https://api.example.com)");
|
||||||
});
|
expect(result).toContain("This change would affect multiple systems.");
|
||||||
|
expect(result).toContain("Implementationshouldfollowbestpractices");
|
||||||
it("should handle complex mixed patterns", () => {
|
expect(result).toContain("Thanks for the feedback!");
|
||||||
const content = `
|
expect(result).toContain("I've updated the proposal based on your suggestions.");
|
||||||
Text content here.
|
expect(result).toContain("Test note: All systems checked.");
|
||||||
|
expect(result).toContain("Ready for implementation");
|
||||||
<div title="divtitletext" data-instruction="data text">
|
expect(result).toContain("[reviewer1 at");
|
||||||
<img src="image.jpg" alt="imgalttext">
|
expect(result).toContain("[author1 at");
|
||||||
Text with reversed content
|
|
||||||
</div>
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
[link](url.com "title\u00ADtext")
|
|
||||||
|
|
||||||
Mix: Hidden <span aria-label="More">text</span>`;
|
|
||||||
|
|
||||||
const imageUrlMap = new Map<string, string>();
|
|
||||||
const result = formatBody(content, imageUrlMap);
|
|
||||||
|
|
||||||
expect(result).not.toContain('title="');
|
|
||||||
expect(result).not.toContain('data-instruction="');
|
|
||||||
expect(result).not.toContain('alt="');
|
|
||||||
expect(result).not.toContain('aria-label="');
|
|
||||||
expect(result).not.toContain("\u200B");
|
|
||||||
expect(result).not.toContain("\u200C");
|
|
||||||
expect(result).not.toContain("\u00AD");
|
|
||||||
expect(result).not.toContain("\u202E");
|
|
||||||
|
|
||||||
expect(result).toContain("Text content here.");
|
|
||||||
expect(result).toContain("<div>");
|
|
||||||
expect(result).toContain('<img src="image.jpg">');
|
|
||||||
expect(result).toContain("");
|
|
||||||
expect(result).toContain("[link](url.com)");
|
|
||||||
expect(result).toContain("Hidden <span>text</span>");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("should handle edge cases with empty attributes", () => {
|
|
||||||
const edgeCases = `
|
|
||||||
<img alt="" src="test.jpg">
|
|
||||||
<div title="" data-x="">Content</div>
|
|
||||||

|
|
||||||
[link](url.com)
|
|
||||||
Normal text`;
|
|
||||||
|
|
||||||
const imageUrlMap = new Map<string, string>();
|
|
||||||
const result = formatBody(edgeCases, imageUrlMap);
|
|
||||||
|
|
||||||
expect(result).toContain('<img src="test.jpg">');
|
|
||||||
expect(result).toContain("<div>Content</div>");
|
|
||||||
expect(result).toContain("");
|
|
||||||
expect(result).toContain("[link](url.com)");
|
|
||||||
expect(result).toContain("Normal text");
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -172,6 +172,7 @@ describe("sanitizeContent", () => {
|
|||||||
|
|
||||||
const sanitized = sanitizeContent(testContent);
|
const sanitized = sanitizeContent(testContent);
|
||||||
|
|
||||||
|
expect(sanitized).not.toContain("<!-- This is a comment -->");
|
||||||
expect(sanitized).not.toContain("example alt text");
|
expect(sanitized).not.toContain("example alt text");
|
||||||
expect(sanitized).not.toContain("example image description");
|
expect(sanitized).not.toContain("example image description");
|
||||||
expect(sanitized).not.toContain("example title");
|
expect(sanitized).not.toContain("example title");
|
||||||
@@ -240,23 +241,6 @@ describe("sanitizeContent", () => {
|
|||||||
expect(sanitized).toContain("<div>Test</div>");
|
expect(sanitized).toContain("<div>Test</div>");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should handle mixed input patterns", () => {
|
|
||||||
const mixedInput = `
|
|
||||||

|
|
||||||
<img alt="example" src="pic.jpg">
|
|
||||||
[link](url.com "title\u202Ewith\u202Ccharacters")
|
|
||||||
<span data-cmd="data value" aria-label="label text">visible text</span>
|
|
||||||
`;
|
|
||||||
|
|
||||||
const sanitized = sanitizeContent(mixedInput);
|
|
||||||
|
|
||||||
expect(sanitized).not.toContain("example");
|
|
||||||
expect(sanitized).not.toContain("characters");
|
|
||||||
expect(sanitized).not.toContain("title");
|
|
||||||
expect(sanitized).not.toContain("data value");
|
|
||||||
expect(sanitized).not.toContain("label text");
|
|
||||||
expect(sanitized).toContain("visible text");
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("stripHtmlComments (legacy)", () => {
|
describe("stripHtmlComments (legacy)", () => {
|
||||||
|
|||||||
Reference in New Issue
Block a user