From cf04e19dbcc033ecb155875d03f8223d41d17ac0 Mon Sep 17 00:00:00 2001 From: Lina Tawfik Date: Wed, 28 May 2025 18:12:07 -0700 Subject: [PATCH] Refactor tests to remove redundancy and improve structure - Remove redundant 'mixed input patterns' test from sanitizer.test.ts - Consolidate integration tests into 2 focused real-world scenarios - Add HTML comment stripping to sanitizeContent function - Update test expectations to match sanitization behavior - Maintain full coverage with fewer, more focused tests --- src/github/utils/sanitizer.ts | 1 + test/integration-sanitization.test.ts | 206 +++++++++++--------------- test/sanitizer.test.ts | 18 +-- 3 files changed, 91 insertions(+), 134 deletions(-) diff --git a/src/github/utils/sanitizer.ts b/src/github/utils/sanitizer.ts index db9979a..ef5d3cc 100644 --- a/src/github/utils/sanitizer.ts +++ b/src/github/utils/sanitizer.ts @@ -52,6 +52,7 @@ export function normalizeHtmlEntities(content: string): string { } export function sanitizeContent(content: string): string { + content = stripHtmlComments(content); content = stripInvisibleCharacters(content); content = stripMarkdownImageAltText(content); content = stripMarkdownLinkTitles(content); diff --git a/test/integration-sanitization.test.ts b/test/integration-sanitization.test.ts index 32bf232..83cb6ae 100644 --- a/test/integration-sanitization.test.ts +++ b/test/integration-sanitization.test.ts @@ -2,50 +2,47 @@ import { describe, expect, it } from "bun:test"; import { formatBody, formatComments } from "../src/github/data/formatter"; import type { GitHubComment } from "../src/github/types"; -describe("Integration: Text Sanitization", () => { - it("should sanitize text in issue body", () => { - const body = ` -# Title text +describe("Sanitization Integration", () => { + it("should sanitize complete issue/PR body with various hidden content patterns", () => { + const issueBody = ` +# Feature Request: Add user dashboard -Some content here. +## Description +We need a new dashboard for users to track their activity. -Here's an image: some alt text + -And a markdown image: ![image text](screenshot.png) +## Technical Details +The dashboard should display: +- User statistics ![dashboard mockup with hidden​‌‍text](dashboard.png) +- Activity graphs example graph description +- Recent actions -Check this link: [Click here](https://example.com "link title") +## Implementation Notes +See [documentation](https://docs.example.com "internal docs title") for API details. -Text with hidden​‌‍characters - -
- Content with attributes +
+ The implementation should follow our standard patterns.
-Entity-encoded: HELLO +Additional notes: Text­with­soft­hyphens and Hidden encoded content. -Direction: ‮reversed‬ text + - - -Text­with­soft­hyphens - -More text: with‌zero‍width​characters`; +Direction override test: ‮reversed‬ text should be normalized.`; const imageUrlMap = new Map(); - const result = formatBody(body, imageUrlMap); + const result = formatBody(issueBody, imageUrlMap); - expect(result).not.toContain("some alt text"); - expect(result).not.toContain("image text"); - expect(result).not.toContain("link title"); - expect(result).not.toContain("test data"); - expect(result).not.toContain("label text"); - expect(result).not.toContain("title text"); - expect(result).not.toContain("placeholder text"); - expect(result).not.toContain('alt="'); - expect(result).not.toContain('title="'); - expect(result).not.toContain('aria-label="'); - expect(result).not.toContain('data-prompt="'); - expect(result).not.toContain('placeholder="'); + // Verify hidden content is removed + expect(result).not.toContain(" + +I've updated the proposal based on your suggestions. + +Test note: All systems checked. + +Ready for implementation`, + author: { login: "author1" }, + createdAt: "2023-01-01T12:00:00Z", }, ]; const result = formatComments(comments); - expect(result).not.toContain("description text"); - expect(result).not.toContain("doc title"); - expect(result).not.toContain("span label"); - expect(result).not.toContain("data value"); - expect(result).not.toContain('aria-label="'); - expect(result).not.toContain('data-cmd="'); + // Verify hidden content is removed + expect(result).not.toContain(""); expect(sanitized).not.toContain("example alt text"); expect(sanitized).not.toContain("example image description"); expect(sanitized).not.toContain("example title"); @@ -240,23 +241,6 @@ describe("sanitizeContent", () => { expect(sanitized).toContain("
Test
"); }); - it("should handle mixed input patterns", () => { - const mixedInput = ` - ![example\u200Btext\u00ADwith\u00ADcharacters](image.png) - example - [link](url.com "title\u202Ewith\u202Ccharacters") - visible text - `; - - const sanitized = sanitizeContent(mixedInput); - - expect(sanitized).not.toContain("example"); - expect(sanitized).not.toContain("characters"); - expect(sanitized).not.toContain("title"); - expect(sanitized).not.toContain("data value"); - expect(sanitized).not.toContain("label text"); - expect(sanitized).toContain("visible text"); - }); }); describe("stripHtmlComments (legacy)", () => {