Add enhanced text sanitization

This commit is contained in:
Lina Tawfik
2025-05-28 17:29:09 -07:00
parent 176dbc369d
commit 61cd297c18
8 changed files with 541 additions and 175 deletions

View File

@@ -6,10 +6,7 @@ import type {
GitHubReview,
} from "../types";
import type { GitHubFileWithSHA } from "./fetcher";
/**
 * Removes HTML comments (`<!-- ... -->`) from `text`.
 *
 * A single replace pass can leave a comment behind when removing one match
 * splices the surrounding characters into a new comment delimiter
 * (e.g. `<!-<!-- x -->- y -->` becomes `<!-- y -->`), so the replacement is
 * repeated until the text stops changing.
 *
 * @param text - Raw text that may contain HTML comments.
 * @returns The text with every complete HTML comment removed. An opening
 *   `<!--` that has no matching `-->` is left untouched.
 */
export function stripHtmlComments(text: string): string {
  let previous: string;
  let current = text;
  do {
    previous = current;
    // Each pass that changes anything strictly shortens the string, so this
    // loop always terminates.
    current = previous.replace(/<!--[\s\S]*?-->/g, "");
  } while (current !== previous);
  return current;
}
import { sanitizeContent } from "../utils/sanitizer";
export function formatContext(
contextData: GitHubPullRequest | GitHubIssue,
@@ -37,13 +34,14 @@ export function formatBody(
body: string,
imageUrlMap: Map<string, string>,
): string {
let processedBody = stripHtmlComments(body);
let processedBody = body;
// Replace image URLs with local paths
for (const [originalUrl, localPath] of imageUrlMap) {
processedBody = processedBody.replaceAll(originalUrl, localPath);
}
processedBody = sanitizeContent(processedBody);
return processedBody;
}
@@ -53,15 +51,16 @@ export function formatComments(
): string {
return comments
.map((comment) => {
let body = stripHtmlComments(comment.body);
let body = comment.body;
// Replace image URLs with local paths if we have a mapping
if (imageUrlMap && body) {
for (const [originalUrl, localPath] of imageUrlMap) {
body = body.replaceAll(originalUrl, localPath);
}
}
body = sanitizeContent(body);
return `[${comment.author.login} at ${comment.createdAt}]: ${body}`;
})
.join("\n\n");
@@ -78,6 +77,19 @@ export function formatReviewComments(
const formattedReviews = reviewData.nodes.map((review) => {
let reviewOutput = `[Review by ${review.author.login} at ${review.submittedAt}]: ${review.state}`;
if (review.body && review.body.trim()) {
let body = review.body;
if (imageUrlMap) {
for (const [originalUrl, localPath] of imageUrlMap) {
body = body.replaceAll(originalUrl, localPath);
}
}
const sanitizedBody = sanitizeContent(body);
reviewOutput += `\n${sanitizedBody}`;
}
if (
review.comments &&
review.comments.nodes &&
@@ -85,15 +97,16 @@ export function formatReviewComments(
) {
const comments = review.comments.nodes
.map((comment) => {
let body = stripHtmlComments(comment.body);
let body = comment.body;
// Replace image URLs with local paths if we have a mapping
if (imageUrlMap) {
for (const [originalUrl, localPath] of imageUrlMap) {
body = body.replaceAll(originalUrl, localPath);
}
}
body = sanitizeContent(body);
return ` [Comment on ${comment.path}:${comment.line || "?"}]: ${body}`;
})
.join("\n");