From 0c5d54472f57859665a75d5e3911e51e17fa58d4 Mon Sep 17 00:00:00 2001 From: atsushi-ishibashi Date: Tue, 5 Aug 2025 11:37:50 +0900 Subject: [PATCH] feat: Add HTML img tag support to GitHub image downloader (#402) * feat: support html img tag * rm files * refactor --- src/github/utils/image-downloader.ts | 20 ++- test/image-downloader.test.ts | 251 +++++++++++++++++++++++++++ 2 files changed, 268 insertions(+), 3 deletions(-) diff --git a/src/github/utils/image-downloader.ts b/src/github/utils/image-downloader.ts index 40cc974..1e819ff 100644 --- a/src/github/utils/image-downloader.ts +++ b/src/github/utils/image-downloader.ts @@ -3,11 +3,17 @@ import path from "path"; import type { Octokits } from "../api/client"; import { GITHUB_SERVER_URL } from "../api/config"; +const escapedUrl = GITHUB_SERVER_URL.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); const IMAGE_REGEX = new RegExp( - `!\\[[^\\]]*\\]\\((${GITHUB_SERVER_URL.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\/user-attachments\\/assets\\/[^)]+)\\)`, + `!\\[[^\\]]*\\]\\((${escapedUrl}\\/user-attachments\\/assets\\/[^)]+)\\)`, "g", ); +const HTML_IMG_REGEX = new RegExp( + `<img[^>]+src=["']([^"']*${escapedUrl}\\/user-attachments\\/assets\\/[^"']+)["'][^>]*>`, + "gi", +); + type IssueComment = { type: "issue_comment"; id: string; @@ -63,8 +69,16 @@ export async function downloadCommentImages( }> = []; for (const comment of comments) { - const imageMatches = [...comment.body.matchAll(IMAGE_REGEX)]; - const urls = imageMatches.map((match) => match[1] as string); + // Extract URLs from Markdown format + const markdownMatches = [...comment.body.matchAll(IMAGE_REGEX)]; + const markdownUrls = markdownMatches.map((match) => match[1] as string); + + // Extract URLs from HTML format + const htmlMatches = [...comment.body.matchAll(HTML_IMG_REGEX)]; + const htmlUrls = htmlMatches.map((match) => match[1] as string); + + // Combine and deduplicate URLs + const urls = [...new Set([...markdownUrls, ...htmlUrls])]; if (urls.length > 0) { 
commentsWithImages.push({ comment, urls }); diff --git a/test/image-downloader.test.ts b/test/image-downloader.test.ts index 01f30fa..e00b6d0 100644 --- a/test/image-downloader.test.ts +++ b/test/image-downloader.test.ts @@ -662,4 +662,255 @@ describe("downloadCommentImages", () => { ); expect(result.get(imageUrl2)).toBeUndefined(); }); + + test("should detect and download images from HTML img tags", async () => { + const mockOctokit = createMockOctokit(); + const imageUrl = + "https://github.com/user-attachments/assets/html-image.png"; + const signedUrl = + "https://private-user-images.githubusercontent.com/html.png?jwt=token"; + + // Mock octokit response + // @ts-expect-error Mock implementation doesn't match full type signature + mockOctokit.rest.issues.getComment = jest.fn().mockResolvedValue({ + data: { + body_html: ``, + }, + }); + + // Mock fetch for image download + const mockArrayBuffer = new ArrayBuffer(8); + fetchSpy = spyOn(global, "fetch").mockResolvedValue({ + ok: true, + arrayBuffer: async () => mockArrayBuffer, + } as Response); + + const comments: CommentWithImages[] = [ + { + type: "issue_comment", + id: "777", + body: `Here's an HTML image: test`, + }, + ]; + + const result = await downloadCommentImages( + mockOctokit, + "owner", + "repo", + comments, + ); + + expect(mockOctokit.rest.issues.getComment).toHaveBeenCalledWith({ + owner: "owner", + repo: "repo", + comment_id: 777, + mediaType: { format: "full+json" }, + }); + + expect(fetchSpy).toHaveBeenCalledWith(signedUrl); + expect(fsWriteFileSpy).toHaveBeenCalledWith( + "/tmp/github-images/image-1704067200000-0.png", + Buffer.from(mockArrayBuffer), + ); + + expect(result.size).toBe(1); + expect(result.get(imageUrl)).toBe( + "/tmp/github-images/image-1704067200000-0.png", + ); + expect(consoleLogSpy).toHaveBeenCalledWith( + "Found 1 image(s) in issue_comment 777", + ); + expect(consoleLogSpy).toHaveBeenCalledWith(`Downloading ${imageUrl}...`); + expect(consoleLogSpy).toHaveBeenCalledWith( + "✓ 
Saved: /tmp/github-images/image-1704067200000-0.png", + ); + }); + + test("should handle HTML img tags with different quote styles", async () => { + const mockOctokit = createMockOctokit(); + const imageUrl1 = + "https://github.com/user-attachments/assets/single-quote.jpg"; + const imageUrl2 = + "https://github.com/user-attachments/assets/double-quote.png"; + const signedUrl1 = + "https://private-user-images.githubusercontent.com/single.jpg?jwt=token1"; + const signedUrl2 = + "https://private-user-images.githubusercontent.com/double.png?jwt=token2"; + + // @ts-expect-error Mock implementation doesn't match full type signature + mockOctokit.rest.issues.getComment = jest.fn().mockResolvedValue({ + data: { + body_html: ``, + }, + }); + + fetchSpy = spyOn(global, "fetch").mockResolvedValue({ + ok: true, + arrayBuffer: async () => new ArrayBuffer(8), + } as Response); + + const comments: CommentWithImages[] = [ + { + type: "issue_comment", + id: "888", + body: `Single quote: test and double quote: test`, + }, + ]; + + const result = await downloadCommentImages( + mockOctokit, + "owner", + "repo", + comments, + ); + + expect(fetchSpy).toHaveBeenCalledTimes(2); + expect(result.size).toBe(2); + expect(result.get(imageUrl1)).toBe( + "/tmp/github-images/image-1704067200000-0.jpg", + ); + expect(result.get(imageUrl2)).toBe( + "/tmp/github-images/image-1704067200000-1.png", + ); + expect(consoleLogSpy).toHaveBeenCalledWith( + "Found 2 image(s) in issue_comment 888", + ); + }); + + test("should handle mixed Markdown and HTML images", async () => { + const mockOctokit = createMockOctokit(); + const markdownUrl = + "https://github.com/user-attachments/assets/markdown.png"; + const htmlUrl = "https://github.com/user-attachments/assets/html.jpg"; + const signedUrl1 = + "https://private-user-images.githubusercontent.com/md.png?jwt=token1"; + const signedUrl2 = + "https://private-user-images.githubusercontent.com/html.jpg?jwt=token2"; + + // @ts-expect-error Mock implementation 
doesn't match full type signature + mockOctokit.rest.issues.getComment = jest.fn().mockResolvedValue({ + data: { + body_html: ``, + }, + }); + + fetchSpy = spyOn(global, "fetch").mockResolvedValue({ + ok: true, + arrayBuffer: async () => new ArrayBuffer(8), + } as Response); + + const comments: CommentWithImages[] = [ + { + type: "issue_comment", + id: "999", + body: `Markdown: ![test](${markdownUrl}) and HTML: test`, + }, + ]; + + const result = await downloadCommentImages( + mockOctokit, + "owner", + "repo", + comments, + ); + + expect(fetchSpy).toHaveBeenCalledTimes(2); + expect(result.size).toBe(2); + expect(result.get(markdownUrl)).toBe( + "/tmp/github-images/image-1704067200000-0.png", + ); + expect(result.get(htmlUrl)).toBe( + "/tmp/github-images/image-1704067200000-1.jpg", + ); + expect(consoleLogSpy).toHaveBeenCalledWith( + "Found 2 image(s) in issue_comment 999", + ); + }); + + test("should deduplicate identical URLs from Markdown and HTML", async () => { + const mockOctokit = createMockOctokit(); + const imageUrl = "https://github.com/user-attachments/assets/duplicate.png"; + const signedUrl = + "https://private-user-images.githubusercontent.com/dup.png?jwt=token"; + + // @ts-expect-error Mock implementation doesn't match full type signature + mockOctokit.rest.issues.getComment = jest.fn().mockResolvedValue({ + data: { + body_html: ``, + }, + }); + + fetchSpy = spyOn(global, "fetch").mockResolvedValue({ + ok: true, + arrayBuffer: async () => new ArrayBuffer(8), + } as Response); + + const comments: CommentWithImages[] = [ + { + type: "issue_comment", + id: "1000", + body: `Same image twice: ![test](${imageUrl}) and test`, + }, + ]; + + const result = await downloadCommentImages( + mockOctokit, + "owner", + "repo", + comments, + ); + + expect(fetchSpy).toHaveBeenCalledTimes(1); // Only downloaded once + expect(result.size).toBe(1); + expect(result.get(imageUrl)).toBe( + "/tmp/github-images/image-1704067200000-0.png", + ); + 
expect(consoleLogSpy).toHaveBeenCalledWith( + "Found 1 image(s) in issue_comment 1000", + ); + }); + + test("should handle HTML img tags with additional attributes", async () => { + const mockOctokit = createMockOctokit(); + const imageUrl = + "https://github.com/user-attachments/assets/complex-tag.webp"; + const signedUrl = + "https://private-user-images.githubusercontent.com/complex.webp?jwt=token"; + + // @ts-expect-error Mock implementation doesn't match full type signature + mockOctokit.rest.issues.getComment = jest.fn().mockResolvedValue({ + data: { + body_html: ``, + }, + }); + + fetchSpy = spyOn(global, "fetch").mockResolvedValue({ + ok: true, + arrayBuffer: async () => new ArrayBuffer(8), + } as Response); + + const comments: CommentWithImages[] = [ + { + type: "issue_comment", + id: "1001", + body: `Complex tag: test image`, + }, + ]; + + const result = await downloadCommentImages( + mockOctokit, + "owner", + "repo", + comments, + ); + + expect(fetchSpy).toHaveBeenCalledTimes(1); + expect(result.size).toBe(1); + expect(result.get(imageUrl)).toBe( + "/tmp/github-images/image-1704067200000-0.webp", + ); + expect(consoleLogSpy).toHaveBeenCalledWith( + "Found 1 image(s) in issue_comment 1001", + ); + }); });