Просмотр исходного кода

use isbinaryfile to more safely read contents of folders and files

Saoud Rizwan 1 год назад
Родитель
Коммит
b7617e5f2a
4 изменённых файла: 43 добавлено и 23 удалено
  1. 13 0
      package-lock.json
  2. 1 0
      package.json
  3. 22 11
      src/utils/context-mentions.ts
  4. 7 12
      src/utils/extract-text.ts

+ 13 - 0
package-lock.json

@@ -25,6 +25,7 @@
         "diff": "^5.2.0",
         "fast-deep-equal": "^3.1.3",
         "globby": "^14.0.2",
+        "isbinaryfile": "^5.0.2",
         "mammoth": "^1.8.0",
         "monaco-vscode-textmate-theme-converter": "^0.1.7",
         "openai": "^4.61.0",
@@ -7878,6 +7879,18 @@
       "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
       "license": "MIT"
     },
+    "node_modules/isbinaryfile": {
+      "version": "5.0.2",
+      "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.2.tgz",
+      "integrity": "sha512-GvcjojwonMjWbTkfMpnVHVqXW/wKMYDfEpY94/8zy8HFMOqb/VL6oeONq9v87q4ttVlaTLnGXnJD4B5B1OTGIg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 18.0.0"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/gjtorikian/"
+      }
+    },
     "node_modules/isexe": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",

+ 1 - 0
package.json

@@ -161,6 +161,7 @@
     "diff": "^5.2.0",
     "fast-deep-equal": "^3.1.3",
     "globby": "^14.0.2",
+    "isbinaryfile": "^5.0.2",
     "mammoth": "^1.8.0",
     "monaco-vscode-textmate-theme-converter": "^0.1.7",
     "openai": "^4.61.0",

+ 22 - 11
src/utils/context-mentions.ts

@@ -5,6 +5,7 @@ import { UrlScraper } from "./UrlScraper"
 import { mentionRegexGlobal } from "../shared/context-mentions"
 import fs from "fs/promises"
 import { extractTextFromFile } from "./extract-text"
+import { isBinaryFile } from "isbinaryfile"
 
 export function openMention(mention?: string): void {
 	if (!mention) {
@@ -92,12 +93,16 @@ async function getFileOrFolderContent(mentionPath: string, cwd: string): Promise
 		const stats = await fs.stat(absPath)
 
 		if (stats.isFile()) {
+			const isBinary = await isBinaryFile(absPath).catch(() => false)
+			if (isBinary) {
+				return "(Binary file)"
+			}
 			const content = await extractTextFromFile(absPath)
 			return content
 		} else if (stats.isDirectory()) {
 			const entries = await fs.readdir(absPath, { withFileTypes: true })
 			let directoryContent = ""
-			const fileContentPromises: Promise<string>[] = []
+			const fileContentPromises: Promise<string | undefined>[] = []
 			entries.forEach((entry) => {
 				if (entry.isFile()) {
 					directoryContent += `- File: ${entry.name}\n`
@@ -105,12 +110,18 @@ async function getFileOrFolderContent(mentionPath: string, cwd: string): Promise
 					const absoluteFilePath = path.resolve(absPath, entry.name)
 					// const relativeFilePath = path.relative(cwd, absoluteFilePath);
 					fileContentPromises.push(
-						extractTextFromFile(absoluteFilePath)
-							.then((content) => `<file_content path="${filePath}">\n${content}\n</file_content>`)
-							.catch(
-								(error) =>
-									`<file_content path="${filePath}">\nError fetching content: ${error.message}\n</file_content>`
-							)
+						(async () => {
+							try {
+								const isBinary = await isBinaryFile(absoluteFilePath).catch(() => false)
+								if (isBinary) {
+									return undefined
+								}
+								const content = await extractTextFromFile(absoluteFilePath)
+								return `<file_content path="${filePath}">\n${content}\n</file_content>`
+							} catch (error) {
+								return undefined
+							}
+						})()
 					)
 				} else if (entry.isDirectory()) {
 					directoryContent += `- Directory: ${entry.name}/\n`
@@ -119,10 +130,10 @@ async function getFileOrFolderContent(mentionPath: string, cwd: string): Promise
 					directoryContent += `- Other: ${entry.name}\n`
 				}
 			})
-			const fileContents = await Promise.all(fileContentPromises)
-			return `${directoryContent}\n${fileContents.join("\n")}`
+			const fileContents = (await Promise.all(fileContentPromises)).filter((content) => content)
+			return `${directoryContent}\n${fileContents.join("\n")}`.trim()
 		} else {
-			return "Unsupported file type."
+			return `(Failed to read contents of ${mentionPath})`
 		}
 	} catch (error) {
 		throw new Error(`Failed to access path "${mentionPath}": ${error.message}`)
@@ -149,7 +160,7 @@ async function getWorkspaceDiagnostics(cwd: string): Promise<string> {
 	}
 
 	if (!diagnosticsDetails) {
-		return "No problems detected."
+		return "No errors or warnings detected."
 	}
 
 	return diagnosticsDetails.trim()

+ 7 - 12
src/utils/extract-text.ts

@@ -3,6 +3,7 @@ import * as path from "path"
 import pdf from "pdf-parse/lib/pdf-parse"
 import mammoth from "mammoth"
 import fs from "fs/promises"
+import { isBinaryFile } from "isbinaryfile"
 
 export async function extractTextFromFile(filePath: string): Promise<string> {
 	try {
@@ -18,19 +19,13 @@ export async function extractTextFromFile(filePath: string): Promise<string> {
 			return extractTextFromDOCX(filePath)
 		case ".ipynb":
 			return extractTextFromIPYNB(filePath)
-		case ".jpg":
-		case ".jpeg":
-		case ".png":
-		case ".gif":
-		case ".webp":
-		case ".mp4":
-		case ".mp3":
-		case ".wav":
-		case ".avi":
-		case ".mov":
-			return "Cannot read media file."
 		default:
-			return await fs.readFile(filePath, "utf8")
+			const isBinary = await isBinaryFile(filePath).catch(() => false)
+			if (!isBinary) {
+				return await fs.readFile(filePath, "utf8")
+			} else {
+				throw new Error(`Cannot read text for file type: ${fileExtension}`)
+			}
 	}
 }