Browse Source

Fix PDF/docx parsing

Saoud Rizwan 1 year ago
parent
commit
6cbd232039
2 changed files with 3 additions and 2 deletions
  1. src/ClaudeDev.ts (+1 -1)
  2. src/utils/extract-text.ts (+2 -1)

+ 1 - 1
src/ClaudeDev.ts

@@ -172,7 +172,7 @@ const tools: Tool[] = [
 	{
 		name: "read_file",
 		description:
-			"Read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file, for example to analyze code, review text files, or extract information from configuration files. Be aware that this tool may not be suitable for very large files or binary files, as it returns the raw content as a string.",
+			"Read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file, for example to analyze code, review text files, or extract information from configuration files. Automatically extracts raw text from PDF and DOCX files. May not be suitable for other types of binary files, as it returns the raw content as a string.",
 		input_schema: {
 			type: "object",
 			properties: {

+ 2 - 1
src/utils/extract-text.ts

@@ -1,5 +1,6 @@
 import * as path from "path"
-import pdf from "pdf-parse"
+// @ts-ignore-next-line
+import pdf from "pdf-parse/lib/pdf-parse"
 import mammoth from "mammoth"
 import { isBinaryFile } from "isbinaryfile"
 import fs from "fs/promises"