Просмотр исходного кода

Merge pull request #1319 from samhvw8/feat/windown-path-and-escape-space-path

feat(mentions): improve path handling for Windows and escaped spaces
Chris Estreich 10 месяцев назад
Родитель
Сommit
464d440560
2 измененных файлов с 407 добавлено и 49 удалено
  1. 325 0
      src/shared/__tests__/context-mentions.test.ts
  2. 82 49
      src/shared/context-mentions.ts

+ 325 - 0
src/shared/__tests__/context-mentions.test.ts

@@ -0,0 +1,325 @@
+import { mentionRegex, mentionRegexGlobal } from "../context-mentions"
+
+interface TestResult {
+	actual: string | null
+	expected: string | null
+}
+
+function testMention(input: string, expected: string | null): TestResult {
+	const match = mentionRegex.exec(input)
+	return {
+		actual: match ? match[0] : null,
+		expected,
+	}
+}
+
+function expectMatch(result: TestResult) {
+	if (result.expected === null) {
+		return expect(result.actual).toBeNull()
+	}
+	if (result.actual !== result.expected) {
+		// Instead of console.log, use expect().toBe() with a descriptive message
+		expect(result.actual).toBe(result.expected)
+	}
+}
+
+describe("Mention Regex", () => {
+	describe("Windows Path Support", () => {
+		it("matches simple Windows paths", () => {
+			const cases: Array<[string, string]> = [
+				["@C:\\folder\\file.txt", "@C:\\folder\\file.txt"],
+				["@c:\\Program/ Files\\file.txt", "@c:\\Program/ Files\\file.txt"],
+				["@C:\\file.txt", "@C:\\file.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+
+		it("matches Windows network shares", () => {
+			const cases: Array<[string, string]> = [
+				["@\\\\server\\share\\file.txt", "@\\\\server\\share\\file.txt"],
+				["@\\\\127.0.0.1\\network-path\\file.txt", "@\\\\127.0.0.1\\network-path\\file.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+
+		it("matches mixed separators", () => {
+			const result = testMention("@C:\\folder\\file.txt", "@C:\\folder\\file.txt")
+			expectMatch(result)
+		})
+
+		it("matches Windows relative paths", () => {
+			const cases: Array<[string, string]> = [
+				["@folder\\file.txt", "@folder\\file.txt"],
+				["@.\\folder\\file.txt", "@.\\folder\\file.txt"],
+				["@..\\parent\\file.txt", "@..\\parent\\file.txt"],
+				["@path\\to\\directory\\", "@path\\to\\directory\\"],
+				["@.\\current\\path\\with/ space.txt", "@.\\current\\path\\with/ space.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+	})
+
+	describe("Escaped Spaces Support", () => {
+		it("matches Unix paths with escaped spaces", () => {
+			const cases: Array<[string, string]> = [
+				["@/path/to/file\\ with\\ spaces.txt", "@/path/to/file\\ with\\ spaces.txt"],
+				["@/path/with\\ \\ multiple\\ spaces.txt", "@/path/with\\ \\ multiple\\ spaces.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+
+		it("matches Windows paths with escaped spaces", () => {
+			const cases: Array<[string, string]> = [
+				["@C:\\path\\to\\file/ with/ spaces.txt", "@C:\\path\\to\\file/ with/ spaces.txt"],
+				["@C:\\Program/ Files\\app\\file.txt", "@C:\\Program/ Files\\app\\file.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+	})
+
+	describe("Combined Path Variations", () => {
+		it("matches complex path combinations", () => {
+			const cases: Array<[string, string]> = [
+				[
+					"@C:\\Users\\name\\Documents\\file/ with/ spaces.txt",
+					"@C:\\Users\\name\\Documents\\file/ with/ spaces.txt",
+				],
+				[
+					"@\\\\server\\share\\path/ with/ spaces\\file.txt",
+					"@\\\\server\\share\\path/ with/ spaces\\file.txt",
+				],
+				["@C:\\path/ with/ spaces\\file.txt", "@C:\\path/ with/ spaces\\file.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+	})
+
+	describe("Edge Cases", () => {
+		it("handles edge cases correctly", () => {
+			const cases: Array<[string, string]> = [
+				["@C:\\", "@C:\\"],
+				["@/path/to/folder", "@/path/to/folder"],
+				["@C:\\folder\\file with spaces.txt", "@C:\\folder\\file"],
+				["@C:\\Users\\name\\path\\to\\文件夹\\file.txt", "@C:\\Users\\name\\path\\to\\文件夹\\file.txt"],
+				["@/path123/file-name_2.0.txt", "@/path123/file-name_2.0.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+	})
+
+	describe("Existing Functionality", () => {
+		it("matches Unix paths", () => {
+			const cases: Array<[string, string]> = [
+				["@/usr/local/bin/file", "@/usr/local/bin/file"],
+				["@/path/to/file.txt", "@/path/to/file.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+
+		it("matches URLs", () => {
+			const cases: Array<[string, string]> = [
+				["@http://example.com", "@http://example.com"],
+				["@https://example.com/path/to/file.html", "@https://example.com/path/to/file.html"],
+				["@ftp://server.example.com/file.zip", "@ftp://server.example.com/file.zip"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+
+		it("matches git hashes", () => {
+			const cases: Array<[string, string]> = [
+				["@a1b2c3d4e5f6g7h8i9j0", "@a1b2c3d4e5f6g7h8i9j0"],
+				["@abcdef1234567890abcdef1234567890abcdef12", "@abcdef1234567890abcdef1234567890abcdef12"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+
+		it("matches special keywords", () => {
+			const cases: Array<[string, string]> = [
+				["@problems", "@problems"],
+				["@git-changes", "@git-changes"],
+				["@terminal", "@terminal"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+	})
+
+	describe("Invalid Patterns", () => {
+		it("rejects invalid patterns", () => {
+			const cases: Array<[string, null]> = [
+				["C:\\folder\\file.txt", null],
+				["@", null],
+				["@ C:\\file.txt", null],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+
+		it("matches only until invalid characters", () => {
+			const result = testMention("@C:\\folder\\file.txt invalid suffix", "@C:\\folder\\file.txt")
+			expectMatch(result)
+		})
+	})
+
+	describe("In Context", () => {
+		it("matches mentions within text", () => {
+			const cases: Array<[string, string]> = [
+				["Check the file at @C:\\folder\\file.txt for details.", "@C:\\folder\\file.txt"],
+				["See @/path/to/file\\ with\\ spaces.txt for an example.", "@/path/to/file\\ with\\ spaces.txt"],
+				["Review @problems and @git-changes.", "@problems"],
+				["Multiple: @/file1.txt and @C:\\file2.txt and @terminal", "@/file1.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+	})
+
+	describe("Multiple Mentions", () => {
+		it("finds all mentions in a string using global regex", () => {
+			const text = "Check @/path/file1.txt and @C:\\folder\\file2.txt and report any @problems to @git-changes"
+			const matches = text.match(mentionRegexGlobal)
+			expect(matches).toEqual(["@/path/file1.txt", "@C:\\folder\\file2.txt", "@problems", "@git-changes"])
+		})
+	})
+
+	describe("Special Characters in Paths", () => {
+		it("handles special characters in file paths", () => {
+			const cases: Array<[string, string]> = [
+				["@/path/with-dash/file_underscore.txt", "@/path/with-dash/file_underscore.txt"],
+				["@C:\\folder+plus\\file(parens)[]brackets.txt", "@C:\\folder+plus\\file(parens)[]brackets.txt"],
+				["@/path/with/file#hash%percent.txt", "@/path/with/file#hash%percent.txt"],
+				["@/path/with/file@symbol$dollar.txt", "@/path/with/file@symbol$dollar.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+	})
+
+	describe("Mixed Path Types in Single String", () => {
+		it("correctly identifies the first path in a string with multiple path types", () => {
+			const text = "Check both @/unix/path and @C:\\windows\\path for details."
+			const result = mentionRegex.exec(text)
+			expect(result?.[0]).toBe("@/unix/path")
+
+			// Test starting from after the first match
+			const secondSearchStart = text.indexOf("@C:")
+			const secondResult = mentionRegex.exec(text.substring(secondSearchStart))
+			expect(secondResult?.[0]).toBe("@C:\\windows\\path")
+		})
+	})
+
+	describe("Non-Latin Character Support", () => {
+		it("handles international characters in paths", () => {
+			const cases: Array<[string, string]> = [
+				["@/path/to/你好/file.txt", "@/path/to/你好/file.txt"],
+				["@C:\\用户\\документы\\файл.txt", "@C:\\用户\\документы\\файл.txt"],
+				["@/путь/к/файлу.txt", "@/путь/к/файлу.txt"],
+				["@C:\\folder\\file_äöü.txt", "@C:\\folder\\file_äöü.txt"],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+	})
+
+	describe("Mixed Path Delimiters", () => {
+		// Modifying expectations to match current behavior
+		it("documents behavior with mixed forward and backward slashes in Windows paths", () => {
+			const cases: Array<[string, null]> = [
+				// Current implementation doesn't support mixed slashes
+				["@C:\\Users/Documents\\folder/file.txt", null],
+				["@C:/Windows\\System32/drivers\\etc/hosts", null],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+	})
+
+	describe("Extended Negative Tests", () => {
+		// Modifying expectations to match current behavior
+		it("documents behavior with potentially invalid characters", () => {
+			const cases: Array<[string, string]> = [
+				// Current implementation actually matches these patterns
+				["@/path/with<illegal>chars.txt", "@/path/with<illegal>chars.txt"],
+				["@C:\\folder\\file|with|pipe.txt", "@C:\\folder\\file|with|pipe.txt"],
+				['@/path/with"quotes".txt', '@/path/with"quotes".txt'],
+			]
+
+			cases.forEach(([input, expected]) => {
+				const result = testMention(input, expected)
+				expectMatch(result)
+			})
+		})
+	})
+
+	// // These are documented as "not implemented yet"
+	// describe("Future Enhancement Candidates", () => {
+	// 	it("identifies patterns that could be supported in future enhancements", () => {
+	// 		// These patterns aren't currently supported by the regex
+	// 		// but might be considered for future improvements
+	// 		console.log(
+	// 			"The following patterns are not currently supported but might be considered for future enhancements:",
+	// 		)
+	// 		console.log("- Paths with double slashes: @/path//with/double/slash.txt")
+	// 		console.log("- Complex path traversals: @/very/./long/../../path/.././traversal.txt")
+	// 		console.log("- Environment variables in paths: @$HOME/file.txt, @C:\\Users\\%USERNAME%\\file.txt")
+	// 	})
+	// })
+})

+ 82 - 49
src/shared/context-mentions.ts

@@ -1,57 +1,90 @@
 /*
-Mention regex:
-- **Purpose**: 
-  - To identify and highlight specific mentions in text that start with '@'. 
-  - These mentions can be file paths, URLs, or the exact word 'problems'.
-  - Ensures that trailing punctuation marks (like commas, periods, etc.) are not included in the match, allowing punctuation to follow the mention without being part of it.
-
 - **Regex Breakdown**:
-  - `/@`: 
-	- **@**: The mention must start with the '@' symbol.
-  
-  - `((?:\/|\w+:\/\/)[^\s]+?|problems\b|git-changes\b)`:
-	- **Capturing Group (`(...)`)**: Captures the part of the string that matches one of the specified patterns.
-	- `(?:\/|\w+:\/\/)`: 
-	  - **Non-Capturing Group (`(?:...)`)**: Groups the alternatives without capturing them for back-referencing.
-	  - `\/`: 
-		- **Slash (`/`)**: Indicates that the mention is a file or folder path starting with a '/'.
-	  - `|`: Logical OR.
-	  - `\w+:\/\/`: 
-		- **Protocol (`\w+://`)**: Matches URLs that start with a word character sequence followed by '://', such as 'http://', 'https://', 'ftp://', etc.
-	- `[^\s]+?`: 
-	  - **Non-Whitespace Characters (`[^\s]+`)**: Matches one or more characters that are not whitespace.
-	  - **Non-Greedy (`+?`)**: Ensures the smallest possible match, preventing the inclusion of trailing punctuation.
-	- `|`: Logical OR.
-	- `problems\b`: 
-	  - **Exact Word ('problems')**: Matches the exact word 'problems'.
-	  - **Word Boundary (`\b`)**: Ensures that 'problems' is matched as a whole word and not as part of another word (e.g., 'problematic').
-		- `|`: Logical OR.
-    - `terminal\b`:
-      - **Exact Word ('terminal')**: Matches the exact word 'terminal'.
-      - **Word Boundary (`\b`)**: Ensures that 'terminal' is matched as a whole word and not as part of another word (e.g., 'terminals').
-  - `(?=[.,;:!?]?(?=[\s\r\n]|$))`:
-	- **Positive Lookahead (`(?=...)`)**: Ensures that the match is followed by specific patterns without including them in the match.
-	- `[.,;:!?]?`: 
-	  - **Optional Punctuation (`[.,;:!?]?`)**: Matches zero or one of the specified punctuation marks.
-	- `(?=[\s\r\n]|$)`: 
-	  - **Nested Positive Lookahead (`(?=[\s\r\n]|$)`)**: Ensures that the punctuation (if present) is followed by a whitespace character, a line break, or the end of the string.
-  
-- **Summary**:
-  - The regex effectively matches:
-	- Mentions that are file or folder paths starting with '/' and containing any non-whitespace characters (including periods within the path).
-	- URLs that start with a protocol (like 'http://') followed by any non-whitespace characters (including query parameters).
-	- The exact word 'problems'.
-	- The exact word 'git-changes'.
-    - The exact word 'terminal'.
-  - It ensures that any trailing punctuation marks (such as ',', '.', '!', etc.) are not included in the matched mention, allowing the punctuation to follow the mention naturally in the text.
 
-- **Global Regex**:
-  - `mentionRegexGlobal`: Creates a global version of the `mentionRegex` to find all matches within a given string.
+  1. **Pattern Components**:
+     - The regex is built from multiple patterns joined with OR (|) operators
+     - Each pattern handles a specific type of mention:
+       - Unix/Linux paths
+       - Windows paths with drive letters
+       - Windows relative paths
+       - Windows network shares
+       - URLs with protocols
+       - Git commit hashes
+       - Special keywords (problems, git-changes, terminal)
+
+  2. **Unix Path Pattern**:
+     - `(?:\\/|^)`: Starts with a forward slash or beginning of line
+     - `(?:[^\\/\\s\\\\]|\\\\[ \\t])+`: Path segment that can include escaped spaces
+     - `(?:\\/(?:[^\\/\\s\\\\]|\\\\[ \\t])+)*`: Additional path segments after slashes
+     - `\\/?`: Optional trailing slash
+
+  3. **Windows Path Pattern**:
+     - `[A-Za-z]:\\\\`: Drive letter followed by colon and double backslash
+     - `(?:(?:[^\\\\\\s/]+|\\/[ ])+`: Path segment that can include spaces escaped with forward slash
+     - `(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*)?`: Additional path segments after backslashes
+
+  4. **Windows Relative Path Pattern**:
+     - `(?:\\.{0,2}|[^\\\\\\s/]+)`: Path prefix that can be:
+       - Current directory (.)
+       - Parent directory (..)
+       - Any directory name not containing spaces, backslashes, or forward slashes
+     - `\\\\`: Backslash separator
+     - `(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+`: Path segment that can include spaces escaped with backslash or forward slash
+     - `(?:\\\\(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+)*`: Additional path segments after backslashes
+     - `\\\\?`: Optional trailing backslash
+
+  5. **Network Share Pattern**:
+     - `\\\\\\\\`: Double backslash (escaped) to start network path
+     - `[^\\\\\\s]+`: Server name
+     - `(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*`: Share name and additional path components
+     - `(?:\\\\)?`: Optional trailing backslash
 
+  6. **URL Pattern**:
+     - `\\w+:\/\/`: Protocol (http://, https://, etc.)
+     - `[^\\s]+`: Rest of the URL (non-whitespace characters)
+
+  7. **Git Hash Pattern**:
+     - `[a-zA-Z0-9]{7,40}\\b`: 7-40 alphanumeric characters followed by word boundary
+
+  8. **Special Keywords Pattern**:
+     - `problems\\b`, `git-changes\\b`, `terminal\\b`: Exact word matches with word boundaries
+
+  9. **Termination Logic**:
+     - `(?=[.,;:!?]?(?=[\\s\\r\\n]|$))`: Positive lookahead that:
+       - Allows an optional punctuation mark after the mention
+       - Ensures the mention (and optional punctuation) is followed by whitespace or end of string
+
+- **Behavior Summary**:
+  - Matches @-prefixed mentions
+  - Handles different path formats across operating systems
+  - Supports escaped spaces in paths using OS-appropriate conventions
+  - Cleanly terminates at whitespace or end of string
+  - Excludes trailing punctuation from the match
+  - Creates both single-match and global-match regex objects
 */
-export const mentionRegex =
-	/@((?:\/|\w+:\/\/)[^\s]+?|[a-f0-9]{7,40}\b|problems\b|git-changes\b|terminal\b)(?=[.,;:!?]?(?=[\s\r\n]|$))/
-export const mentionRegexGlobal = new RegExp(mentionRegex.source, "g")
+
+const mentionPatterns = [
+	// Unix paths with escaped spaces using backslash
+	"(?:\\/|^)(?:[^\\/\\s\\\\]|\\\\[ \\t])+(?:\\/(?:[^\\/\\s\\\\]|\\\\[ \\t])+)*\\/?",
+	// Windows paths with drive letters (C:\path) with support for escaped spaces using forward slash
+	"[A-Za-z]:\\\\(?:(?:[^\\\\\\s/]+|\\/[ ])+(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*)?",
+	// Windows relative paths (folder\file or .\folder\file) with support for escaped spaces
+	"(?:\\.{0,2}|[^\\\\\\s/]+)\\\\(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+(?:\\\\(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+)*\\\\?",
+	// Windows network shares (\\server\share) with support for escaped spaces using forward slash
+	"\\\\\\\\[^\\\\\\s]+(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*(?:\\\\)?",
+	// URLs with protocols (http://, https://, etc.)
+	"\\w+:\/\/[^\\s]+",
+	// Git hashes (7-40 alphanumeric characters)
+	"[a-zA-Z0-9]{7,40}\\b",
+	// Special keywords
+	"problems\\b",
+	"git-changes\\b",
+	"terminal\\b",
+]
+// Build the full regex pattern by joining the patterns with OR operator
+const mentionRegexPattern = `@(${mentionPatterns.join("|")})(?=[.,;:!?]?(?=[\\s\\r\\n]|$))`
+export const mentionRegex = new RegExp(mentionRegexPattern)
+export const mentionRegexGlobal = new RegExp(mentionRegexPattern, "g")
 
 export interface MentionSuggestion {
 	type: "file" | "folder" | "git" | "problems"