Просмотр исходного кода

Revert "feat(mentions): improve path handling for Windows and escaped spaces" (#1875)

Matt Rubens 9 месяцев назад
Родитель
Сommit
02081dedf9
2 измененных файлов с 49 добавлено и 407 удалено
  1. 0 325
      src/shared/__tests__/context-mentions.test.ts
  2. 49 82
      src/shared/context-mentions.ts

+ 0 - 325
src/shared/__tests__/context-mentions.test.ts

@@ -1,325 +0,0 @@
-import { mentionRegex, mentionRegexGlobal } from "../context-mentions"
-
-interface TestResult {
-	actual: string | null
-	expected: string | null
-}
-
-function testMention(input: string, expected: string | null): TestResult {
-	const match = mentionRegex.exec(input)
-	return {
-		actual: match ? match[0] : null,
-		expected,
-	}
-}
-
-function expectMatch(result: TestResult) {
-	if (result.expected === null) {
-		return expect(result.actual).toBeNull()
-	}
-	if (result.actual !== result.expected) {
-		// Instead of console.log, use expect().toBe() with a descriptive message
-		expect(result.actual).toBe(result.expected)
-	}
-}
-
-describe("Mention Regex", () => {
-	describe("Windows Path Support", () => {
-		it("matches simple Windows paths", () => {
-			const cases: Array<[string, string]> = [
-				["@C:\\folder\\file.txt", "@C:\\folder\\file.txt"],
-				["@c:\\Program/ Files\\file.txt", "@c:\\Program/ Files\\file.txt"],
-				["@C:\\file.txt", "@C:\\file.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-
-		it("matches Windows network shares", () => {
-			const cases: Array<[string, string]> = [
-				["@\\\\server\\share\\file.txt", "@\\\\server\\share\\file.txt"],
-				["@\\\\127.0.0.1\\network-path\\file.txt", "@\\\\127.0.0.1\\network-path\\file.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-
-		it("matches mixed separators", () => {
-			const result = testMention("@C:\\folder\\file.txt", "@C:\\folder\\file.txt")
-			expectMatch(result)
-		})
-
-		it("matches Windows relative paths", () => {
-			const cases: Array<[string, string]> = [
-				["@folder\\file.txt", "@folder\\file.txt"],
-				["@.\\folder\\file.txt", "@.\\folder\\file.txt"],
-				["@..\\parent\\file.txt", "@..\\parent\\file.txt"],
-				["@path\\to\\directory\\", "@path\\to\\directory\\"],
-				["@.\\current\\path\\with/ space.txt", "@.\\current\\path\\with/ space.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-	})
-
-	describe("Escaped Spaces Support", () => {
-		it("matches Unix paths with escaped spaces", () => {
-			const cases: Array<[string, string]> = [
-				["@/path/to/file\\ with\\ spaces.txt", "@/path/to/file\\ with\\ spaces.txt"],
-				["@/path/with\\ \\ multiple\\ spaces.txt", "@/path/with\\ \\ multiple\\ spaces.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-
-		it("matches Windows paths with escaped spaces", () => {
-			const cases: Array<[string, string]> = [
-				["@C:\\path\\to\\file/ with/ spaces.txt", "@C:\\path\\to\\file/ with/ spaces.txt"],
-				["@C:\\Program/ Files\\app\\file.txt", "@C:\\Program/ Files\\app\\file.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-	})
-
-	describe("Combined Path Variations", () => {
-		it("matches complex path combinations", () => {
-			const cases: Array<[string, string]> = [
-				[
-					"@C:\\Users\\name\\Documents\\file/ with/ spaces.txt",
-					"@C:\\Users\\name\\Documents\\file/ with/ spaces.txt",
-				],
-				[
-					"@\\\\server\\share\\path/ with/ spaces\\file.txt",
-					"@\\\\server\\share\\path/ with/ spaces\\file.txt",
-				],
-				["@C:\\path/ with/ spaces\\file.txt", "@C:\\path/ with/ spaces\\file.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-	})
-
-	describe("Edge Cases", () => {
-		it("handles edge cases correctly", () => {
-			const cases: Array<[string, string]> = [
-				["@C:\\", "@C:\\"],
-				["@/path/to/folder", "@/path/to/folder"],
-				["@C:\\folder\\file with spaces.txt", "@C:\\folder\\file"],
-				["@C:\\Users\\name\\path\\to\\文件夹\\file.txt", "@C:\\Users\\name\\path\\to\\文件夹\\file.txt"],
-				["@/path123/file-name_2.0.txt", "@/path123/file-name_2.0.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-	})
-
-	describe("Existing Functionality", () => {
-		it("matches Unix paths", () => {
-			const cases: Array<[string, string]> = [
-				["@/usr/local/bin/file", "@/usr/local/bin/file"],
-				["@/path/to/file.txt", "@/path/to/file.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-
-		it("matches URLs", () => {
-			const cases: Array<[string, string]> = [
-				["@http://example.com", "@http://example.com"],
-				["@https://example.com/path/to/file.html", "@https://example.com/path/to/file.html"],
-				["@ftp://server.example.com/file.zip", "@ftp://server.example.com/file.zip"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-
-		it("matches git hashes", () => {
-			const cases: Array<[string, string]> = [
-				["@a1b2c3d4e5f6g7h8i9j0", "@a1b2c3d4e5f6g7h8i9j0"],
-				["@abcdef1234567890abcdef1234567890abcdef12", "@abcdef1234567890abcdef1234567890abcdef12"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-
-		it("matches special keywords", () => {
-			const cases: Array<[string, string]> = [
-				["@problems", "@problems"],
-				["@git-changes", "@git-changes"],
-				["@terminal", "@terminal"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-	})
-
-	describe("Invalid Patterns", () => {
-		it("rejects invalid patterns", () => {
-			const cases: Array<[string, null]> = [
-				["C:\\folder\\file.txt", null],
-				["@", null],
-				["@ C:\\file.txt", null],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-
-		it("matches only until invalid characters", () => {
-			const result = testMention("@C:\\folder\\file.txt invalid suffix", "@C:\\folder\\file.txt")
-			expectMatch(result)
-		})
-	})
-
-	describe("In Context", () => {
-		it("matches mentions within text", () => {
-			const cases: Array<[string, string]> = [
-				["Check the file at @C:\\folder\\file.txt for details.", "@C:\\folder\\file.txt"],
-				["See @/path/to/file\\ with\\ spaces.txt for an example.", "@/path/to/file\\ with\\ spaces.txt"],
-				["Review @problems and @git-changes.", "@problems"],
-				["Multiple: @/file1.txt and @C:\\file2.txt and @terminal", "@/file1.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-	})
-
-	describe("Multiple Mentions", () => {
-		it("finds all mentions in a string using global regex", () => {
-			const text = "Check @/path/file1.txt and @C:\\folder\\file2.txt and report any @problems to @git-changes"
-			const matches = text.match(mentionRegexGlobal)
-			expect(matches).toEqual(["@/path/file1.txt", "@C:\\folder\\file2.txt", "@problems", "@git-changes"])
-		})
-	})
-
-	describe("Special Characters in Paths", () => {
-		it("handles special characters in file paths", () => {
-			const cases: Array<[string, string]> = [
-				["@/path/with-dash/file_underscore.txt", "@/path/with-dash/file_underscore.txt"],
-				["@C:\\folder+plus\\file(parens)[]brackets.txt", "@C:\\folder+plus\\file(parens)[]brackets.txt"],
-				["@/path/with/file#hash%percent.txt", "@/path/with/file#hash%percent.txt"],
-				["@/path/with/file@symbol$dollar.txt", "@/path/with/file@symbol$dollar.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-	})
-
-	describe("Mixed Path Types in Single String", () => {
-		it("correctly identifies the first path in a string with multiple path types", () => {
-			const text = "Check both @/unix/path and @C:\\windows\\path for details."
-			const result = mentionRegex.exec(text)
-			expect(result?.[0]).toBe("@/unix/path")
-
-			// Test starting from after the first match
-			const secondSearchStart = text.indexOf("@C:")
-			const secondResult = mentionRegex.exec(text.substring(secondSearchStart))
-			expect(secondResult?.[0]).toBe("@C:\\windows\\path")
-		})
-	})
-
-	describe("Non-Latin Character Support", () => {
-		it("handles international characters in paths", () => {
-			const cases: Array<[string, string]> = [
-				["@/path/to/你好/file.txt", "@/path/to/你好/file.txt"],
-				["@C:\\用户\\документы\\файл.txt", "@C:\\用户\\документы\\файл.txt"],
-				["@/путь/к/файлу.txt", "@/путь/к/файлу.txt"],
-				["@C:\\folder\\file_äöü.txt", "@C:\\folder\\file_äöü.txt"],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-	})
-
-	describe("Mixed Path Delimiters", () => {
-		// Modifying expectations to match current behavior
-		it("documents behavior with mixed forward and backward slashes in Windows paths", () => {
-			const cases: Array<[string, null]> = [
-				// Current implementation doesn't support mixed slashes
-				["@C:\\Users/Documents\\folder/file.txt", null],
-				["@C:/Windows\\System32/drivers\\etc/hosts", null],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-	})
-
-	describe("Extended Negative Tests", () => {
-		// Modifying expectations to match current behavior
-		it("documents behavior with potentially invalid characters", () => {
-			const cases: Array<[string, string]> = [
-				// Current implementation actually matches these patterns
-				["@/path/with<illegal>chars.txt", "@/path/with<illegal>chars.txt"],
-				["@C:\\folder\\file|with|pipe.txt", "@C:\\folder\\file|with|pipe.txt"],
-				['@/path/with"quotes".txt', '@/path/with"quotes".txt'],
-			]
-
-			cases.forEach(([input, expected]) => {
-				const result = testMention(input, expected)
-				expectMatch(result)
-			})
-		})
-	})
-
-	// // These are documented as "not implemented yet"
-	// describe("Future Enhancement Candidates", () => {
-	// 	it("identifies patterns that could be supported in future enhancements", () => {
-	// 		// These patterns aren't currently supported by the regex
-	// 		// but might be considered for future improvements
-	// 		console.log(
-	// 			"The following patterns are not currently supported but might be considered for future enhancements:",
-	// 		)
-	// 		console.log("- Paths with double slashes: @/path//with/double/slash.txt")
-	// 		console.log("- Complex path traversals: @/very/./long/../../path/.././traversal.txt")
-	// 		console.log("- Environment variables in paths: @$HOME/file.txt, @C:\\Users\\%USERNAME%\\file.txt")
-	// 	})
-	// })
-})

+ 49 - 82
src/shared/context-mentions.ts

@@ -1,90 +1,57 @@
 /*
-- **Regex Breakdown**:
-
-  1. **Pattern Components**:
-     - The regex is built from multiple patterns joined with OR (|) operators
-     - Each pattern handles a specific type of mention:
-       - Unix/Linux paths
-       - Windows paths with drive letters
-       - Windows relative paths
-       - Windows network shares
-       - URLs with protocols
-       - Git commit hashes
-       - Special keywords (problems, git-changes, terminal)
-
-  2. **Unix Path Pattern**:
-     - `(?:\\/|^)`: Starts with a forward slash or beginning of line
-     - `(?:[^\\/\\s\\\\]|\\\\[ \\t])+`: Path segment that can include escaped spaces
-     - `(?:\\/(?:[^\\/\\s\\\\]|\\\\[ \\t])+)*`: Additional path segments after slashes
-     - `\\/?`: Optional trailing slash
-
-  3. **Windows Path Pattern**:
-     - `[A-Za-z]:\\\\`: Drive letter followed by colon and double backslash
-     - `(?:(?:[^\\\\\\s/]+|\\/[ ])+`: Path segment that can include spaces escaped with forward slash
-     - `(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*)?`: Additional path segments after backslashes
-
-  4. **Windows Relative Path Pattern**:
-     - `(?:\\.{0,2}|[^\\\\\\s/]+)`: Path prefix that can be:
-       - Current directory (.)
-       - Parent directory (..)
-       - Any directory name not containing spaces, backslashes, or forward slashes
-     - `\\\\`: Backslash separator
-     - `(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+`: Path segment that can include spaces escaped with backslash or forward slash
-     - `(?:\\\\(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+)*`: Additional path segments after backslashes
-     - `\\\\?`: Optional trailing backslash
+Mention regex:
+- **Purpose**: 
+  - To identify and highlight specific mentions in text that start with '@'. 
+  - These mentions can be file paths, URLs, or the exact word 'problems'.
+  - Ensures that trailing punctuation marks (like commas, periods, etc.) are not included in the match, allowing punctuation to follow the mention without being part of it.
 
-  5. **Network Share Pattern**:
-     - `\\\\\\\\`: Double backslash (escaped) to start network path
-     - `[^\\\\\\s]+`: Server name
-     - `(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*`: Share name and additional path components
-     - `(?:\\\\)?`: Optional trailing backslash
-
-  6. **URL Pattern**:
-     - `\\w+:\/\/`: Protocol (http://, https://, etc.)
-     - `[^\\s]+`: Rest of the URL (non-whitespace characters)
-
-  7. **Git Hash Pattern**:
-     - `[a-zA-Z0-9]{7,40}\\b`: 7-40 alphanumeric characters followed by word boundary
-
-  8. **Special Keywords Pattern**:
-     - `problems\\b`, `git-changes\\b`, `terminal\\b`: Exact word matches with word boundaries
+- **Regex Breakdown**:
+  - `/@`: 
+	- **@**: The mention must start with the '@' symbol.
+  
+  - `((?:\/|\w+:\/\/)[^\s]+?|problems\b|git-changes\b)`:
+	- **Capturing Group (`(...)`)**: Captures the part of the string that matches one of the specified patterns.
+	- `(?:\/|\w+:\/\/)`: 
+	  - **Non-Capturing Group (`(?:...)`)**: Groups the alternatives without capturing them for back-referencing.
+	  - `\/`: 
+		- **Slash (`/`)**: Indicates that the mention is a file or folder path starting with a '/'.
+	  - `|`: Logical OR.
+	  - `\w+:\/\/`: 
+		- **Protocol (`\w+://`)**: Matches URLs that start with a word character sequence followed by '://', such as 'http://', 'https://', 'ftp://', etc.
+	- `[^\s]+?`: 
+	  - **Non-Whitespace Characters (`[^\s]+`)**: Matches one or more characters that are not whitespace.
+	  - **Non-Greedy (`+?`)**: Ensures the smallest possible match, preventing the inclusion of trailing punctuation.
+	- `|`: Logical OR.
+	- `problems\b`: 
+	  - **Exact Word ('problems')**: Matches the exact word 'problems'.
+	  - **Word Boundary (`\b`)**: Ensures that 'problems' is matched as a whole word and not as part of another word (e.g., 'problematic').
+		- `|`: Logical OR.
+    - `terminal\b`:
+      - **Exact Word ('terminal')**: Matches the exact word 'terminal'.
+      - **Word Boundary (`\b`)**: Ensures that 'terminal' is matched as a whole word and not as part of another word (e.g., 'terminals').
+  - `(?=[.,;:!?]?(?=[\s\r\n]|$))`:
+	- **Positive Lookahead (`(?=...)`)**: Ensures that the match is followed by specific patterns without including them in the match.
+	- `[.,;:!?]?`: 
+	  - **Optional Punctuation (`[.,;:!?]?`)**: Matches zero or one of the specified punctuation marks.
+	- `(?=[\s\r\n]|$)`: 
+	  - **Nested Positive Lookahead (`(?=[\s\r\n]|$)`)**: Ensures that the punctuation (if present) is followed by a whitespace character, a line break, or the end of the string.
+  
+- **Summary**:
+  - The regex effectively matches:
+	- Mentions that are file or folder paths starting with '/' and containing any non-whitespace characters (including periods within the path).
+	- URLs that start with a protocol (like 'http://') followed by any non-whitespace characters (including query parameters).
+	- The exact word 'problems'.
+	- The exact word 'git-changes'.
+    - The exact word 'terminal'.
+  - It ensures that any trailing punctuation marks (such as ',', '.', '!', etc.) are not included in the matched mention, allowing the punctuation to follow the mention naturally in the text.
 
-  9. **Termination Logic**:
-     - `(?=[.,;:!?]?(?=[\\s\\r\\n]|$))`: Positive lookahead that:
-       - Allows an optional punctuation mark after the mention
-       - Ensures the mention (and optional punctuation) is followed by whitespace or end of string
+- **Global Regex**:
+  - `mentionRegexGlobal`: Creates a global version of the `mentionRegex` to find all matches within a given string.
 
-- **Behavior Summary**:
-  - Matches @-prefixed mentions
-  - Handles different path formats across operating systems
-  - Supports escaped spaces in paths using OS-appropriate conventions
-  - Cleanly terminates at whitespace or end of string
-  - Excludes trailing punctuation from the match
-  - Creates both single-match and global-match regex objects
 */
-
-const mentionPatterns = [
-	// Unix paths with escaped spaces using backslash
-	"(?:\\/|^)(?:[^\\/\\s\\\\]|\\\\[ \\t])+(?:\\/(?:[^\\/\\s\\\\]|\\\\[ \\t])+)*\\/?",
-	// Windows paths with drive letters (C:\path) with support for escaped spaces using forward slash
-	"[A-Za-z]:\\\\(?:(?:[^\\\\\\s/]+|\\/[ ])+(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*)?",
-	// Windows relative paths (folder\file or .\folder\file) with support for escaped spaces
-	"(?:\\.{0,2}|[^\\\\\\s/]+)\\\\(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+(?:\\\\(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+)*\\\\?",
-	// Windows network shares (\\server\share) with support for escaped spaces using forward slash
-	"\\\\\\\\[^\\\\\\s]+(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*(?:\\\\)?",
-	// URLs with protocols (http://, https://, etc.)
-	"\\w+:\/\/[^\\s]+",
-	// Git hashes (7-40 alphanumeric characters)
-	"[a-zA-Z0-9]{7,40}\\b",
-	// Special keywords
-	"problems\\b",
-	"git-changes\\b",
-	"terminal\\b",
-]
-// Build the full regex pattern by joining the patterns with OR operator
-const mentionRegexPattern = `@(${mentionPatterns.join("|")})(?=[.,;:!?]?(?=[\\s\\r\\n]|$))`
-export const mentionRegex = new RegExp(mentionRegexPattern)
-export const mentionRegexGlobal = new RegExp(mentionRegexPattern, "g")
+export const mentionRegex =
+	/@((?:\/|\w+:\/\/)[^\s]+?|[a-f0-9]{7,40}\b|problems\b|git-changes\b|terminal\b)(?=[.,;:!?]?(?=[\s\r\n]|$))/
+export const mentionRegexGlobal = new RegExp(mentionRegex.source, "g")
 
 export interface MentionSuggestion {
 	type: "file" | "folder" | "git" | "problems"