Browse Source

feat: add run-length encoding for repeated lines

Implement applyRunLengthEncoding function to compress repeated lines in text output:
- Add line repetition compression with count message
- Focus on single line repetitions
- Only compress when beneficial
- Add tests for empty input and single line repetitions

Signed-off-by: Eric Wheeler <[email protected]>
Eric Wheeler 11 months ago
parent
commit
131f9ad0d9

+ 26 - 1
src/integrations/misc/__tests__/extract-text.test.ts

@@ -1,4 +1,10 @@
-import { addLineNumbers, everyLineHasLineNumbers, stripLineNumbers, truncateOutput } from "../extract-text"
+import {
+	addLineNumbers,
+	everyLineHasLineNumbers,
+	stripLineNumbers,
+	truncateOutput,
+	applyRunLengthEncoding,
+} from "../extract-text"
 
 describe("addLineNumbers", () => {
 	it("should add line numbers starting from 1 by default", () => {
@@ -165,3 +171,22 @@ describe("truncateOutput", () => {
 		expect(resultLines).toEqual(expectedLines)
 	})
 })
+
describe("applyRunLengthEncoding", () => {
	// Falsy inputs (empty string, null, undefined) are expected back unchanged.
	it("should handle empty input", () => {
		for (const falsyInput of ["", null, undefined]) {
			expect(applyRunLengthEncoding(falsyInput as any)).toBe(falsyInput)
		}
	})

	// Six copies of an 11-character line: the marker is shorter than the copies it replaces.
	it("should compress repeated single lines when beneficial", () => {
		const sixCopies = "longerline\n".repeat(6)
		const compressed = "longerline\n<previous line repeated 5 additional times>\n"

		expect(applyRunLengthEncoding(sixCopies)).toBe(compressed)
	})

	// Five copies of a 2-character line: the marker would be longer than the text it replaces.
	it("should not compress when not beneficial", () => {
		const shortRun = "y\n".repeat(5)

		expect(applyRunLengthEncoding(shortRun)).toBe(shortRun)
	})
})

+ 62 - 0
src/integrations/misc/extract-text.ts

@@ -149,3 +149,65 @@ export function truncateOutput(content: string, lineLimit?: number): string {
 	const endSection = content.slice(endStartPos)
 	return startSection + `\n[...${omittedLines} lines omitted...]\n\n` + endSection
 }
+
/**
 * Applies run-length encoding to compress consecutive identical lines in text.
 *
 * The input is scanned line by line (each line keeps its trailing "\n", so a
 * final line without a newline never matches the newline-terminated lines
 * before it). A run of identical lines is replaced by its first occurrence
 * followed by a `<previous line repeated N additional times>` marker line, but
 * only when that marker is strictly shorter than the N copies it replaces.
 * Otherwise the run is emitted verbatim, so the output is never longer than
 * the input.
 *
 * The same benefit rule is applied to every run, whether it sits in the middle
 * of the text or at its very end.
 *
 * @param content The text content to compress
 * @returns The compressed text; falsy input (empty string, or null/undefined
 *          passed by an untyped caller) is returned unchanged
 */
export function applyRunLengthEncoding(content: string): string {
	if (!content) {
		return content
	}

	// Renders one finished run: `line` followed by `extraCopies` identical lines.
	const encodeRun = (line: string, extraCopies: number): string => {
		if (extraCopies > 0) {
			const marker = `<previous line repeated ${extraCopies} additional times>\n`
			// The first occurrence is kept in both forms, so the marker only pays
			// off when it is shorter than the extra copies it stands in for.
			if (marker.length < line.length * extraCopies) {
				return line + marker
			}
		}
		return line.repeat(extraCopies + 1)
	}

	const parts: string[] = []
	let runLine: string | null = null
	let extraCopies = 0
	let pos = 0

	while (pos < content.length) {
		const newlineIdx = content.indexOf("\n", pos)
		// A line includes its terminating newline; the last line may lack one.
		const lineEnd = newlineIdx === -1 ? content.length : newlineIdx + 1
		const line = content.slice(pos, lineEnd)

		if (line === runLine) {
			extraCopies++
		} else {
			// The current run (if any) has ended: flush it and start a new one.
			if (runLine !== null) {
				parts.push(encodeRun(runLine, extraCopies))
			}
			runLine = line
			extraCopies = 0
		}

		pos = lineEnd
	}

	// Flush the run that was still open when the input ended.
	if (runLine !== null) {
		parts.push(encodeRun(runLine, extraCopies))
	}

	return parts.join("")
}