Browse Source

Improve file excludes for checkpoints service

cte 10 months ago
parent
commit
2325d8be46

+ 19 - 35
src/services/checkpoints/ShadowCheckpointService.ts

@@ -6,8 +6,11 @@ import EventEmitter from "events"
 import simpleGit, { SimpleGit } from "simple-git"
 import simpleGit, { SimpleGit } from "simple-git"
 import { globby } from "globby"
 import { globby } from "globby"
 
 
-import { GIT_DISABLED_SUFFIX, GIT_EXCLUDES } from "./constants"
+import { fileExistsAtPath } from "../../utils/fs"
+
+import { GIT_DISABLED_SUFFIX } from "./constants"
 import { CheckpointDiff, CheckpointResult, CheckpointEventMap } from "./types"
 import { CheckpointDiff, CheckpointResult, CheckpointEventMap } from "./types"
+import { getExcludePatterns } from "./excludes"
 
 
 export abstract class ShadowCheckpointService extends EventEmitter {
 export abstract class ShadowCheckpointService extends EventEmitter {
 	public readonly taskId: string
 	public readonly taskId: string
@@ -65,12 +68,6 @@ export abstract class ShadowCheckpointService extends EventEmitter {
 		const gitVersion = await git.version()
 		const gitVersion = await git.version()
 		this.log(`[${this.constructor.name}#create] git = ${gitVersion}`)
 		this.log(`[${this.constructor.name}#create] git = ${gitVersion}`)
 
 
-		const fileExistsAtPath = (path: string) =>
-			fs
-				.access(path)
-				.then(() => true)
-				.catch(() => false)
-
 		let created = false
 		let created = false
 		const startTime = Date.now()
 		const startTime = Date.now()
 
 
@@ -84,41 +81,16 @@ export abstract class ShadowCheckpointService extends EventEmitter {
 				)
 				)
 			}
 			}
 
 
+			await this.writeExcludeFile()
 			this.baseHash = await git.revparse(["HEAD"])
 			this.baseHash = await git.revparse(["HEAD"])
 		} else {
 		} else {
 			this.log(`[${this.constructor.name}#initShadowGit] creating shadow git repo at ${this.checkpointsDir}`)
 			this.log(`[${this.constructor.name}#initShadowGit] creating shadow git repo at ${this.checkpointsDir}`)
-
 			await git.init()
 			await git.init()
 			await git.addConfig("core.worktree", this.workspaceDir) // Sets the working tree to the current workspace.
 			await git.addConfig("core.worktree", this.workspaceDir) // Sets the working tree to the current workspace.
 			await git.addConfig("commit.gpgSign", "false") // Disable commit signing for shadow repo.
 			await git.addConfig("commit.gpgSign", "false") // Disable commit signing for shadow repo.
 			await git.addConfig("user.name", "Roo Code")
 			await git.addConfig("user.name", "Roo Code")
 			await git.addConfig("user.email", "[email protected]")
 			await git.addConfig("user.email", "[email protected]")
-
-			let lfsPatterns: string[] = [] // Get LFS patterns from workspace if they exist.
-
-			try {
-				const attributesPath = path.join(this.workspaceDir, ".gitattributes")
-
-				if (await fileExistsAtPath(attributesPath)) {
-					lfsPatterns = (await fs.readFile(attributesPath, "utf8"))
-						.split("\n")
-						.filter((line) => line.includes("filter=lfs"))
-						.map((line) => line.split(" ")[0].trim())
-				}
-			} catch (error) {
-				this.log(
-					`[${this.constructor.name}#initShadowGit] failed to read .gitattributes: ${error instanceof Error ? error.message : String(error)}`,
-				)
-			}
-
-			// Add basic excludes directly in git config, while respecting any
-			// .gitignore in the workspace.
-			// .git/info/exclude is local to the shadow git repo, so it's not
-			// shared with the main repo - and won't conflict with user's
-			// .gitignore.
-			await fs.mkdir(path.join(this.dotGitDir, "info"), { recursive: true })
-			const excludesPath = path.join(this.dotGitDir, "info", "exclude")
-			await fs.writeFile(excludesPath, [...GIT_EXCLUDES, ...lfsPatterns].join("\n"))
+			await this.writeExcludeFile()
 			await this.stageAll(git)
 			await this.stageAll(git)
 			const { commit } = await git.commit("initial commit", { "--allow-empty": null })
 			const { commit } = await git.commit("initial commit", { "--allow-empty": null })
 			this.baseHash = commit
 			this.baseHash = commit
@@ -126,6 +98,7 @@ export abstract class ShadowCheckpointService extends EventEmitter {
 		}
 		}
 
 
 		const duration = Date.now() - startTime
 		const duration = Date.now() - startTime
+
 		this.log(
 		this.log(
 			`[${this.constructor.name}#initShadowGit] initialized shadow repo with base commit ${this.baseHash} in ${duration}ms`,
 			`[${this.constructor.name}#initShadowGit] initialized shadow repo with base commit ${this.baseHash} in ${duration}ms`,
 		)
 		)
@@ -145,8 +118,18 @@ export abstract class ShadowCheckpointService extends EventEmitter {
 		return { created, duration }
 		return { created, duration }
 	}
 	}
 
 
+	/**
+	 * Rewrites `<dotGitDir>/info/exclude` for the shadow repository.
+	 *
+	 * The file is filled with the patterns returned by `getExcludePatterns`
+	 * for the current workspace, one per line. Because `info/exclude` lives
+	 * inside the shadow repo's own git directory, these rules stay separate
+	 * from the user's repository and from any `.gitignore` in the workspace.
+	 */
+	protected async writeExcludeFile() {
+		const infoDir = path.join(this.dotGitDir, "info")
+		// `recursive: true` makes this a no-op when the directory already exists.
+		await fs.mkdir(infoDir, { recursive: true })
+
+		const excludeLines = await getExcludePatterns(this.workspaceDir)
+		const excludeFile = path.join(infoDir, "exclude")
+		await fs.writeFile(excludeFile, excludeLines.join("\n"))
+	}
+
 	private async stageAll(git: SimpleGit) {
 	private async stageAll(git: SimpleGit) {
-		// await writeExcludesFile(gitPath, await getLfsPatterns(this.cwd)).
 		await this.renameNestedGitRepos(true)
 		await this.renameNestedGitRepos(true)
 
 
 		try {
 		try {
@@ -188,6 +171,7 @@ export abstract class ShadowCheckpointService extends EventEmitter {
 
 
 			try {
 			try {
 				await fs.rename(fullPath, newPath)
 				await fs.rename(fullPath, newPath)
+
 				this.log(
 				this.log(
 					`[${this.constructor.name}#renameNestedGitRepos] ${disable ? "disabled" : "enabled"} nested git repo ${gitPath}`,
 					`[${this.constructor.name}#renameNestedGitRepos] ${disable ? "disabled" : "enabled"} nested git repo ${gitPath}`,
 				)
 				)

+ 149 - 2
src/services/checkpoints/__tests__/ShadowCheckpointService.test.ts

@@ -7,6 +7,7 @@ import { EventEmitter } from "events"
 
 
 import { simpleGit, SimpleGit } from "simple-git"
 import { simpleGit, SimpleGit } from "simple-git"
 
 
+import { fileExistsAtPath } from "../../../utils/fs"
 import { RepoPerTaskCheckpointService } from "../RepoPerTaskCheckpointService"
 import { RepoPerTaskCheckpointService } from "../RepoPerTaskCheckpointService"
 import { RepoPerWorkspaceCheckpointService } from "../RepoPerWorkspaceCheckpointService"
 import { RepoPerWorkspaceCheckpointService } from "../RepoPerWorkspaceCheckpointService"
 
 
@@ -16,7 +17,7 @@ jest.mock("globby", () => ({
 
 
 const tmpDir = path.join(os.tmpdir(), "CheckpointService")
 const tmpDir = path.join(os.tmpdir(), "CheckpointService")
 
 
-const initRepo = async ({
+const initWorkspaceRepo = async ({
 	workspaceDir,
 	workspaceDir,
 	userName = "Roo Code",
 	userName = "Roo Code",
 	userEmail = "[email protected]",
 	userEmail = "[email protected]",
@@ -64,7 +65,7 @@ describe.each([
 
 
 		const shadowDir = path.join(tmpDir, `${prefix}-${Date.now()}`)
 		const shadowDir = path.join(tmpDir, `${prefix}-${Date.now()}`)
 		const workspaceDir = path.join(tmpDir, `workspace-${Date.now()}`)
 		const workspaceDir = path.join(tmpDir, `workspace-${Date.now()}`)
-		const repo = await initRepo({ workspaceDir })
+		const repo = await initWorkspaceRepo({ workspaceDir })
 
 
 		workspaceGit = repo.git
 		workspaceGit = repo.git
 		testFile = repo.testFile
 		testFile = repo.testFile
@@ -298,6 +299,52 @@ describe.each([
 			await expect(fs.readFile(testFile, "utf-8")).rejects.toThrow()
 			await expect(fs.readFile(testFile, "utf-8")).rejects.toThrow()
 			await expect(fs.readFile(untrackedFile, "utf-8")).rejects.toThrow()
 			await expect(fs.readFile(untrackedFile, "utf-8")).rejects.toThrow()
 		})
 		})
+
+		it("does not create a checkpoint for ignored files", async () => {
+			// Create a file that matches an ignored pattern (e.g., .log file).
+			const ignoredFile = path.join(service.workspaceDir, "ignored.log")
+			await fs.writeFile(ignoredFile, "Initial ignored content")
+
+			const commit = await service.saveCheckpoint("Ignored file checkpoint")
+			expect(commit?.commit).toBeFalsy()
+
+			await fs.writeFile(ignoredFile, "Modified ignored content")
+
+			const commit2 = await service.saveCheckpoint("Ignored file modified checkpoint")
+			expect(commit2?.commit).toBeFalsy()
+
+			expect(await fs.readFile(ignoredFile, "utf-8")).toBe("Modified ignored content")
+		})
+
+		it("does not create a checkpoint for LFS files", async () => {
+			// Create a .gitattributes file with LFS patterns.
+			const gitattributesPath = path.join(service.workspaceDir, ".gitattributes")
+			await fs.writeFile(gitattributesPath, "*.lfs filter=lfs diff=lfs merge=lfs -text")
+
+			// Re-initialize the service to trigger a write to .git/info/exclude.
+			service = new klass(service.taskId, service.checkpointsDir, service.workspaceDir, () => {})
+			const excludesPath = path.join(service.checkpointsDir, ".git", "info", "exclude")
+			expect((await fs.readFile(excludesPath, "utf-8")).split("\n")).not.toContain("*.lfs")
+			await service.initShadowGit()
+			expect((await fs.readFile(excludesPath, "utf-8")).split("\n")).toContain("*.lfs")
+
+			const commit0 = await service.saveCheckpoint("Add gitattributes")
+			expect(commit0?.commit).toBeTruthy()
+
+			// Create a file that matches an LFS pattern.
+			const lfsFile = path.join(service.workspaceDir, "foo.lfs")
+			await fs.writeFile(lfsFile, "Binary file content simulation")
+
+			const commit = await service.saveCheckpoint("LFS file checkpoint")
+			expect(commit?.commit).toBeFalsy()
+
+			await fs.writeFile(lfsFile, "Modified binary content")
+
+			const commit2 = await service.saveCheckpoint("LFS file modified checkpoint")
+			expect(commit2?.commit).toBeFalsy()
+
+			expect(await fs.readFile(lfsFile, "utf-8")).toBe("Modified binary content")
+		})
 	})
 	})
 
 
 	describe(`${klass.name}#create`, () => {
 	describe(`${klass.name}#create`, () => {
@@ -337,6 +384,106 @@ describe.each([
 		})
 		})
 	})
 	})
 
 
+	describe(`${klass.name}#renameNestedGitRepos`, () => {
+		it("handles nested git repositories during initialization", async () => {
+			// Create a new temporary workspace and service for this test.
+			const shadowDir = path.join(tmpDir, `${prefix}-nested-git-${Date.now()}`)
+			const workspaceDir = path.join(tmpDir, `workspace-nested-git-${Date.now()}`)
+
+			// Create a primary workspace repo.
+			await fs.mkdir(workspaceDir, { recursive: true })
+			const mainGit = simpleGit(workspaceDir)
+			await mainGit.init()
+			await mainGit.addConfig("user.name", "Roo Code")
+			await mainGit.addConfig("user.email", "[email protected]")
+
+			// Create a nested repo inside the workspace.
+			const nestedRepoPath = path.join(workspaceDir, "nested-project")
+			await fs.mkdir(nestedRepoPath, { recursive: true })
+			const nestedGit = simpleGit(nestedRepoPath)
+			await nestedGit.init()
+			await nestedGit.addConfig("user.name", "Roo Code")
+			await nestedGit.addConfig("user.email", "[email protected]")
+
+			// Add a file to the nested repo.
+			const nestedFile = path.join(nestedRepoPath, "nested-file.txt")
+			await fs.writeFile(nestedFile, "Content in nested repo")
+			await nestedGit.add(".")
+			await nestedGit.commit("Initial commit in nested repo")
+
+			// Create a test file in the main workspace.
+			const mainFile = path.join(workspaceDir, "main-file.txt")
+			await fs.writeFile(mainFile, "Content in main repo")
+			await mainGit.add(".")
+			await mainGit.commit("Initial commit in main repo")
+
+			// Confirm nested git directory exists before initialization.
+			const nestedGitDir = path.join(nestedRepoPath, ".git")
+			const nestedGitDisabledDir = `${nestedGitDir}_disabled`
+			expect(await fileExistsAtPath(nestedGitDir)).toBe(true)
+			expect(await fileExistsAtPath(nestedGitDisabledDir)).toBe(false)
+
+			// Configure globby mock to return our nested git repository.
+			const relativeGitPath = path.relative(workspaceDir, nestedGitDir)
+
+			jest.mocked(require("globby").globby).mockImplementation((pattern: string | string[]) => {
+				if (pattern === "**/.git") {
+					return Promise.resolve([relativeGitPath])
+				} else if (pattern === "**/.git_disabled") {
+					return Promise.resolve([`${relativeGitPath}_disabled`])
+				}
+
+				return Promise.resolve([])
+			})
+
+			// Create a spy on fs.rename to track when it's called.
+			const renameSpy = jest.spyOn(fs, "rename")
+
+			// Initialize the shadow git service.
+			const service = new klass(taskId, shadowDir, workspaceDir, () => {})
+
+			// Override renameNestedGitRepos to track calls.
+			const originalRenameMethod = service["renameNestedGitRepos"].bind(service)
+			let disableCall = false
+			let enableCall = false
+
+			service["renameNestedGitRepos"] = async (disable: boolean) => {
+				if (disable) {
+					disableCall = true
+				} else {
+					enableCall = true
+				}
+
+				return originalRenameMethod(disable)
+			}
+
+			// Initialize the shadow git repo.
+			await service.initShadowGit()
+
+			// Verify both disable and enable were called.
+			expect(disableCall).toBe(true)
+			expect(enableCall).toBe(true)
+
+			// Verify rename was called with correct paths.
+			const renameCallsArgs = renameSpy.mock.calls.map((call) => call[0] + " -> " + call[1])
+			expect(
+				renameCallsArgs.some((args) => args.includes(nestedGitDir) && args.includes(nestedGitDisabledDir)),
+			).toBe(true)
+			expect(
+				renameCallsArgs.some((args) => args.includes(nestedGitDisabledDir) && args.includes(nestedGitDir)),
+			).toBe(true)
+
+			// Verify the nested git directory is back to normal after initialization.
+			expect(await fileExistsAtPath(nestedGitDir)).toBe(true)
+			expect(await fileExistsAtPath(nestedGitDisabledDir)).toBe(false)
+
+			// Clean up.
+			renameSpy.mockRestore()
+			await fs.rm(shadowDir, { recursive: true, force: true })
+			await fs.rm(workspaceDir, { recursive: true, force: true })
+		})
+	})
+
 	describe(`${klass.name}#events`, () => {
 	describe(`${klass.name}#events`, () => {
 		it("emits initialize event when service is created", async () => {
 		it("emits initialize event when service is created", async () => {
 			const shadowDir = path.join(tmpDir, `${prefix}3-${Date.now()}`)
 			const shadowDir = path.join(tmpDir, `${prefix}3-${Date.now()}`)

+ 156 - 0
src/services/checkpoints/__tests__/excludes.test.ts

@@ -0,0 +1,156 @@
+// npx jest src/services/checkpoints/__tests__/excludes.test.ts
+
+import fs from "fs/promises"
+import { join } from "path"
+
+import { fileExistsAtPath } from "../../../utils/fs"
+
+import { getExcludePatterns } from "../excludes"
+import { GIT_DISABLED_SUFFIX } from "../constants"
+
+jest.mock("fs/promises")
+
+jest.mock("../../../utils/fs")
+
+// Unit tests for getExcludePatterns. Both "fs/promises" and "../../../utils/fs"
+// are replaced through jest.mock in this file, so the assertions below only
+// inspect mock calls and the returned pattern list.
+describe("getExcludePatterns", () => {
+	const mockedFs = fs as jest.Mocked<typeof fs>
+	const mockedFileExistsAtPath = fileExistsAtPath as jest.MockedFunction<typeof fileExistsAtPath>
+	const testWorkspacePath = "/test/workspace"
+
+	// Every test configures its own mock return values after this reset.
+	beforeEach(() => {
+		jest.resetAllMocks()
+	})
+
+	// Only getExcludePatterns is imported by this file, so the LFS handling is
+	// exercised indirectly through its return value.
+	describe("getLfsPatterns", () => {
+		it("should include LFS patterns from .gitattributes when they exist", async () => {
+			// Mock .gitattributes file exists
+			mockedFileExistsAtPath.mockResolvedValue(true)
+
+			// Mock .gitattributes file content with LFS patterns
+			const gitAttributesContent = `*.psd filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+# A comment line
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+readme.md text
+`
+			mockedFs.readFile.mockResolvedValue(gitAttributesContent)
+
+			// Expected LFS patterns
+			const expectedLfsPatterns = ["*.psd", "*.zip", "*.mp4"]
+
+			// Get exclude patterns
+			const excludePatterns = await getExcludePatterns(testWorkspacePath)
+
+			// Verify .gitattributes was checked at the correct path
+			expect(mockedFileExistsAtPath).toHaveBeenCalledWith(join(testWorkspacePath, ".gitattributes"))
+
+			// Verify file was read
+			expect(mockedFs.readFile).toHaveBeenCalledWith(join(testWorkspacePath, ".gitattributes"), "utf8")
+
+			// Verify LFS patterns are included in result
+			expectedLfsPatterns.forEach((pattern) => {
+				expect(excludePatterns).toContain(pattern)
+			})
+
+			// Verify all normal patterns also exist
+			expect(excludePatterns).toContain(".git/")
+			expect(excludePatterns).toContain(`.git${GIT_DISABLED_SUFFIX}/`)
+		})
+
+		it("should handle .gitattributes with no LFS patterns", async () => {
+			// Mock .gitattributes file exists
+			mockedFileExistsAtPath.mockResolvedValue(true)
+
+			// Mock .gitattributes file content with no LFS patterns
+			const gitAttributesContent = `*.md text
+*.txt text
+*.js text eol=lf
+`
+			mockedFs.readFile.mockResolvedValue(gitAttributesContent)
+
+			// Get exclude patterns
+			const excludePatterns = await getExcludePatterns(testWorkspacePath)
+
+			// Verify .gitattributes was checked
+			expect(mockedFileExistsAtPath).toHaveBeenCalledWith(join(testWorkspacePath, ".gitattributes"))
+
+			// Verify file was read
+			expect(mockedFs.readFile).toHaveBeenCalledWith(join(testWorkspacePath, ".gitattributes"), "utf8")
+
+			// Verify LFS patterns are not included
+			// Just ensure no lines from our mock gitAttributes are in the result
+			const gitAttributesLines = gitAttributesContent.split("\n").map((line) => line.split(" ")[0].trim())
+
+			gitAttributesLines.forEach((line) => {
+				if (line && !line.startsWith("#")) {
+					expect(excludePatterns.includes(line)).toBe(false)
+				}
+			})
+
+			// Verify default patterns are included
+			expect(excludePatterns).toContain(".git/")
+			expect(excludePatterns).toContain(`.git${GIT_DISABLED_SUFFIX}/`)
+		})
+
+		it("should handle missing .gitattributes file", async () => {
+			// Mock .gitattributes file doesn't exist
+			mockedFileExistsAtPath.mockResolvedValue(false)
+
+			// Get exclude patterns
+			const excludePatterns = await getExcludePatterns(testWorkspacePath)
+
+			// Verify .gitattributes was checked
+			expect(mockedFileExistsAtPath).toHaveBeenCalledWith(join(testWorkspacePath, ".gitattributes"))
+
+			// Verify file was not read
+			expect(mockedFs.readFile).not.toHaveBeenCalled()
+
+			// Verify standard patterns are included
+			expect(excludePatterns).toContain(".git/")
+			expect(excludePatterns).toContain(`.git${GIT_DISABLED_SUFFIX}/`)
+
+			// Verify we have standard patterns but no LFS patterns
+			// Check for a few known patterns from different categories
+			expect(excludePatterns).toContain("node_modules/") // buildArtifact
+			expect(excludePatterns).toContain("*.jpg") // media
+			expect(excludePatterns).toContain("*.tmp") // cache
+			expect(excludePatterns).toContain("*.env*") // config
+			expect(excludePatterns).toContain("*.zip") // large data
+			expect(excludePatterns).toContain("*.db") // database
+			expect(excludePatterns).toContain("*.shp") // geospatial
+			expect(excludePatterns).toContain("*.log") // log
+		})
+
+		it("should handle errors when reading .gitattributes", async () => {
+			// Mock .gitattributes file exists
+			mockedFileExistsAtPath.mockResolvedValue(true)
+
+			// Mock readFile to throw error
+			mockedFs.readFile.mockRejectedValue(new Error("File read error"))
+
+			// Get exclude patterns
+			const excludePatterns = await getExcludePatterns(testWorkspacePath)
+
+			// Verify .gitattributes was checked
+			expect(mockedFileExistsAtPath).toHaveBeenCalledWith(join(testWorkspacePath, ".gitattributes"))
+
+			// Verify file read was attempted
+			expect(mockedFs.readFile).toHaveBeenCalledWith(join(testWorkspacePath, ".gitattributes"), "utf8")
+
+			// Verify standard patterns are included
+			expect(excludePatterns).toContain(".git/")
+			expect(excludePatterns).toContain(`.git${GIT_DISABLED_SUFFIX}/`)
+
+			// Verify we have standard patterns but no LFS patterns
+			// Check for a few known patterns from different categories
+			expect(excludePatterns).toContain("node_modules/") // buildArtifact
+			expect(excludePatterns).toContain("*.jpg") // media
+			expect(excludePatterns).toContain("*.tmp") // cache
+			expect(excludePatterns).toContain("*.env*") // config
+			expect(excludePatterns).toContain("*.zip") // large data
+			expect(excludePatterns).toContain("*.db") // database
+			expect(excludePatterns).toContain("*.shp") // geospatial
+			expect(excludePatterns).toContain("*.log") // log
+		})
+	})
+})

+ 0 - 88
src/services/checkpoints/constants.ts

@@ -1,89 +1 @@
 export const GIT_DISABLED_SUFFIX = "_disabled"
 export const GIT_DISABLED_SUFFIX = "_disabled"
-
-export const GIT_EXCLUDES = [
-	".git/", // Ignore the user's .git.
-	`.git${GIT_DISABLED_SUFFIX}/`, // Ignore the disabled nested git repos.
-	".DS_Store",
-	"*.log",
-	"node_modules/",
-	"__pycache__/",
-	"env/",
-	"venv/",
-	"target/dependency/",
-	"build/dependencies/",
-	"dist/",
-	"out/",
-	"bundle/",
-	"vendor/",
-	"tmp/",
-	"temp/",
-	"deps/",
-	"pkg/",
-	"Pods/",
-	// Media files.
-	"*.jpg",
-	"*.jpeg",
-	"*.png",
-	"*.gif",
-	"*.bmp",
-	"*.ico",
-	// "*.svg",
-	"*.mp3",
-	"*.mp4",
-	"*.wav",
-	"*.avi",
-	"*.mov",
-	"*.wmv",
-	"*.webm",
-	"*.webp",
-	"*.m4a",
-	"*.flac",
-	// Build and dependency directories.
-	"build/",
-	"bin/",
-	"obj/",
-	".gradle/",
-	".idea/",
-	".vscode/",
-	".vs/",
-	"coverage/",
-	".next/",
-	".nuxt/",
-	// Cache and temporary files.
-	"*.cache",
-	"*.tmp",
-	"*.temp",
-	"*.swp",
-	"*.swo",
-	"*.pyc",
-	"*.pyo",
-	".pytest_cache/",
-	".eslintcache",
-	// Environment and config files.
-	".env*",
-	"*.local",
-	"*.development",
-	"*.production",
-	// Large data files.
-	"*.zip",
-	"*.tar",
-	"*.gz",
-	"*.rar",
-	"*.7z",
-	"*.iso",
-	"*.bin",
-	"*.exe",
-	"*.dll",
-	"*.so",
-	"*.dylib",
-	// Database files.
-	"*.sqlite",
-	"*.db",
-	"*.sql",
-	// Log files.
-	"*.logs",
-	"*.error",
-	"npm-debug.log*",
-	"yarn-debug.log*",
-	"yarn-error.log*",
-]

+ 213 - 0
src/services/checkpoints/excludes.ts

@@ -0,0 +1,213 @@
+import fs from "fs/promises"
+import { join } from "path"
+
+import { fileExistsAtPath } from "../../utils/fs"
+
+import { GIT_DISABLED_SUFFIX } from "./constants"
+
+/**
+ * Directory patterns for build output, installed dependencies and
+ * editor/tool state. Each entry ends in "/", which in gitignore syntax
+ * restricts the match to directories.
+ */
+const getBuildArtifactPatterns = (): string[] => {
+	const directories = [
+		".gradle/",
+		".idea/",
+		".parcel-cache/",
+		".pytest_cache/",
+		".next/",
+		".nuxt/",
+		".sass-cache/",
+		".vs/",
+		".vscode/",
+		"Pods/",
+		"__pycache__/",
+		"bin/",
+		"build/",
+		"bundle/",
+		"coverage/",
+		"deps/",
+		"dist/",
+		"env/",
+		"node_modules/",
+		"obj/",
+		"out/",
+		"pkg/",
+		"pycache/",
+		"target/dependency/",
+		"temp/",
+		"vendor/",
+		"venv/",
+	]
+
+	return directories
+}
+
+/**
+ * Extension globs for image, audio and video files. The result lists the
+ * image formats first, followed by the audio/video formats.
+ */
+const getMediaFilePatterns = (): string[] => {
+	const imagePatterns = [
+		"*.jpg",
+		"*.jpeg",
+		"*.png",
+		"*.gif",
+		"*.bmp",
+		"*.ico",
+		"*.webp",
+		"*.tiff",
+		"*.tif",
+		"*.raw",
+		"*.heic",
+		"*.avif",
+		"*.eps",
+		"*.psd",
+	]
+
+	const audioVideoPatterns = [
+		"*.3gp",
+		"*.aac",
+		"*.aiff",
+		"*.asf",
+		"*.avi",
+		"*.divx",
+		"*.flac",
+		"*.m4a",
+		"*.m4v",
+		"*.mkv",
+		"*.mov",
+		"*.mp3",
+		"*.mp4",
+		"*.mpeg",
+		"*.mpg",
+		"*.ogg",
+		"*.opus",
+		"*.rm",
+		"*.rmvb",
+		"*.vob",
+		"*.wav",
+		"*.webm",
+		"*.wma",
+		"*.wmv",
+	]
+
+	return imagePatterns.concat(audioVideoPatterns)
+}
+
+/**
+ * File patterns for caches, editor swap files, partial downloads, crash
+ * dumps and similar throwaway files.
+ *
+ * `Thumbs.db` is listed by its literal name: a glob of the form
+ * `*.Thumbs.db` requires a "." immediately before "Thumbs" and therefore
+ * never matches the file itself.
+ */
+const getCacheFilePatterns = (): string[] => [
+	"*.DS_Store",
+	"*.bak",
+	"*.cache",
+	"*.crdownload",
+	"*.dmp",
+	"*.dump",
+	"*.eslintcache",
+	"*.lock",
+	"*.log",
+	"*.old",
+	"*.part",
+	"*.partial",
+	"*.pyc",
+	"*.pyo",
+	"*.stackdump",
+	"*.swo",
+	"*.swp",
+	"*.temp",
+	"*.tmp",
+	"Thumbs.db",
+]
+
+/**
+ * Globs for environment files and per-environment/local override files.
+ */
+const getConfigFilePatterns = (): string[] => {
+	const configPatterns = [
+		"*.env*",
+		"*.local",
+		"*.development",
+		"*.production",
+	]
+
+	return configPatterns
+}
+
+/**
+ * Extension globs for archives, disk images, installers and compiled
+ * binaries/shared libraries.
+ */
+const getLargeDataFilePatterns = (): string[] => {
+	const largeFilePatterns = [
+		"*.zip",
+		"*.tar",
+		"*.gz",
+		"*.rar",
+		"*.7z",
+		"*.iso",
+		"*.bin",
+		"*.exe",
+		"*.dll",
+		"*.so",
+		"*.dylib",
+		"*.dat",
+		"*.dmg",
+		"*.msi",
+	]
+
+	return largeFilePatterns
+}
+
+/**
+ * Extension globs for database files, dumps and columnar/serialized
+ * data formats.
+ */
+const getDatabaseFilePatterns = (): string[] => {
+	const databasePatterns = [
+		"*.arrow",
+		"*.accdb",
+		"*.aof",
+		"*.avro",
+		"*.bak",
+		"*.bson",
+		"*.csv",
+		"*.db",
+		"*.dbf",
+		"*.dmp",
+		"*.frm",
+		"*.ibd",
+		"*.mdb",
+		"*.myd",
+		"*.myi",
+		"*.orc",
+		"*.parquet",
+		"*.pdb",
+		"*.rdb",
+		"*.sql",
+		"*.sqlite",
+	]
+
+	return databasePatterns
+}
+
+/**
+ * Extension globs for geospatial and CAD data sets (shapefile components,
+ * rasters, point clouds, GIS project files and similar).
+ */
+const getGeospatialPatterns = (): string[] => {
+	const geospatialPatterns = [
+		"*.shp",
+		"*.shx",
+		"*.dbf",
+		"*.prj",
+		"*.sbn",
+		"*.sbx",
+		"*.shp.xml",
+		"*.cpg",
+		"*.gdb",
+		"*.mdb",
+		"*.gpkg",
+		"*.kml",
+		"*.kmz",
+		"*.gml",
+		"*.geojson",
+		"*.dem",
+		"*.asc",
+		"*.img",
+		"*.ecw",
+		"*.las",
+		"*.laz",
+		"*.mxd",
+		"*.qgs",
+		"*.grd",
+		"*.csv",
+		"*.dwg",
+		"*.dxf",
+	]
+
+	return geospatialPatterns
+}
+
+/**
+ * Patterns for log and captured-output files.
+ *
+ * The npm entry is `npm-debug.log*`, with no leading `*.`, so that it also
+ * matches suffixed names such as `npm-debug.log.1234`; this mirrors the
+ * neighbouring yarn entries.
+ */
+const getLogFilePatterns = (): string[] => [
+	"*.error",
+	"*.log",
+	"*.logs",
+	"npm-debug.log*",
+	"*.out",
+	"*.stdout",
+	"yarn-debug.log*",
+	"yarn-error.log*",
+]
+
+/**
+ * Collects the path patterns that the workspace's `.gitattributes` assigns
+ * the `filter=lfs` attribute to.
+ *
+ * Best-effort by design: when `.gitattributes` is missing, or anything
+ * fails while locating or reading it, an empty list is returned so that
+ * callers can still build the rest of the exclude list.
+ *
+ * @param workspacePath Directory that is expected to contain `.gitattributes`.
+ * @returns The pattern (first field) of every non-comment line that carries
+ *   `filter=lfs`; empty when there are none.
+ */
+const getLfsPatterns = async (workspacePath: string): Promise<string[]> => {
+	try {
+		const attributesPath = join(workspacePath, ".gitattributes")
+
+		if (!(await fileExistsAtPath(attributesPath))) {
+			return []
+		}
+
+		const content = await fs.readFile(attributesPath, "utf8")
+		const lfsPatterns: string[] = []
+
+		// Accept both LF and CRLF line endings.
+		for (const rawLine of content.split(/\r?\n/)) {
+			const line = rawLine.trim()
+
+			// Skip blank lines and comments, even if a comment mentions filter=lfs.
+			if (line === "" || line.startsWith("#")) {
+				continue
+			}
+
+			// Fields may be separated by any run of spaces or tabs.
+			const [pattern, ...attributes] = line.split(/\s+/)
+
+			if (pattern && attributes.includes("filter=lfs")) {
+				lfsPatterns.push(pattern)
+			}
+		}
+
+		return lfsPatterns
+	} catch {
+		// Deliberately swallowed: an unreadable .gitattributes must not stop
+		// the exclude file from being written.
+		return []
+	}
+}
+
+/**
+ * Builds the full list of patterns to write to the shadow repository's
+ * exclude file.
+ *
+ * The list starts with the `.git/` and disabled-`.git` directories, then
+ * the static category lists, then any Git LFS patterns found in the
+ * workspace's `.gitattributes`. Several category lists share entries, so
+ * the result is de-duplicated; the first occurrence keeps its position.
+ *
+ * @param workspacePath Workspace root used to look up `.gitattributes`.
+ * @returns Unique exclude patterns in first-seen order.
+ */
+export const getExcludePatterns = async (workspacePath: string): Promise<string[]> => {
+	const allPatterns = [
+		".git/",
+		`.git${GIT_DISABLED_SUFFIX}/`,
+		...getBuildArtifactPatterns(),
+		...getMediaFilePatterns(),
+		...getCacheFilePatterns(),
+		...getConfigFilePatterns(),
+		...getLargeDataFilePatterns(),
+		...getDatabaseFilePatterns(),
+		...getGeospatialPatterns(),
+		...getLogFilePatterns(),
+		...(await getLfsPatterns(workspacePath)),
+	]
+
+	// A Set iterates in insertion order, so the original ordering survives.
+	return Array.from(new Set(allPatterns))
+}