Просмотр исходного кода

Wait for html to stabilize before considering page loaded

Saoud Rizwan 1 год назад
Родитель
Commit
2e272a1ad3
2 изменённых файла: 38 добавлений и 2 удаления
  1. 1 1
      package.json
  2. 37 1
      src/utils/UrlContentFetcher.ts

+ 1 - 1
package.json

@@ -2,7 +2,7 @@
   "name": "claude-dev",
   "displayName": "Claude Dev",
   "description": "Autonomous coding agent right in your IDE, capable of creating/editing files, executing commands, and more with your permission every step of the way.",
-  "version": "1.9.1",
+  "version": "1.9.2",
   "icon": "icons/icon.png",
   "galleryBanner": {
     "color": "#1E1E1E",

+ 37 - 1
src/utils/UrlContentFetcher.ts

@@ -7,6 +7,7 @@ import TurndownService from "turndown"
 // @ts-ignore
 import PCR from "puppeteer-chromium-resolver"
 import pWaitFor from "p-wait-for"
+import delay from "delay"
 
 interface PCRStats {
 	puppeteer: { launch: typeof launch }
@@ -114,7 +115,9 @@ export class UrlContentFetcher {
 
 		try {
 			// networkidle2 isn't good enough since page may take some time to load. we can assume locally running dev sites will reach networkidle0 in a reasonable amount of time
-			await this.page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle0"] })
+			await this.page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] })
+			// await this.page.goto(url, { timeout: 10_000, waitUntil: "load" })
+			await this.waitTillHTMLStable(this.page) // in case the page is loading more resources
 		} catch (err) {
 			if (!(err instanceof TimeoutError)) {
 				logs.push(`[Navigation Error] ${err.toString()}`)
@@ -143,4 +146,37 @@ export class UrlContentFetcher {
 			logs: logs.join("\n"),
 		}
 	}
+
+	// Polls the page's serialized HTML until its length stops changing, or until the allotted number of checks runs out. Needed because page.goto { waitUntil: "networkidle0" } may never resolve, while not waiting at all could return page content before JS has finished loading.
+	// See https://stackoverflow.com/questions/52497252/puppeteer-wait-until-page-is-completely-loaded/61304202#61304202
+	private async waitTillHTMLStable(page: Page, timeout = 5_000) { // timeout: total polling budget in ms; resolves with no value whether or not the HTML stabilized
+		const checkDurationMsecs = 500 // ms to sleep between samples (author's noted alternative: 1000)
+		const maxChecks = timeout / checkDurationMsecs // 10 checks with the defaults; time spent inside page.content() is not counted against the budget
+		let lastHTMLSize = 0 // 0 doubles as "no previous sample yet"
+		let checkCounts = 1 // 1-based; post-incremented in the loop condition
+		let countStableSizeIterations = 0 // consecutive samples whose length equalled the previous sample
+		const minStableSizeIterations = 3 // equal comparisons in a row required to call the page stable
+
+		while (checkCounts++ <= maxChecks) {
+			let html = await page.content() // full serialized document
+			let currentHTMLSize = html.length // only the length is compared, not the content itself
+
+			// Alternative measure (unused): await page.evaluate(() => document.body.innerHTML.length)
+			console.log("last: ", lastHTMLSize, " <> curr: ", currentHTMLSize)
+
+			if (lastHTMLSize !== 0 && currentHTMLSize === lastHTMLSize) { // the very first sample never counts as stable
+				countStableSizeIterations++
+			} else {
+				countStableSizeIterations = 0 // size changed (or first sample): restart the stability streak
+			}
+
+			if (countStableSizeIterations >= minStableSizeIterations) {
+				console.log("Page rendered fully...")
+				break // stable: stop early without the trailing sleep
+			}
+
+			lastHTMLSize = currentHTMLSize
+			await delay(checkDurationMsecs) // also runs after the final check, just before the loop exits
+		}
+	}
 }