|
|
@@ -2347,6 +2347,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
|
|
|
const modelId = getModelId(this.apiConfiguration)
|
|
|
const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)
|
|
|
|
|
|
+ // Respect user-configured provider rate limiting BEFORE we emit api_req_started.
|
|
|
+ // This prevents the UI from showing an "API Request..." spinner while we are
|
|
|
+ // intentionally waiting due to the rate limit slider.
|
|
|
+ //
|
|
|
+ // NOTE: We also set Task.lastGlobalApiRequestTime here to reserve this slot
|
|
|
+ // before we build environment details (which can take time).
|
|
|
+ // This ensures subsequent requests (including subtasks) still honour the
|
|
|
+ // provider rate-limit window.
|
|
|
+ await this.maybeWaitForProviderRateLimit(currentItem.retryAttempt ?? 0)
|
|
|
+ Task.lastGlobalApiRequestTime = performance.now()
|
|
|
+
|
|
|
await this.say(
|
|
|
"api_req_started",
|
|
|
JSON.stringify({
|
|
|
@@ -2554,7 +2565,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
|
|
|
// Yields only if the first chunk is successful, otherwise will
|
|
|
// allow the user to retry the request (most likely due to rate
|
|
|
// limit error, which gets thrown on the first chunk).
|
|
|
- const stream = this.attemptApiRequest()
|
|
|
+ const stream = this.attemptApiRequest(currentItem.retryAttempt ?? 0, { skipProviderRateLimit: true })
|
|
|
let assistantMessage = ""
|
|
|
let reasoningMessage = ""
|
|
|
let pendingGroundingSources: GroundingSource[] = []
|
|
|
@@ -3656,7 +3667,44 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
|
|
|
await this.providerRef.deref()?.postMessageToWebview({ type: "condenseTaskContextResponse", text: this.taskId })
|
|
|
}
|
|
|
|
|
|
- public async *attemptApiRequest(retryAttempt: number = 0): ApiStream {
|
|
|
+ /**
|
|
|
+ * Enforce the user-configured provider rate limit.
|
|
|
+ *
|
|
|
+ * NOTE: This is intentionally treated as expected behavior and is surfaced via
|
|
|
+ * the `api_req_rate_limit_wait` say type (not an error).
|
|
|
+ */
|
|
|
+ private async maybeWaitForProviderRateLimit(retryAttempt: number): Promise<void> {
|
|
|
+ const state = await this.providerRef.deref()?.getState()
|
|
|
+ const rateLimitSeconds =
|
|
|
+ state?.apiConfiguration?.rateLimitSeconds ?? this.apiConfiguration?.rateLimitSeconds ?? 0
|
|
|
+
|
|
|
+ if (rateLimitSeconds <= 0 || !Task.lastGlobalApiRequestTime) {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ const now = performance.now()
|
|
|
+ const timeSinceLastRequest = now - Task.lastGlobalApiRequestTime
|
|
|
+ const rateLimitDelay = Math.ceil(
|
|
|
+ Math.min(rateLimitSeconds, Math.max(0, rateLimitSeconds * 1000 - timeSinceLastRequest) / 1000),
|
|
|
+ )
|
|
|
+
|
|
|
+ // Only show the countdown UX on the first attempt. Retry flows have their own delay messaging.
|
|
|
+ if (rateLimitDelay > 0 && retryAttempt === 0) {
|
|
|
+ for (let i = rateLimitDelay; i > 0; i--) {
|
|
|
+ // Send structured JSON data for i18n-safe transport
|
|
|
+ const delayMessage = JSON.stringify({ seconds: i })
|
|
|
+ await this.say("api_req_rate_limit_wait", delayMessage, undefined, true)
|
|
|
+ await delay(1000)
|
|
|
+ }
|
|
|
+ // Finalize the partial message so the UI doesn't keep rendering an in-progress spinner.
|
|
|
+ await this.say("api_req_rate_limit_wait", undefined, undefined, false)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public async *attemptApiRequest(
|
|
|
+ retryAttempt: number = 0,
|
|
|
+ options: { skipProviderRateLimit?: boolean } = {},
|
|
|
+ ): ApiStream {
|
|
|
const state = await this.providerRef.deref()?.getState()
|
|
|
|
|
|
const {
|
|
|
@@ -3693,29 +3741,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- let rateLimitDelay = 0
|
|
|
-
|
|
|
- // Use the shared timestamp so that subtasks respect the same rate-limit
|
|
|
- // window as their parent tasks.
|
|
|
- if (Task.lastGlobalApiRequestTime) {
|
|
|
- const now = performance.now()
|
|
|
- const timeSinceLastRequest = now - Task.lastGlobalApiRequestTime
|
|
|
- const rateLimit = apiConfiguration?.rateLimitSeconds || 0
|
|
|
- rateLimitDelay = Math.ceil(Math.min(rateLimit, Math.max(0, rateLimit * 1000 - timeSinceLastRequest) / 1000))
|
|
|
- }
|
|
|
-
|
|
|
- // Only show rate limiting message if we're not retrying. If retrying, we'll include the delay there.
|
|
|
- if (rateLimitDelay > 0 && retryAttempt === 0) {
|
|
|
- // Show countdown timer
|
|
|
- for (let i = rateLimitDelay; i > 0; i--) {
|
|
|
- const delayMessage = `Rate limiting for ${i} seconds...`
|
|
|
- await this.say("api_req_retry_delayed", delayMessage, undefined, true)
|
|
|
- await delay(1000)
|
|
|
- }
|
|
|
+ if (!options.skipProviderRateLimit) {
|
|
|
+ await this.maybeWaitForProviderRateLimit(retryAttempt)
|
|
|
}
|
|
|
|
|
|
- // Update last request time before making the request so that subsequent
|
|
|
+ // Update last request time right before making the request so that subsequent
|
|
|
// requests — even from new subtasks — will honour the provider's rate-limit.
|
|
|
+ //
|
|
|
+ // NOTE: When recursivelyMakeClineRequests handles rate limiting, it sets the
|
|
|
+ // timestamp earlier to include the environment details build. We still set it
|
|
|
+ // here for direct callers (tests) and for the case where we didn't rate-limit
|
|
|
+ // in the caller.
|
|
|
Task.lastGlobalApiRequestTime = performance.now()
|
|
|
|
|
|
const systemPrompt = await this.getSystemPrompt()
|