// gateway.ts
  1. import { Hono, MiddlewareHandler } from "hono"
  2. import { type ProviderMetadata, type LanguageModelUsage } from "ai"
  3. import { createAnthropic } from "@ai-sdk/anthropic"
  4. import { createOpenAI } from "@ai-sdk/openai"
  5. import { createOpenAICompatible } from "@ai-sdk/openai-compatible"
  6. import type { LanguageModelV2Prompt } from "@ai-sdk/provider"
  7. import { type ChatCompletionCreateParamsBase } from "openai/resources/chat/completions"
  8. import { Actor } from "@opencode/cloud-core/actor.js"
  9. import { Database, eq, sql } from "@opencode/cloud-core/drizzle/index.js"
  10. import { KeyTable } from "@opencode/cloud-core/schema/key.sql.js"
  11. import { Billing } from "@opencode/cloud-core/billing.js"
  12. import { Resource } from "@opencode/cloud-resource"
  13. type Env = {}
  14. const SUPPORTED_MODELS = {
  15. "anthropic/claude-sonnet-4": {
  16. input: 0.0000015,
  17. output: 0.000006,
  18. reasoning: 0.0000015,
  19. cacheRead: 0.0000001,
  20. cacheWrite: 0.0000001,
  21. model: () =>
  22. createAnthropic({
  23. apiKey: Resource.ANTHROPIC_API_KEY.value,
  24. })("claude-sonnet-4-20250514"),
  25. },
  26. "openai/gpt-4.1": {
  27. input: 0.0000015,
  28. output: 0.000006,
  29. reasoning: 0.0000015,
  30. cacheRead: 0.0000001,
  31. cacheWrite: 0.0000001,
  32. model: () =>
  33. createOpenAI({
  34. apiKey: Resource.OPENAI_API_KEY.value,
  35. })("gpt-4.1"),
  36. },
  37. "zhipuai/glm-4.5-flash": {
  38. input: 0,
  39. output: 0,
  40. reasoning: 0,
  41. cacheRead: 0,
  42. cacheWrite: 0,
  43. model: () =>
  44. createOpenAICompatible({
  45. name: "Zhipu AI",
  46. baseURL: "https://api.z.ai/api/paas/v4",
  47. apiKey: Resource.ZHIPU_API_KEY.value,
  48. })("glm-4.5-flash"),
  49. },
  50. }
  51. const GatewayAuth: MiddlewareHandler = async (c, next) => {
  52. const authHeader = c.req.header("authorization")
  53. if (!authHeader || !authHeader.startsWith("Bearer ")) {
  54. return c.json(
  55. {
  56. error: {
  57. message: "Missing API key.",
  58. type: "invalid_request_error",
  59. param: null,
  60. code: "unauthorized",
  61. },
  62. },
  63. 401,
  64. )
  65. }
  66. const apiKey = authHeader.split(" ")[1]
  67. // Check against KeyTable
  68. const keyRecord = await Database.use((tx) =>
  69. tx
  70. .select({
  71. id: KeyTable.id,
  72. workspaceID: KeyTable.workspaceID,
  73. })
  74. .from(KeyTable)
  75. .where(eq(KeyTable.key, apiKey))
  76. .then((rows) => rows[0]),
  77. )
  78. if (!keyRecord) {
  79. return c.json(
  80. {
  81. error: {
  82. message: "Invalid API key.",
  83. type: "invalid_request_error",
  84. param: null,
  85. code: "unauthorized",
  86. },
  87. },
  88. 401,
  89. )
  90. }
  91. c.set("keyRecord", keyRecord)
  92. await next()
  93. }
  94. const app = new Hono<{ Bindings: Env; Variables: { keyRecord?: { id: string; workspaceID: string } } }>()
  95. .get("/", (c) => c.text("Hello, world!"))
  96. .post("/v1/chat/completions", GatewayAuth, async (c) => {
  97. const keyRecord = c.get("keyRecord")!
  98. return await Actor.provide("system", { workspaceID: keyRecord.workspaceID }, async () => {
  99. try {
  100. // Check balance
  101. const customer = await Billing.get()
  102. if (customer.balance <= 0) {
  103. return c.json(
  104. {
  105. error: {
  106. message: "Insufficient balance",
  107. type: "insufficient_quota",
  108. param: null,
  109. code: "insufficient_quota",
  110. },
  111. },
  112. 401,
  113. )
  114. }
  115. const body = await c.req.json<ChatCompletionCreateParamsBase>()
  116. const model = SUPPORTED_MODELS[body.model as keyof typeof SUPPORTED_MODELS]?.model()
  117. if (!model) throw new Error(`Unsupported model: ${body.model}`)
  118. const requestBody = transformOpenAIRequestToAiSDK()
  119. return body.stream ? await handleStream() : await handleGenerate()
  120. async function handleStream() {
  121. const result = await model.doStream({
  122. ...requestBody,
  123. })
  124. const encoder = new TextEncoder()
  125. const stream = new ReadableStream({
  126. async start(controller) {
  127. const id = `chatcmpl-${Date.now()}`
  128. const created = Math.floor(Date.now() / 1000)
  129. try {
  130. for await (const chunk of result.stream) {
  131. console.log("!!! CHUNK !!! : " + chunk.type)
  132. switch (chunk.type) {
  133. case "text-delta": {
  134. const data = {
  135. id,
  136. object: "chat.completion.chunk",
  137. created,
  138. model: body.model,
  139. choices: [
  140. {
  141. index: 0,
  142. delta: {
  143. content: chunk.delta,
  144. },
  145. finish_reason: null,
  146. },
  147. ],
  148. }
  149. controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`))
  150. break
  151. }
  152. case "reasoning-delta": {
  153. const data = {
  154. id,
  155. object: "chat.completion.chunk",
  156. created,
  157. model: body.model,
  158. choices: [
  159. {
  160. index: 0,
  161. delta: {
  162. reasoning_content: chunk.delta,
  163. },
  164. finish_reason: null,
  165. },
  166. ],
  167. }
  168. controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`))
  169. break
  170. }
  171. case "tool-call": {
  172. const data = {
  173. id,
  174. object: "chat.completion.chunk",
  175. created,
  176. model: body.model,
  177. choices: [
  178. {
  179. index: 0,
  180. delta: {
  181. tool_calls: [
  182. {
  183. index: 0,
  184. id: chunk.toolCallId,
  185. type: "function",
  186. function: {
  187. name: chunk.toolName,
  188. arguments: chunk.input,
  189. },
  190. },
  191. ],
  192. },
  193. finish_reason: null,
  194. },
  195. ],
  196. }
  197. controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`))
  198. break
  199. }
  200. case "error": {
  201. const data = {
  202. id,
  203. object: "chat.completion.chunk",
  204. created,
  205. model: body.model,
  206. choices: [
  207. {
  208. index: 0,
  209. delta: {},
  210. finish_reason: "stop",
  211. },
  212. ],
  213. error: {
  214. message: typeof chunk.error === "string" ? chunk.error : chunk.error,
  215. type: "server_error",
  216. },
  217. }
  218. controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`))
  219. controller.enqueue(encoder.encode("data: [DONE]\n\n"))
  220. controller.close()
  221. break
  222. }
  223. case "finish": {
  224. const data = {
  225. id,
  226. object: "chat.completion.chunk",
  227. created,
  228. model: body.model,
  229. choices: [
  230. {
  231. index: 0,
  232. delta: {},
  233. finish_reason:
  234. {
  235. stop: "stop",
  236. length: "length",
  237. "content-filter": "content_filter",
  238. "tool-calls": "tool_calls",
  239. error: "stop",
  240. other: "stop",
  241. unknown: "stop",
  242. }[chunk.finishReason] || "stop",
  243. },
  244. ],
  245. usage: {
  246. prompt_tokens: chunk.usage.inputTokens,
  247. completion_tokens: chunk.usage.outputTokens,
  248. total_tokens: chunk.usage.totalTokens,
  249. completion_tokens_details: {
  250. reasoning_tokens: chunk.usage.reasoningTokens,
  251. },
  252. prompt_tokens_details: {
  253. cached_tokens: chunk.usage.cachedInputTokens,
  254. },
  255. },
  256. }
  257. await trackUsage(body.model, chunk.usage, chunk.providerMetadata)
  258. controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`))
  259. controller.enqueue(encoder.encode("data: [DONE]\n\n"))
  260. controller.close()
  261. break
  262. }
  263. //case "stream-start":
  264. //case "response-metadata":
  265. case "text-start":
  266. case "text-end":
  267. case "reasoning-start":
  268. case "reasoning-end":
  269. case "tool-input-start":
  270. case "tool-input-delta":
  271. case "tool-input-end":
  272. case "raw":
  273. default:
  274. // Log unknown chunk types for debugging
  275. console.warn(`Unknown chunk type: ${(chunk as any).type}`)
  276. break
  277. }
  278. }
  279. } catch (error) {
  280. controller.error(error)
  281. }
  282. },
  283. })
  284. return new Response(stream, {
  285. headers: {
  286. "Content-Type": "text/plain; charset=utf-8",
  287. "Cache-Control": "no-cache",
  288. Connection: "keep-alive",
  289. },
  290. })
  291. }
  292. async function handleGenerate() {
  293. const response = await model.doGenerate({
  294. ...requestBody,
  295. })
  296. await trackUsage(body.model, response.usage, response.providerMetadata)
  297. return c.json({
  298. id: `chatcmpl-${Date.now()}`,
  299. object: "chat.completion" as const,
  300. created: Math.floor(Date.now() / 1000),
  301. model: body.model,
  302. choices: [
  303. {
  304. index: 0,
  305. message: {
  306. role: "assistant" as const,
  307. content: response.content?.find((c) => c.type === "text")?.text ?? "",
  308. reasoning_content: response.content?.find((c) => c.type === "reasoning")?.text,
  309. tool_calls: response.content
  310. ?.filter((c) => c.type === "tool-call")
  311. .map((toolCall) => ({
  312. id: toolCall.toolCallId,
  313. type: "function" as const,
  314. function: {
  315. name: toolCall.toolName,
  316. arguments: toolCall.input,
  317. },
  318. })),
  319. },
  320. finish_reason:
  321. (
  322. {
  323. stop: "stop",
  324. length: "length",
  325. "content-filter": "content_filter",
  326. "tool-calls": "tool_calls",
  327. error: "stop",
  328. other: "stop",
  329. unknown: "stop",
  330. } as const
  331. )[response.finishReason] || "stop",
  332. },
  333. ],
  334. usage: {
  335. prompt_tokens: response.usage?.inputTokens,
  336. completion_tokens: response.usage?.outputTokens,
  337. total_tokens: response.usage?.totalTokens,
  338. completion_tokens_details: {
  339. reasoning_tokens: response.usage?.reasoningTokens,
  340. },
  341. prompt_tokens_details: {
  342. cached_tokens: response.usage?.cachedInputTokens,
  343. },
  344. },
  345. })
  346. }
  347. function transformOpenAIRequestToAiSDK() {
  348. const prompt = transformMessages()
  349. const tools = transformTools()
  350. return {
  351. prompt,
  352. maxOutputTokens: body.max_tokens ?? body.max_completion_tokens ?? undefined,
  353. temperature: body.temperature ?? undefined,
  354. topP: body.top_p ?? undefined,
  355. frequencyPenalty: body.frequency_penalty ?? undefined,
  356. presencePenalty: body.presence_penalty ?? undefined,
  357. providerOptions: body.reasoning_effort
  358. ? {
  359. anthropic: {
  360. reasoningEffort: body.reasoning_effort,
  361. },
  362. }
  363. : undefined,
  364. stopSequences: (typeof body.stop === "string" ? [body.stop] : body.stop) ?? undefined,
  365. responseFormat: (() => {
  366. if (!body.response_format) return { type: "text" as const }
  367. if (body.response_format.type === "json_schema")
  368. return {
  369. type: "json" as const,
  370. schema: body.response_format.json_schema.schema,
  371. name: body.response_format.json_schema.name,
  372. description: body.response_format.json_schema.description,
  373. }
  374. if (body.response_format.type === "json_object") return { type: "json" as const }
  375. throw new Error("Unsupported response format")
  376. })(),
  377. seed: body.seed ?? undefined,
  378. tools: tools.tools,
  379. toolChoice: tools.toolChoice,
  380. }
  381. function transformTools() {
  382. const { tools, tool_choice } = body
  383. if (!tools || tools.length === 0) {
  384. return { tools: undefined, toolChoice: undefined }
  385. }
  386. const aiSdkTools = tools.map((tool) => {
  387. return {
  388. type: tool.type,
  389. name: tool.function.name,
  390. description: tool.function.description,
  391. inputSchema: tool.function.parameters!,
  392. }
  393. })
  394. let aiSdkToolChoice
  395. if (tool_choice == null) {
  396. aiSdkToolChoice = undefined
  397. } else if (tool_choice === "auto") {
  398. aiSdkToolChoice = { type: "auto" as const }
  399. } else if (tool_choice === "none") {
  400. aiSdkToolChoice = { type: "none" as const }
  401. } else if (tool_choice === "required") {
  402. aiSdkToolChoice = { type: "required" as const }
  403. } else if (tool_choice.type === "function") {
  404. aiSdkToolChoice = {
  405. type: "tool" as const,
  406. toolName: tool_choice.function.name,
  407. }
  408. }
  409. return { tools: aiSdkTools, toolChoice: aiSdkToolChoice }
  410. }
  411. function transformMessages() {
  412. const { messages } = body
  413. const prompt: LanguageModelV2Prompt = []
  414. for (const message of messages) {
  415. switch (message.role) {
  416. case "system": {
  417. prompt.push({
  418. role: "system",
  419. content: message.content as string,
  420. })
  421. break
  422. }
  423. case "user": {
  424. if (typeof message.content === "string") {
  425. prompt.push({
  426. role: "user",
  427. content: [{ type: "text", text: message.content }],
  428. })
  429. } else {
  430. const content = message.content.map((part) => {
  431. switch (part.type) {
  432. case "text":
  433. return { type: "text" as const, text: part.text }
  434. case "image_url":
  435. return {
  436. type: "file" as const,
  437. mediaType: "image/jpeg" as const,
  438. data: part.image_url.url,
  439. }
  440. default:
  441. throw new Error(`Unsupported content part type: ${(part as any).type}`)
  442. }
  443. })
  444. prompt.push({
  445. role: "user",
  446. content,
  447. })
  448. }
  449. break
  450. }
  451. case "assistant": {
  452. const content: Array<
  453. | { type: "text"; text: string }
  454. | {
  455. type: "tool-call"
  456. toolCallId: string
  457. toolName: string
  458. input: any
  459. }
  460. > = []
  461. if (message.content) {
  462. content.push({
  463. type: "text",
  464. text: message.content as string,
  465. })
  466. }
  467. if (message.tool_calls) {
  468. for (const toolCall of message.tool_calls) {
  469. content.push({
  470. type: "tool-call",
  471. toolCallId: toolCall.id,
  472. toolName: toolCall.function.name,
  473. input: JSON.parse(toolCall.function.arguments),
  474. })
  475. }
  476. }
  477. prompt.push({
  478. role: "assistant",
  479. content,
  480. })
  481. break
  482. }
  483. case "tool": {
  484. prompt.push({
  485. role: "tool",
  486. content: [
  487. {
  488. type: "tool-result",
  489. toolName: "placeholder",
  490. toolCallId: message.tool_call_id,
  491. output: {
  492. type: "text",
  493. value: message.content as string,
  494. },
  495. },
  496. ],
  497. })
  498. break
  499. }
  500. default: {
  501. throw new Error(`Unsupported message role: ${message.role}`)
  502. }
  503. }
  504. }
  505. return prompt
  506. }
  507. }
  508. async function trackUsage(model: string, usage: LanguageModelUsage, providerMetadata?: ProviderMetadata) {
  509. const modelData = SUPPORTED_MODELS[model as keyof typeof SUPPORTED_MODELS]
  510. if (!modelData) throw new Error(`Unsupported model: ${model}`)
  511. const inputTokens = usage.inputTokens ?? 0
  512. const outputTokens = usage.outputTokens ?? 0
  513. const reasoningTokens = usage.reasoningTokens ?? 0
  514. const cacheReadTokens = usage.cachedInputTokens ?? 0
  515. const cacheWriteTokens =
  516. providerMetadata?.["anthropic"]?.["cacheCreationInputTokens"] ??
  517. // @ts-expect-error
  518. providerMetadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ??
  519. 0
  520. const inputCost = modelData.input * inputTokens
  521. const outputCost = modelData.output * outputTokens
  522. const reasoningCost = modelData.reasoning * reasoningTokens
  523. const cacheReadCost = modelData.cacheRead * cacheReadTokens
  524. const cacheWriteCost = modelData.cacheWrite * cacheWriteTokens
  525. const costInCents = (inputCost + outputCost + reasoningCost + cacheReadCost + cacheWriteCost) * 100
  526. await Billing.consume({
  527. model,
  528. inputTokens,
  529. outputTokens,
  530. reasoningTokens,
  531. cacheReadTokens,
  532. cacheWriteTokens,
  533. costInCents,
  534. })
  535. await Database.use((tx) =>
  536. tx
  537. .update(KeyTable)
  538. .set({ timeUsed: sql`now()` })
  539. .where(eq(KeyTable.id, keyRecord.id)),
  540. )
  541. }
  542. } catch (error: any) {
  543. return c.json({ error: { message: error.message } }, 500)
  544. }
  545. })
  546. })
  547. .all("*", (c) => c.text("Not Found"))
  548. export type ApiType = typeof app
  549. export default app