wip gateway

Frank 2025-08-10 12:30:41 -04:00
parent 542186aa49
commit 20e818ad05
2 changed files with 445 additions and 451 deletions
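
For context: the gateway exposes an OpenAI-compatible /v1/chat/completions endpoint behind GatewayAuth. A minimal sketch of calling it with the openai npm client, assuming the base URL and model ID from the provider block this commit removes from the opencode config (the env var name is hypothetical):

    import OpenAI from "openai"

    // Point the stock OpenAI client at the gateway; GatewayAuth validates the key.
    const client = new OpenAI({
      baseURL: "https://api.gateway.frank.dev.opencode.ai/v1",
      apiKey: process.env.GATEWAY_API_KEY!, // hypothetical variable name
    })

    const completion = await client.chat.completions.create({
      model: "anthropic/claude-sonnet-4",
      messages: [{ role: "user", content: "Hello" }],
    })
    console.log(completion.choices[0].message.content)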

@@ -178,456 +178,470 @@ const RestAuth: MiddlewareHandler = async (c, next) => {
 const app = new Hono<{ Bindings: Env; Variables: { keyRecord?: { id: string; workspaceID: string } } }>()
   .get("/", (c) => c.text("Hello, world!"))
   .post("/v1/chat/completions", GatewayAuth, async (c) => {
-    try {
+    const keyRecord = c.get("keyRecord")!
+
+    return await Actor.provide("system", { workspaceID: keyRecord.workspaceID }, async () => {
+      try {
+        // Check balance
+        const customer = await Billing.get()
+        if (customer.balance <= 0) {
+          return c.json(
+            {
+              error: {
+                message: "Insufficient balance",
+                type: "insufficient_quota",
+                param: null,
+                code: "insufficient_quota",
+              },
+            },
+            401,
+          )
+        }
+
         const body = await c.req.json<ChatCompletionCreateParamsBase>()
         const model = SUPPORTED_MODELS[body.model as keyof typeof SUPPORTED_MODELS]?.model()
         if (!model) throw new Error(`Unsupported model: ${body.model}`)
         const requestBody = transformOpenAIRequestToAiSDK()

         return body.stream ? await handleStream() : await handleGenerate()

         async function handleStream() {
           const result = await model.doStream({
             ...requestBody,
           })
           const encoder = new TextEncoder()
           const stream = new ReadableStream({
             async start(controller) {
               const id = `chatcmpl-${Date.now()}`
               const created = Math.floor(Date.now() / 1000)
               try {
                 for await (const chunk of result.stream) {
                   console.log("!!! CHUNK !!! : " + chunk.type)
                   switch (chunk.type) {
                     case "text-delta": {
                       const data = {
                         id,
                         object: "chat.completion.chunk",
                         created,
                         model: body.model,
                         choices: [
                           {
                             index: 0,
                             delta: {
                               content: chunk.delta,
                             },
                             finish_reason: null,
                           },
                         ],
                       }
                       controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`))
                       break
                     }
                     case "reasoning-delta": {
                       const data = {
                         id,
                         object: "chat.completion.chunk",
                         created,
                         model: body.model,
                         choices: [
                           {
                             index: 0,
                             delta: {
                               reasoning_content: chunk.delta,
                             },
                             finish_reason: null,
                           },
                         ],
                       }
                       controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`))
                       break
                     }
                     case "tool-call": {
                       const data = {
                         id,
                         object: "chat.completion.chunk",
                         created,
                         model: body.model,
                         choices: [
                           {
                             index: 0,
                             delta: {
                               tool_calls: [
                                 {
                                   index: 0,
                                   id: chunk.toolCallId,
                                   type: "function",
                                   function: {
                                     name: chunk.toolName,
                                     arguments: chunk.input,
                                   },
                                 },
                               ],
                             },
                             finish_reason: null,
                           },
                         ],
                       }
                       controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`))
                       break
                     }
                     case "error": {
                       const data = {
                         id,
                         object: "chat.completion.chunk",
                         created,
                         model: body.model,
                         choices: [
                           {
                             index: 0,
                             delta: {},
                             finish_reason: "stop",
                           },
                         ],
                         error: {
                           message: typeof chunk.error === "string" ? chunk.error : JSON.stringify(chunk.error),
                           type: "server_error",
                         },
                       }
                       controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`))
                       controller.enqueue(encoder.encode("data: [DONE]\n\n"))
                       controller.close()
                       break
                     }
                     case "finish": {
                       const data = {
                         id,
                         object: "chat.completion.chunk",
                         created,
                         model: body.model,
                         choices: [
                           {
                             index: 0,
                             delta: {},
                             finish_reason:
                               {
                                 stop: "stop",
                                 length: "length",
                                 "content-filter": "content_filter",
                                 "tool-calls": "tool_calls",
                                 error: "stop",
                                 other: "stop",
                                 unknown: "stop",
                               }[chunk.finishReason] || "stop",
                           },
                         ],
                         usage: {
                           prompt_tokens: chunk.usage.inputTokens,
                           completion_tokens: chunk.usage.outputTokens,
                           total_tokens: chunk.usage.totalTokens,
                           completion_tokens_details: {
                             reasoning_tokens: chunk.usage.reasoningTokens,
                           },
                           prompt_tokens_details: {
                             cached_tokens: chunk.usage.cachedInputTokens,
                           },
                         },
                       }
                       await trackUsage(body.model, chunk.usage, chunk.providerMetadata)
                       controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`))
                       controller.enqueue(encoder.encode("data: [DONE]\n\n"))
                       controller.close()
                       break
                     }
                     //case "stream-start":
                     //case "response-metadata":
                     case "text-start":
                     case "text-end":
                     case "reasoning-start":
                     case "reasoning-end":
                     case "tool-input-start":
                     case "tool-input-delta":
                     case "tool-input-end":
                     case "raw":
                     default:
                       // Log unhandled chunk types for debugging
                       console.warn(`Unknown chunk type: ${(chunk as any).type}`)
                       break
                   }
                 }
               } catch (error) {
                 controller.error(error)
               }
             },
           })
           return new Response(stream, {
             headers: {
               "Content-Type": "text/plain; charset=utf-8",
               "Cache-Control": "no-cache",
               Connection: "keep-alive",
             },
           })
         }

         async function handleGenerate() {
           const response = await model.doGenerate({
             ...requestBody,
           })
           await trackUsage(body.model, response.usage, response.providerMetadata)
           return c.json({
             id: `chatcmpl-${Date.now()}`,
             object: "chat.completion" as const,
             created: Math.floor(Date.now() / 1000),
             model: body.model,
             choices: [
               {
                 index: 0,
                 message: {
                   role: "assistant" as const,
                   content: response.content?.find((c) => c.type === "text")?.text ?? "",
                   reasoning_content: response.content?.find((c) => c.type === "reasoning")?.text,
                   tool_calls: response.content
                     ?.filter((c) => c.type === "tool-call")
                     .map((toolCall) => ({
                       id: toolCall.toolCallId,
                       type: "function" as const,
                       function: {
                         name: toolCall.toolName,
                         arguments: toolCall.input,
                       },
                     })),
                 },
                 finish_reason:
                   (
                     {
                       stop: "stop",
                       length: "length",
                       "content-filter": "content_filter",
                       "tool-calls": "tool_calls",
                       error: "stop",
                       other: "stop",
                       unknown: "stop",
                     } as const
                   )[response.finishReason] || "stop",
               },
             ],
             usage: {
               prompt_tokens: response.usage?.inputTokens,
               completion_tokens: response.usage?.outputTokens,
               total_tokens: response.usage?.totalTokens,
               completion_tokens_details: {
                 reasoning_tokens: response.usage?.reasoningTokens,
               },
               prompt_tokens_details: {
                 cached_tokens: response.usage?.cachedInputTokens,
               },
             },
           })
         }

         function transformOpenAIRequestToAiSDK() {
           const prompt = transformMessages()
           const tools = transformTools()

           return {
             prompt,
             maxOutputTokens: body.max_tokens ?? body.max_completion_tokens ?? undefined,
             temperature: body.temperature ?? undefined,
             topP: body.top_p ?? undefined,
             frequencyPenalty: body.frequency_penalty ?? undefined,
             presencePenalty: body.presence_penalty ?? undefined,
             providerOptions: body.reasoning_effort
               ? {
                   anthropic: {
                     reasoningEffort: body.reasoning_effort,
                   },
                 }
               : undefined,
             stopSequences: (typeof body.stop === "string" ? [body.stop] : body.stop) ?? undefined,
             responseFormat: (() => {
               if (!body.response_format) return { type: "text" as const }
               if (body.response_format.type === "json_schema")
                 return {
                   type: "json" as const,
                   schema: body.response_format.json_schema.schema,
                   name: body.response_format.json_schema.name,
                   description: body.response_format.json_schema.description,
                 }
               if (body.response_format.type === "json_object") return { type: "json" as const }
               throw new Error("Unsupported response format")
             })(),
             seed: body.seed ?? undefined,
             tools: tools.tools,
             toolChoice: tools.toolChoice,
           }
         }

         function transformTools() {
           const { tools, tool_choice } = body

           if (!tools || tools.length === 0) {
             return { tools: undefined, toolChoice: undefined }
           }

           const aiSdkTools = tools.map((tool) => {
             return {
               type: tool.type,
               name: tool.function.name,
               description: tool.function.description,
               inputSchema: tool.function.parameters!,
             }
           })

           let aiSdkToolChoice
           if (tool_choice == null) {
             aiSdkToolChoice = undefined
           } else if (tool_choice === "auto") {
             aiSdkToolChoice = { type: "auto" as const }
           } else if (tool_choice === "none") {
             aiSdkToolChoice = { type: "none" as const }
           } else if (tool_choice === "required") {
             aiSdkToolChoice = { type: "required" as const }
           } else if (tool_choice.type === "function") {
             aiSdkToolChoice = {
               type: "tool" as const,
               toolName: tool_choice.function.name,
             }
           }

           return { tools: aiSdkTools, toolChoice: aiSdkToolChoice }
         }

         function transformMessages() {
           const { messages } = body
           const prompt: LanguageModelV2Prompt = []

           for (const message of messages) {
             switch (message.role) {
               case "system": {
                 prompt.push({
                   role: "system",
                   content: message.content as string,
                 })
                 break
               }

               case "user": {
                 if (typeof message.content === "string") {
                   prompt.push({
                     role: "user",
                     content: [{ type: "text", text: message.content }],
                   })
                 } else {
                   const content = message.content.map((part) => {
                     switch (part.type) {
                       case "text":
                         return { type: "text" as const, text: part.text }
                       case "image_url":
                         return {
                           type: "file" as const,
                           mediaType: "image/jpeg" as const,
                           data: part.image_url.url,
                         }
                       default:
                         throw new Error(`Unsupported content part type: ${(part as any).type}`)
                     }
                   })
                   prompt.push({
                     role: "user",
                     content,
                   })
                 }
                 break
               }

               case "assistant": {
                 const content: Array<
                   | { type: "text"; text: string }
                   | {
                       type: "tool-call"
                       toolCallId: string
                       toolName: string
                       input: any
                     }
                 > = []

                 if (message.content) {
                   content.push({
                     type: "text",
                     text: message.content as string,
                   })
                 }

                 if (message.tool_calls) {
                   for (const toolCall of message.tool_calls) {
                     content.push({
                       type: "tool-call",
                       toolCallId: toolCall.id,
                       toolName: toolCall.function.name,
                       input: JSON.parse(toolCall.function.arguments),
                     })
                   }
                 }

                 prompt.push({
                   role: "assistant",
                   content,
                 })
                 break
               }

               case "tool": {
                 prompt.push({
                   role: "tool",
                   content: [
                     {
                       type: "tool-result",
                       toolName: "placeholder",
                       toolCallId: message.tool_call_id,
                       output: {
                         type: "text",
                         value: message.content as string,
                       },
                     },
                   ],
                 })
                 break
               }

               default: {
                 throw new Error(`Unsupported message role: ${message.role}`)
               }
             }
           }

           return prompt
         }

         async function trackUsage(model: string, usage: LanguageModelUsage, providerMetadata?: ProviderMetadata) {
-          const keyRecord = c.get("keyRecord")
-          if (!keyRecord) return
           const modelData = SUPPORTED_MODELS[model as keyof typeof SUPPORTED_MODELS]
           if (!modelData) throw new Error(`Unsupported model: ${model}`)

           const inputTokens = usage.inputTokens ?? 0
           const outputTokens = usage.outputTokens ?? 0
           const reasoningTokens = usage.reasoningTokens ?? 0
           const cacheReadTokens = usage.cachedInputTokens ?? 0
           const cacheWriteTokens =
             providerMetadata?.["anthropic"]?.["cacheCreationInputTokens"] ??
             // @ts-expect-error
             providerMetadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ??
             0

           const inputCost = modelData.input * inputTokens
           const outputCost = modelData.output * outputTokens
           const reasoningCost = modelData.reasoning * reasoningTokens
           const cacheReadCost = modelData.cacheRead * cacheReadTokens
           const cacheWriteCost = modelData.cacheWrite * cacheWriteTokens
           const costInCents = (inputCost + outputCost + reasoningCost + cacheReadCost + cacheWriteCost) * 100

-          await Actor.provide("system", { workspaceID: keyRecord.workspaceID }, async () => {
-            await Billing.consume({
+          await Billing.consume({
             model,
             inputTokens,
@@ -637,18 +651,18 @@ const app = new Hono<{ Bindings: Env; Variables: { keyRecord?: { id: string; workspaceID: string } } }>()
             cacheWriteTokens,
             costInCents,
           })
-          })

           await Database.use((tx) =>
             tx
               .update(KeyTable)
               .set({ timeUsed: sql`now()` })
               .where(eq(KeyTable.id, keyRecord.id)),
           )
         }
-    } catch (error: any) {
-      return c.json({ error: { message: error.message } }, 500)
-    }
+      } catch (error: any) {
+        return c.json({ error: { message: error.message } }, 500)
+      }
+    })
   })
   .use("/*", cors())
   .use(RestAuth)
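
The streaming branch above emits OpenAI-style "data: {json}" chunks separated by blank lines and a final "data: [DONE]" sentinel (note it currently responds with Content-Type: text/plain rather than text/event-stream). A minimal consumer sketch with fetch, under the same base-URL and key assumptions as above:

    // Read the gateway's chunked response and print text deltas as they arrive.
    const res = await fetch("https://api.gateway.frank.dev.opencode.ai/v1/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${process.env.GATEWAY_API_KEY}`, // hypothetical variable name
      },
      body: JSON.stringify({
        model: "anthropic/claude-sonnet-4",
        stream: true,
        messages: [{ role: "user", content: "Hello" }],
      }),
    })

    const reader = res.body!.getReader()
    const decoder = new TextDecoder()
    let buffer = ""
    for (;;) {
      const { done, value } = await reader.read()
      if (done) break
      buffer += decoder.decode(value, { stream: true })
      // Events are delimited by blank lines: "data: {...}\n\n"
      let end
      while ((end = buffer.indexOf("\n\n")) !== -1) {
        const line = buffer.slice(0, end).trim()
        buffer = buffer.slice(end + 2)
        if (!line.startsWith("data: ") || line === "data: [DONE]") continue
        const chunk = JSON.parse(line.slice("data: ".length))
        process.stdout.write(chunk.choices?.[0]?.delta?.content ?? "")
      }
    }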

@ -1,25 +1,5 @@
{ {
"$schema": "https://opencode.ai/config.json", "$schema": "https://opencode.ai/config.json",
"provider": {
"oc-frank": {
"npm": "@ai-sdk/openai-compatible",
"name": "OC-Frank",
"options": {
"baseURL": "https://api.gateway.frank.dev.opencode.ai/v1"
},
"models": {
"anthropic/claude-sonnet-4": {
"name": "Claude Sonnet 4"
},
"openai/gpt-4.1": {
"name": "GPT-4.1"
},
"zhipuai/glm-4.5-flash": {
"name": "GLM-4.5 Flash"
}
}
}
},
"mcp": { "mcp": {
"context7": { "context7": {
"type": "remote", "type": "remote",