本文提供了通过 AI 加速网关调用大模型服务的代码示例,涵盖 OpenAI 兼容协议和协议透传两种调用方式。
AI 加速网关支持两种 API 调用方式:
完成创建 AI 加速网关实例并获取以下信息:
注意
使用 OpenAI 兼容协议要求创建实例时已传入模型 API Key。若未传入,请使用协议透传方式。
在 OpenAI 兼容协议调用方式下,网关会将各模型厂商的原始响应统一转换为 OpenAI 格式输出。
以下示例中,请将变量替换为您的实际值:
$BASE_URL:网关服务地址。
$AI_GATEWAY_API_KEY:网关 API Key。
$MODEL_NAME:您在网关中配置的模型名称。

Curl
# Chat completions via the gateway (OpenAI-compatible protocol)
curl $BASE_URL/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $AI_GATEWAY_API_KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
Python
# pip install openai
# https://platform.openai.com/docs/api-reference
from openai import OpenAI

# Point the OpenAI SDK at the gateway instead of api.openai.com.
client = OpenAI(
    base_url="$BASE_URL",
    api_key="$AI_GATEWAY_API_KEY",
)

completion = client.chat.completions.create(
    model="$MODEL_NAME",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
)
print(completion.choices[0].message)
Curl
# Image generation via the gateway (OpenAI-compatible protocol)
curl $BASE_URL/images/generations \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $AI_GATEWAY_API_KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "prompt": "A cute baby sea otter"
  }'
Python
# pip install openai
# https://platform.openai.com/docs/api-reference
from openai import OpenAI

client = OpenAI(
    base_url="$BASE_URL",
    api_key="$AI_GATEWAY_API_KEY",
)

# FIX: `prompt` is a required argument of images.generate(); the original
# sample omitted it and would raise a TypeError. The prompt text matches
# the curl example for the same endpoint.
images = client.images.generate(
    model="$MODEL_NAME",
    prompt="A cute baby sea otter",
    size="512x512",
    response_format="url",
)
print(images.data[0].url)
Python
# prerequisites
# pip install websockets==12.0 numpy soundfile scipy
import asyncio
import base64
import json

import numpy as np
from scipy.signal import resample
import websockets


def resample_audio(audio_data, original_sample_rate, target_sample_rate):
    """Resample int16 PCM samples to `target_sample_rate`."""
    number_of_samples = round(
        len(audio_data) * float(target_sample_rate) / original_sample_rate
    )
    resampled_audio = resample(audio_data, number_of_samples)
    return resampled_audio.astype(np.int16)


async def send_text(client, text: str):
    """Stream `text` to the TTS session one character at a time, then finish."""
    for t in text:
        await asyncio.sleep(0.05)
        event = {"type": "input_text.append", "delta": t}
        await client.send(json.dumps(event))
    event = {"type": "input_text.done"}
    await client.send(json.dumps(event))


# Helper to write audio data to a stream.
def write_audio_data(stream, data):
    stream.write(data)


async def receive_messages(client, file_path="response_audio.pcm"):
    """Collect audio deltas and persist the PCM bytes to `file_path`."""
    audio_list = bytearray()
    while not client.closed:
        message = await client.recv()
        if message is None:
            print("===None Message===")
            continue
        event = json.loads(message)
        message_type = event.get("type")
        if message_type == "response.audio.delta":
            audio_bytes = base64.b64decode(event["delta"])
            audio_list.extend(audio_bytes)
            # Drop the (large) payload before logging the event.
            del event['delta']
            print(event)
            continue
        print(event)
        # Rewrite the accumulated audio on every non-delta event.
        with open(file_path, 'wb') as ff:
            ff.write(audio_list)
        if message_type == "response.audio.done":
            break
        continue


def get_session_update_msg():
    """Build the tts_session.update message that configures the session."""
    config = {
        "voice": "your_voice",
        "output_audio_format": "pcm",
        "output_audio_sample_rate": 24000,  # your_sample_rate
    }
    event = {"type": "tts_session.update", "session": config}
    return json.dumps(event)


async def with_openai():
    key = "$AI_GATEWAY_API_KEY"
    ws_url = "wss://$BASE_URL/realtime?intent=text-to-speech&model=$MODEL_NAME"
    headers = {
        "Authorization": f"Bearer {key}",
    }
    async with websockets.connect(
        ws_url, ping_interval=None, extra_headers=headers
    ) as client:
        session_msg = get_session_update_msg()
        await client.send(session_msg)
        await asyncio.gather(send_text(client, "你好呀"), receive_messages(client))


if __name__ == "__main__":
    asyncio.run(with_openai())
Python
# prerequisites
# pip install websockets==12.0 numpy soundfile scipy
import asyncio
import base64
import json

import numpy as np
import soundfile as sf
from scipy.signal import resample
import websockets

SAMPLE_RATE = 16000  # your_sample_rate


def resample_audio(audio_data, original_sample_rate, target_sample_rate):
    """Resample int16 PCM samples to `target_sample_rate`."""
    number_of_samples = round(
        len(audio_data) * float(target_sample_rate) / original_sample_rate
    )
    resampled_audio = resample(audio_data, number_of_samples)
    return resampled_audio.astype(np.int16)


async def send_audio(client, audio_file_path: str):
    """Stream the audio file in ~100ms chunks, then commit the buffer."""
    duration_ms = 100
    samples_per_chunk = SAMPLE_RATE * (duration_ms / 1000)
    bytes_per_sample = 2
    bytes_per_chunk = int(samples_per_chunk * bytes_per_sample)
    extra_params = {}
    if audio_file_path.endswith(".raw"):
        # Raw PCM carries no header, so the format must be given explicitly.
        extra_params = {
            "samplerate": SAMPLE_RATE,
            "channels": 1,
            "subtype": "PCM_16",
        }
    audio_data, original_sample_rate = sf.read(
        audio_file_path, dtype="int16", **extra_params
    )
    if original_sample_rate != SAMPLE_RATE:
        audio_data = resample_audio(audio_data, original_sample_rate, SAMPLE_RATE)
    audio_bytes = audio_data.tobytes()
    for i in range(0, len(audio_bytes), bytes_per_chunk):
        # Pace sends slightly faster than real time.
        await asyncio.sleep((duration_ms - 20) / 1000)
        chunk = audio_bytes[i: i + bytes_per_chunk]
        base64_audio = base64.b64encode(chunk).decode("utf-8")
        append_event = {"type": "input_audio_buffer.append", "audio": base64_audio}
        await client.send(json.dumps(append_event))
    print("send complete")
    commit_event = {"type": "input_audio_buffer.commit"}
    await client.send(json.dumps(commit_event))


async def receive_messages(client):
    """Print server events until the final transcription arrives."""
    while not client.closed:
        message = await client.recv()
        print(message)
        event = json.loads(message)
        if event.get("type") == "conversation.item.input_audio_transcription.completed":
            return


def get_session_update_msg():
    """Build the transcription_session.update message for the session."""
    config = {
        "input_audio_format": "pcm",
        "input_audio_sample_rate": SAMPLE_RATE,
        "input_audio_bits": 16,
        "input_audio_channel": 1,
    }
    event = {"type": "transcription_session.update", "session": config}
    return json.dumps(event)


async def with_openai(audio_file_path: str):
    ws_url = "wss://$BASE_URL/realtime?intent=transcription&model=$MODEL_NAME"
    key = "$AI_GATEWAY_API_KEY"
    headers = {
        "Authorization": f"Bearer {key}",
    }
    async with websockets.connect(
        ws_url, ping_interval=None, extra_headers=headers
    ) as client:
        session_msg = get_session_update_msg()
        await client.send(session_msg)
        await asyncio.gather(
            send_audio(client, audio_file_path), receive_messages(client)
        )


if __name__ == "__main__":
    file_path = "recording.mp3"  # your_audio_file
    asyncio.run(with_openai(file_path))
Curl
# Embeddings via the gateway (OpenAI-compatible protocol)
curl https://$BASE_URL/embeddings \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $AI_GATEWAY_API_KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "input": "The food was delicious and the waiter...",
    "encoding_format": "float"
  }'
Python
# pip install openai
# https://platform.openai.com/docs/api-reference
from openai import OpenAI

client = OpenAI(
    base_url="https://$BASE_URL",
    api_key="$AI_GATEWAY_API_KEY",
)

# FIX: the original sample discarded the response, so running it showed
# nothing. Capture and print the result, consistent with the chat sample.
response = client.embeddings.create(
    model="$MODEL_NAME",
    input="The food was delicious and the waiter...",
    encoding_format="float",
)
print(response.data[0].embedding)
协议透传是指网关原样透传各模型厂商各自的接口协议(包括请求头和请求体),不做协议的转换和兼容。网关仅针对特定路径(如 /chat/completions、/messages 等)的请求尝试解析响应体中的 usage 字段,进行 Token 计量。
与 OpenAI 兼容协议的主要区别如下:
| 对比项 | OpenAI 兼容协议 | 协议透传 |
|---|---|---|
| 协议转换 | 网关统一转换为 OpenAI 格式 | 原样透传模型厂商协议,不做转换 |
| 鉴权方式 | 使用网关生成的 API Key | 使用模型厂商自身的密钥 |
| 请求/响应体 | 统一为 OpenAI 格式 | 与模型厂商接口完全一致 |
| 支持的网关能力 | 请求加速、模型路由(负载均衡 / 主备容灾)、语义缓存、限速等 | 仅请求加速 |
| 适用场景 | 希望统一管理多模型厂商调用协议 | 希望保留模型厂商原生接口行为 |
协议透传的请求路径由以下四部分组成:
https://{网关服务地址}/{提供商 ID}/{模型厂商请求路径}
| 组成部分 | 说明 | 示例 |
|---|---|---|
| 网关服务地址 | BaseUrl,即网关实例的服务地址 | https://{网关服务地址} |
| 提供商 ID | 模型服务商标识 | tencent、ali、bytedance 等 |
| 模型厂商请求路径 | 提供商原始 API 路径 | /v1/chat/completions |
总的来说,使用协议透传时,您只需将原本指向三方模型厂商的域名替换为 https://{网关服务地址}/{提供商 ID},其余的请求路径、请求头和请求体与模型厂商接口完全一致。
以下表格列出了各提供商在协议透传方式下的调用路径对照。示例中的变量说明:
$BASE_URL:网关服务地址。
$KEY:模型厂商自身的 API Key(非网关 API Key)。
$MODEL_NAME:模型厂商的模型名称。

| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/v1/chat/completions | $BASE_URL/tencent/v1/chat/completions |
curl $BASE_URL/tencent/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/compatible-mode/v1/chat/completions | $BASE_URL/ali/compatible-mode/v1/chat/completions |
curl $BASE_URL/ali/compatible-mode/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/v2/chat/completions | $BASE_URL/baidu/v2/chat/completions |
curl $BASE_URL/baidu/v2/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/api/paas/v4/chat/completions | $BASE_URL/zhipu/api/paas/v4/chat/completions |
curl $BASE_URL/zhipu/api/paas/v4/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/v1/chat/completions | $BASE_URL/minimax/v1/chat/completions |
curl $BASE_URL/minimax/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/v1/chat/completions | $BASE_URL/lingyi/v1/chat/completions |
curl $BASE_URL/lingyi/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/v1/chat/completions | $BASE_URL/deepseek/v1/chat/completions |
curl $BASE_URL/deepseek/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/v1/chat/completions | $BASE_URL/moonshot/v1/chat/completions |
curl $BASE_URL/moonshot/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/v2/chat/completions | $BASE_URL/xunfei/v2/chat/completions |
curl $BASE_URL/xunfei/v2/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/v1/chat/completions | $BASE_URL/silliconflow/v1/chat/completions |
# NOTE(review): provider ID "silliconflow" is spelled with a double "ll";
# confirm this matches the gateway's actual provider identifier.
curl $BASE_URL/silliconflow/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
火山方舟在协议透传方式下支持多种接口,包括但不限于对话(Chat)API、Responses API 和 WebSocket 等。
| 模型厂商原始路径 | 网关调用路径 |
|---|---|
| https://{模型厂商域名}/api/v3/chat/completions | $BASE_URL/bytedance/api/v3/chat/completions |
| https://{模型厂商域名}/api/v3/responses | $BASE_URL/bytedance/api/v3/responses |
| wss://{模型厂商域名}/api/v3/sauc/bigmodel | wss://$BASE_URL/bytedance/api/v3/sauc/bigmodel |
对话(Chat)API 示例
curl $BASE_URL/bytedance/api/v3/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $KEY" \
  -d '{
    "model": "$MODEL_NAME",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
Responses API 示例
curl $BASE_URL/bytedance/api/v3/responses \
  --header 'Authorization: Bearer $KEY' \
  --header 'Content-Type: application/json' \
  --data '{
    "model": "doubao-seed-1-6-250615",
    "input": "你好呀。",
    "stream": true
  }'
语音识别(WebSocket)
请参考大模型流式语音识别 API 文档,将 WebSocket 连接地址替换为 wss://$BASE_URL/bytedance/api/v3/sauc/bigmodel。