最近更新时间:2024.04.01 16:07:17
首次发布时间:2023.08.30 15:20:58
云雀 (Skylark) 是字节内部团队研发的大规模预训练语言模型系列,本文详细介绍了 Skylark-lite 的 SDK 及 API 使用方法。
Host:maas-api.ml-platform-cn-beijing.volces.com
Region: cn-beijing
提供统一 SDK 的接入形式(需要用 ak/sk 进行旁路鉴权,火山鉴权逻辑可以参考)
Golang SDK: https://github.com/volcengine/volc-sdk-golang
Python SDK: https://github.com/volcengine/volc-sdk-python
说明
调用前请修改:
设置环境变量(或在代码中修改):VOLC_ACCESSKEY
和VOLC_SECRETKEY
;
修改调用模型名占位符${YOUR_MODEL_NAME}
(或者设置model.endpoint_id
,具体请参考 API Specification)
// Usage: // // 1. go get -u github.com/volcengine/volc-sdk-golang // 2. VOLC_ACCESSKEY=XXXXX VOLC_SECRETKEY=YYYYY go run main.go package main import ( "encoding/json" "errors" "fmt" "os" "github.com/volcengine/volc-sdk-golang/service/maas" "github.com/volcengine/volc-sdk-golang/service/maas/models/api" ) func main() { r := maas.NewInstance("maas-api.ml-platform-cn-beijing.volces.com", "cn-beijing") // fetch ak&sk from environmental variables r.SetAccessKey(os.Getenv("VOLC_ACCESSKEY")) r.SetSecretKey(os.Getenv("VOLC_SECRETKEY")) req := &api.ChatReq{ Model: &api.Model{ Name: "skylark-lite-public", }, Messages: []*api.Message{ { Role: maas.ChatRoleOfUser, Content: "天为什么这么蓝?", }, { Role: maas.ChatRoleOfAssistant, Content: "因为有你", }, { Role: maas.ChatRoleOfUser, Content: "花儿为什么这么香?", }, }, Parameters: &api.Parameters{ MaxNewTokens: 1000, // 输出文本的最大tokens限制 Temperature: 0.7, // 用于控制生成文本的随机性和创造性,Temperature值越大随机性越大,取值范围0~1 TopP: 0.9, // 用于控制输出tokens的多样性,TopP值越大输出的tokens类型越丰富,取值范围0~1 TopK: 0, // 选择预测值最大的k个token进行采样,取值范围0-1000,0表示不生效 }, } TestChat(r, req) TestStreamChat(r, req) } func TestChat(r *maas.MaaS, req *api.ChatReq) { got, status, err := r.Chat(req) if err != nil { errVal := &api.Error{} if errors.As(err, &errVal) { // the returned error always type of *api.Error fmt.Printf("meet maas error=%v, status=%d\n", errVal, status) } return } fmt.Println("chat answer", mustMarshalJson(got)) } func TestStreamChat(r *maas.MaaS, req *api.ChatReq) { ch, err := r.StreamChat(req) if err != nil { errVal := &api.Error{} if errors.As(err, &errVal) { // the returned error always type of *api.Error fmt.Println("meet maas error", errVal.Error()) } return } for resp := range ch { if resp.Error != nil { // it is possible that error occurs during response processing fmt.Println(mustMarshalJson(resp.Error)) return } fmt.Println(mustMarshalJson(resp)) // last response may contain `usage` if resp.Usage != nil { // last message, will return full response including usage, role, 
finish_reason, etc. fmt.Println(mustMarshalJson(resp.Usage)) } } } func mustMarshalJson(v interface{}) string { s, _ := json.Marshal(v) return string(s) }
注意
目前仅支持 python>=3.5
。
'''
Usage:
1. python3 -m pip install --user volcengine
2. VOLC_ACCESSKEY=XXXXX VOLC_SECRETKEY=YYYYY python main.py
'''
import os

from volcengine.maas import MaasService, MaasException, ChatRole


def test_chat(maas, req):
    # Single (non-streaming) chat call: print the whole response and the answer text.
    try:
        resp = maas.chat(req)
        print(resp)
        print(resp.choice.message.content)
    except MaasException as e:
        print(e)


def test_stream_chat(maas, req):
    # Streaming chat call: print each fragment as it arrives.
    try:
        resps = maas.stream_chat(req)
        for resp in resps:
            print(resp)
            print(resp.choice.message.content)
    except MaasException as e:
        print(e)


if __name__ == '__main__':
    maas = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing')

    # Credentials are read from environment variables.
    maas.set_ak(os.getenv("VOLC_ACCESSKEY"))
    maas.set_sk(os.getenv("VOLC_SECRETKEY"))

    # document: "https://www.volcengine.com/docs/82379/1099475"
    req = {
        "model": {
            "name": "skylark-lite-public",
        },
        "parameters": {
            "max_new_tokens": 1000,  # upper bound on generated tokens
            "temperature": 0.7,      # randomness/creativity; larger = more random, range (0, 1]
            "top_p": 0.9,            # nucleus sampling; larger = more diverse tokens, range [0, 1]
            "top_k": 0,              # sample among the k most likely tokens, 0-1000; 0 disables
        },
        "messages": [
            {
                "role": ChatRole.USER,
                "content": "天为什么这么蓝?"
            },
            {
                "role": ChatRole.ASSISTANT,
                "content": "因为有你"
            },
            {
                "role": ChatRole.USER,
                "content": "花儿为什么这么香?"
            },
        ]
    }

    test_chat(maas, req)
    test_stream_chat(maas, req)
/*
 * # pom.xml
 * <dependency>
 *   <groupId>com.volcengine</groupId>
 *   <artifactId>volc-sdk-java</artifactId>
 *   <version>LATEST</version>
 * </dependency>
 */
package com.volcengine.example.maas;

import com.volcengine.helper.Const;
import com.volcengine.model.maas.api.Api;
import com.volcengine.service.maas.MaasException;
import com.volcengine.service.maas.MaasService;
import com.volcengine.service.maas.impl.MaasServiceImpl;

import java.util.stream.Stream;

public class ChatDemo {

    public static void main(String[] args) {
        MaasService maasService =
                new MaasServiceImpl("maas-api.ml-platform-cn-beijing.volces.com", "cn-beijing");

        // Credentials are read from environment variables.
        maasService.setAccessKey(System.getenv("VOLC_ACCESSKEY"));
        maasService.setSecretKey(System.getenv("VOLC_SECRETKEY"));

        Api.ChatReq req = Api.ChatReq.newBuilder()
                .setModel(Api.Model.newBuilder()
                        .setName("skylark-lite-public")
                )
                .setParameters(Api.Parameters.newBuilder()
                        .setMaxNewTokens(1000) // upper bound on generated tokens
                        .setTemperature(0.7)   // randomness/creativity; larger = more random, range (0, 1]
                        .setTopP(0.9)          // nucleus sampling; larger = more diverse tokens, range [0, 1]
                        .setTopK(0)            // sample among the k most likely tokens, 0-1000; 0 disables
                )
                .addMessages(Api.Message.newBuilder()
                        .setRole(Const.MaasChatRoleOfUser)
                        .setContent("天为什么这么蓝?"))
                .addMessages(Api.Message.newBuilder()
                        .setRole(Const.MaasChatRoleOfAssistant)
                        .setContent("因为有你"))
                .addMessages(Api.Message.newBuilder()
                        .setRole(Const.MaasChatRoleOfUser)
                        .setContent("花儿为什么这么香?"))
                .build();

        testChat(maasService, req);
        testStreamChat(maasService, req);
    }

    /** Single (non-streaming) chat call: prints the answer text and usage. */
    private static void testChat(MaasService maasService, Api.ChatReq req) {
        try {
            Api.ChatResp resp = maasService.chat(req);
            System.out.println(resp.getChoice().getMessage().getContent());
            System.out.println(resp.getUsage());
        } catch (MaasException e) {
            System.out.println("code: " + e.getCode());
            System.out.println("code_n: " + e.getCodeN());
            System.out.println("message: " + e.getMsg());
            e.printStackTrace();
        }
    }

    /** Streaming chat call: prints each fragment; the last one carries usage. */
    private static void testStreamChat(MaasService maasService, Api.ChatReq req) {
        Stream<Api.ChatResp> resps = null;
        try {
            resps = maasService.streamChat(req);
        } catch (MaasException e) {
            e.printStackTrace();
        }
        assert resps != null;

        // Errors may also occur mid-stream while processing the response.
        try {
            resps.forEach(resp -> {
                System.out.println(resp.getChoice().getMessage().getContent());
                // The last message returns the full response including usage, role, finish_reason, etc.
                if (resp.getUsage().isInitialized()) {
                    System.out.println(resp.getUsage());
                }
            });
        } catch (RuntimeException e) {
            Throwable cause = e.getCause();
            if (cause instanceof MaasException) {
                System.out.println("code: " + ((MaasException) cause).getCode());
                System.out.println("code_n: " + ((MaasException) cause).getCodeN());
                System.out.println("message: " + ((MaasException) cause).getMsg());
            }
            System.out.println("caught: " + e);
        }
    }
}
主要参考 OpenAI 和 HuggingFace
Parameters 记录可选控制参数,具体哪些参数可用依赖模型服务(模型详情页会描述哪些参数可用)
字段 | 类型 | 描述 | 默认值 |
---|---|---|---|
model (required) | object |
| None |
messages (required) | list |
| None |
stream | boolean | 是否流式返回。如果为 true,则按 SSE 协议返回数据 | false |
parameters.max_new_tokens | integer | 最多新生成 token 数(不包含 prompt 的 token 数目),和 | 2000(依赖模型默认配置) |
parameters.temperature | number | 采样温度,(0, 1.0] | 1.0 |
parameters.top_p | number | 核采样,[0, 1.0] | 1.0 |
parameters.top_k | integer | top-k-filtering 算法保留多少个 最高概率的词 作为候选,正整数。 | 0 |
parameters.stop | list | 用于指定模型在生成响应时应停止的标记。当模型生成的响应中包含这些标记时,生成过程将停止 | [] |
字段 | 类型 | 描述 |
---|---|---|
req_id | string | 请求 id |
choice | object |
|
usage | object |
|
error(optional) | object |
|
在 stream 模式下,基于 SSE (Server-Sent Events) 协议返回生成内容,每次返回结果为生成的部分内容片段:
内容片段按照生成的先后顺序返回,完整的结果需要调用者拼接才能得到;
如果流式请求开始时就出现错误(如参数错误),HTTP 返回非 200,方法调用也会直接返回错误;
如果流式过程中出现错误,HTTP 依然会返回 200, 错误信息会在一个片段返回。