最近更新时间:2024.04.01 16:07:16
首次发布时间:2023.07.18 21:02:10
ChatGLM-130B 参考了 ChatGPT 的设计思路,在千亿基座模型 GLM-130B 中注入了代码预训练,通过有监督微调(Supervised Fine-Tuning)等技术实现人类意图对齐。ChatGLM 当前版本模型的能力提升主要来源于独特的千亿基座模型 GLM-130B。它是不同于 BERT、GPT-3 以及 T5 的架构,是一个包含多目标函数的自回归预训练模型。
本文详细介绍了 ChatGLM-130B 的SDK及API使用方法。
Host:maas-api.ml-platform-cn-beijing.volces.com
Region: cn-beijing
提供统一 SDK 的接入形式(需要用 ak/sk 进行旁路鉴权,火山鉴权逻辑可以参考)
Golang SDK: https://github.com/volcengine/volc-sdk-golang
Python SDK: https://github.com/volcengine/volc-sdk-python
说明
调用前请修改:
设置环境变量(或在代码中修改):VOLC_ACCESSKEY
和VOLC_SECRETKEY
;
修改调用模型名占位符${YOUR_MODEL_NAME}
(或者设置model.endpoint_id
,具体请参考 API Specification)
// Usage: // // 1. go get -u github.com/volcengine/volc-sdk-golang // 2. VOLC_ACCESSKEY=XXXXX VOLC_SECRETKEY=YYYYY go run main.go package main import ( "encoding/json" "errors" "fmt" "os" "github.com/volcengine/volc-sdk-golang/service/maas" "github.com/volcengine/volc-sdk-golang/service/maas/models/api" ) func main() { r := maas.NewInstance("maas-api.ml-platform-cn-beijing.volces.com", "cn-beijing") // fetch ak&sk from environmental variables r.SetAccessKey(os.Getenv("VOLC_ACCESSKEY")) r.SetSecretKey(os.Getenv("VOLC_SECRETKEY")) req := &api.ChatReq{ Model: &api.Model{ Name: "chatglm-130b", }, Messages: []*api.Message{ { Role: maas.ChatRoleOfUser, Content: "天为什么这么蓝?", }, { Role: maas.ChatRoleOfAssistant, Content: "因为有你", }, { Role: maas.ChatRoleOfUser, Content: "花儿为什么这么香?", }, }, Parameters: &api.Parameters{ MaxNewTokens: 1000, // 最大生成的回答文本长度 TopP: 0.92, // 用于控制输出tokens的多样性,TopP值越大输出的tokens类型越丰富,取值范围0~1 Temperature: 1, // 用于控制生成文本的随机性和创造性,Temperature值越大随机性越大,取值范围0~1 }, } TestChat(r, req) TestStreamChat(r, req) } func TestChat(r *maas.MaaS, req *api.ChatReq) { got, status, err := r.Chat(req) if err != nil { errVal := &api.Error{} if errors.As(err, &errVal) { // the returned error always type of *api.Error fmt.Printf("meet maas error=%v, status=%d\n", errVal, status) } return } fmt.Println("chat answer", mustMarshalJson(got)) } func TestStreamChat(r *maas.MaaS, req *api.ChatReq) { ch, err := r.StreamChat(req) if err != nil { errVal := &api.Error{} if errors.As(err, &errVal) { // the returned error always type of *api.Error fmt.Println("meet maas error", errVal.Error()) } return } for resp := range ch { if resp.Error != nil { // it is possible that error occurs during response processing fmt.Println(mustMarshalJson(resp.Error)) return } fmt.Println(mustMarshalJson(resp)) // last response may contain `usage` if resp.Usage != nil { // last message, will return full response including usage, role, finish_reason, etc. 
fmt.Println(mustMarshalJson(resp.Usage)) } } } func mustMarshalJson(v interface{}) string { s, _ := json.Marshal(v) return string(s) }
注意
目前仅支持 python>=3.5
。
''' Usage: 1. python3 -m pip install --user volcengine 2. VOLC_ACCESSKEY=XXXXX VOLC_SECRETKEY=YYYYY python main.py ''' import os from volcengine.maas import MaasService, MaasException, ChatRole def test_chat(maas, req): try: resp = maas.chat(req) print(resp) print(resp.choice.message.content) except MaasException as e: print(e) def test_stream_chat(maas, req): try: resps = maas.stream_chat(req) for resp in resps: print(resp) print(resp.choice.message.content) except MaasException as e: print(e) if __name__ == '__main__': maas = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing') maas.set_ak(os.getenv("VOLC_ACCESSKEY")) maas.set_sk(os.getenv("VOLC_SECRETKEY")) # document: "https://www.volcengine.com/docs/82379/1099475" req = { "model": { "name": "chatglm-130b", }, "parameters": { "max_new_tokens": 1000, # 最大生成的回答文本长度 "top_p": 0.92, # 用于控制输出tokens的多样性,TopP值越大输出的tokens类型越丰富,取值范围0~1 "temperature": 1, # 用于控制生成文本的随机性和创造性,Temperature值越大随机性越大,取值范围0~1 }, "messages": [ { "role": ChatRole.USER, "content": "天为什么这么蓝?" }, { "role": ChatRole.ASSISTANT, "content": "因为有你" }, { "role": ChatRole.USER, "content": "花儿为什么这么香?" }, ] } test_chat(maas, req) test_stream_chat(maas, req)
/*
# pom.xml
<dependency>
    <groupId>com.volcengine</groupId>
    <artifactId>volc-sdk-java</artifactId>
    <version>LATEST</version>
</dependency>
*/
package com.volcengine.example.maas;

import com.volcengine.helper.Const;
import com.volcengine.model.maas.api.Api;
import com.volcengine.service.maas.MaasException;
import com.volcengine.service.maas.MaasService;
import com.volcengine.service.maas.impl.MaasServiceImpl;

import java.util.stream.Stream;

/** Demo of blocking and streaming chat calls against the MaaS endpoint. */
public class ChatDemo {

    public static void main(String[] args) {
        MaasService service =
                new MaasServiceImpl("maas-api.ml-platform-cn-beijing.volces.com", "cn-beijing");

        // Credentials are read from the environment.
        service.setAccessKey(System.getenv("VOLC_ACCESSKEY"));
        service.setSecretKey(System.getenv("VOLC_SECRETKEY"));

        Api.ChatReq req = Api.ChatReq.newBuilder()
                .setModel(Api.Model.newBuilder()
                        .setName("chatglm-130b"))
                .setParameters(Api.Parameters.newBuilder()
                        .setMaxNewTokens(1000) // upper bound on newly generated tokens
                        .setTopP(0.92)         // nucleus sampling; larger => more diverse, range 0~1
                        .setTemperature(1))    // larger => more random/creative, range 0~1
                .addMessages(Api.Message.newBuilder()
                        .setRole(Const.MaasChatRoleOfUser)
                        .setContent("天为什么这么蓝?"))
                .addMessages(Api.Message.newBuilder()
                        .setRole(Const.MaasChatRoleOfAssistant)
                        .setContent("因为有你"))
                .addMessages(Api.Message.newBuilder()
                        .setRole(Const.MaasChatRoleOfUser)
                        .setContent("花儿为什么这么香?"))
                .build();

        testChat(service, req);
        testStreamChat(service, req);
    }

    /** Blocking chat call; prints the answer text and the token usage. */
    private static void testChat(MaasService service, Api.ChatReq req) {
        try {
            Api.ChatResp resp = service.chat(req);
            System.out.println(resp.getChoice().getMessage().getContent());
            System.out.println(resp.getUsage());
        } catch (MaasException e) {
            System.out.println("code: " + e.getCode());
            System.out.println("code_n: " + e.getCodeN());
            System.out.println("message: " + e.getMsg());
            e.printStackTrace();
        }
    }

    /** Streaming chat call; prints each fragment, plus usage from the final one. */
    private static void testStreamChat(MaasService service, Api.ChatReq req) {
        Stream<Api.ChatResp> fragments = null;
        try {
            fragments = service.streamChat(req);
        } catch (MaasException e) {
            e.printStackTrace();
        }
        assert fragments != null;

        // Errors can also surface while the stream is being consumed.
        try {
            fragments.forEach(resp -> {
                System.out.println(resp.getChoice().getMessage().getContent());
                // The last fragment carries the full response metadata
                // (usage, role, finish_reason, etc.).
                if (resp.getUsage().isInitialized()) {
                    System.out.println(resp.getUsage());
                }
            });
        } catch (RuntimeException e) {
            Throwable cause = e.getCause();
            if (cause instanceof MaasException) {
                MaasException me = (MaasException) cause;
                System.out.println("code: " + me.getCode());
                System.out.println("code_n: " + me.getCodeN());
                System.out.println("message: " + me.getMsg());
            }
            System.out.println("caught: " + e);
        }
    }
}
主要参考 OpenAI 和 HuggingFace
Parameters 记录可选控制参数,具体哪些参数可用依赖模型服务(模型详情页会描述哪些参数可用)
字段 | 类型 | 描述 | 默认值 |
---|---|---|---|
model (required) | object |
| None |
messages (required) | list |
| None |
stream | boolean | 是否流式返回。如果为 true,则按 SSE 协议返回数据 | false |
parameters.max_new_tokens | integer | 最多新生成 token 数(不包含 prompt 的 token 数目) | 2000(依赖模型默认配置) |
parameters.temperature | number | 采样温度,(0, 1.0] | 1.0 |
parameters.top_p | number | 核采样,[0, 1.0] | 1.0 |
字段 | 类型 | 描述 |
---|---|---|
req_id | string | 请求 id |
choice | object |
|
usage | object |
|
error(optional) | object |
|
在 stream 模式下,基于 SSE (Server-Sent Events) 协议返回生成内容,每次返回结果为生成的部分内容片段:
内容片段按照生成的先后顺序返回,完整的结果需要调用者拼接才能得到;
如果流式请求开始时就出现错误(如参数错误),HTTP返回非200,方法调用也会直接返回错误;
如果流式过程中出现错误,HTTP 依然会返回 200, 错误信息会在一个片段返回。