最近更新时间:2024.04.01 16:07:16
首次发布时间:2023.07.18 21:02:10
ChatGLM-130B 参考了 ChatGPT 的设计思路,在千亿基座模型 GLM-130B 中注入了代码预训练,通过有监督微调(Supervised Fine-Tuning)等技术实现人类意图对齐。ChatGLM 当前版本模型的能力提升主要来源于独特的千亿基座模型 GLM-130B。它是不同于 BERT、GPT-3 以及 T5 的架构,是一个包含多目标函数的自回归预训练模型。
本文详细介绍了 ChatGLM-130B 的SDK及API使用方法。
Host:maas-api.ml-platform-cn-beijing.volces.com
Region: cn-beijing
提供统一 SDK 的接入形式(需要用 ak/sk 进行旁路鉴权,火山鉴权逻辑可以参考)
Golang SDK: https://github.com/volcengine/volc-sdk-golang
Python SDK: https://github.com/volcengine/volc-sdk-python
说明
调用前请修改:
设置环境变量(或在代码中修改):VOLC_ACCESSKEY
和VOLC_SECRETKEY
;
修改调用模型名占位符${YOUR_MODEL_NAME}
(或者设置model.endpoint_id
,具体请参考 API Specification)
// Usage: // // 1. go get -u github.com/volcengine/volc-sdk-golang // 2. VOLC_ACCESSKEY=XXXXX VOLC_SECRETKEY=YYYYY go run main.go package main import ( "encoding/json" "errors" "fmt" "os" "github.com/volcengine/volc-sdk-golang/service/maas" "github.com/volcengine/volc-sdk-golang/service/maas/models/api" ) func main() { r := maas.NewInstance("maas-api.ml-platform-cn-beijing.volces.com", "cn-beijing") // fetch ak&sk from environmental variables r.SetAccessKey(os.Getenv("VOLC_ACCESSKEY")) r.SetSecretKey(os.Getenv("VOLC_SECRETKEY")) req := &api.ChatReq{ Model: &api.Model{ Name: "chatglm-130b", }, Messages: []*api.Message{ { Role: maas.ChatRoleOfUser, Content: "天为什么这么蓝?", }, { Role: maas.ChatRoleOfAssistant, Content: "因为有你", }, { Role: maas.ChatRoleOfUser, Content: "花儿为什么这么香?", }, }, Parameters: &api.Parameters{ MaxNewTokens: 1000, // 最大生成的回答文本长度 TopP: 0.92, // 用于控制输出tokens的多样性,TopP值越大输出的tokens类型越丰富,取值范围0~1 Temperature: 1, // 用于控制生成文本的随机性和创造性,Temperature值越大随机性越大,取值范围0~1 }, } TestChat(r, req) TestStreamChat(r, req) } func TestChat(r *maas.MaaS, req *api.ChatReq) { got, status, err := r.Chat(req) if err != nil { errVal := &api.Error{} if errors.As(err, &errVal) { // the returned error always type of *api.Error fmt.Printf("meet maas error=%v, status=%d\n", errVal, status) } return } fmt.Println("chat answer", mustMarshalJson(got)) } func TestStreamChat(r *maas.MaaS, req *api.ChatReq) { ch, err := r.StreamChat(req) if err != nil { errVal := &api.Error{} if errors.As(err, &errVal) { // the returned error always type of *api.Error fmt.Println("meet maas error", errVal.Error()) } return } for resp := range ch { if resp.Error != nil { // it is possible that error occurs during response processing fmt.Println(mustMarshalJson(resp.Error)) return } fmt.Println(mustMarshalJson(resp)) // last response may contain `usage` if resp.Usage != nil { // last message, will return full response including usage, role, finish_reason, etc. 
fmt.Println(mustMarshalJson(resp.Usage)) } } } func mustMarshalJson(v interface{}) string { s, _ := json.Marshal(v) return string(s) }
注意
目前仅支持 python>=3.5
。
''' Usage: 1. python3 -m pip install --user volcengine 2. VOLC_ACCESSKEY=XXXXX VOLC_SECRETKEY=YYYYY python main.py ''' import os from volcengine.maas import MaasService, MaasException, ChatRole def test_chat(maas, req): try: resp = maas.chat(req) print(resp) print(resp.choice.message.content) except MaasException as e: print(e) def test_stream_chat(maas, req): try: resps = maas.stream_chat(req) for resp in resps: print(resp) print(resp.choice.message.content) except MaasException as e: print(e) if __name__ == '__main__': maas = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing') maas.set_ak(os.getenv("VOLC_ACCESSKEY")) maas.set_sk(os.getenv("VOLC_SECRETKEY")) # document: "https://www.volcengine.com/docs/82379/1099475" req = { "model": { "name": "chatglm-130b", }, "parameters": { "max_new_tokens": 1000, # 最大生成的回答文本长度 "top_p": 0.92, # 用于控制输出tokens的多样性,TopP值越大输出的tokens类型越丰富,取值范围0~1 "temperature": 1, # 用于控制生成文本的随机性和创造性,Temperature值越大随机性越大,取值范围0~1 }, "messages": [ { "role": ChatRole.USER, "content": "天为什么这么蓝?" }, { "role": ChatRole.ASSISTANT, "content": "因为有你" }, { "role": ChatRole.USER, "content": "花儿为什么这么香?" }, ] } test_chat(maas, req) test_stream_chat(maas, req)
/*
# pom.xml
<dependency>
    <groupId>com.volcengine</groupId>
    <artifactId>volc-sdk-java</artifactId>
    <version>LATEST</version>
</dependency>
*/
package com.volcengine.example.maas;

import com.volcengine.helper.Const;
import com.volcengine.model.maas.api.Api;
import com.volcengine.service.maas.MaasException;
import com.volcengine.service.maas.MaasService;
import com.volcengine.service.maas.impl.MaasServiceImpl;

import java.util.stream.Stream;

/** Demo of blocking and streaming chat calls against the MaaS endpoint. */
public class ChatDemo {

    public static void main(String[] args) {
        MaasService service =
                new MaasServiceImpl("maas-api.ml-platform-cn-beijing.volces.com", "cn-beijing");

        // Credentials are read from the environment.
        service.setAccessKey(System.getenv("VOLC_ACCESSKEY"));
        service.setSecretKey(System.getenv("VOLC_SECRETKEY"));

        Api.ChatReq req = Api.ChatReq.newBuilder()
                .setModel(Api.Model.newBuilder()
                        .setName("chatglm-130b"))
                .setParameters(Api.Parameters.newBuilder()
                        .setMaxNewTokens(1000) // upper bound on newly generated tokens
                        .setTopP(0.92)         // nucleus sampling; larger => more diverse, range 0~1
                        .setTemperature(1))    // larger => more random/creative, range 0~1
                .addMessages(Api.Message.newBuilder()
                        .setRole(Const.MaasChatRoleOfUser)
                        .setContent("天为什么这么蓝?"))
                .addMessages(Api.Message.newBuilder()
                        .setRole(Const.MaasChatRoleOfAssistant)
                        .setContent("因为有你"))
                .addMessages(Api.Message.newBuilder()
                        .setRole(Const.MaasChatRoleOfUser)
                        .setContent("花儿为什么这么香?"))
                .build();

        testChat(service, req);
        testStreamChat(service, req);
    }

    /** Blocking chat call; prints the answer text and the token usage. */
    private static void testChat(MaasService service, Api.ChatReq req) {
        try {
            Api.ChatResp resp = service.chat(req);
            System.out.println(resp.getChoice().getMessage().getContent());
            System.out.println(resp.getUsage());
        } catch (MaasException e) {
            System.out.println("code: " + e.getCode());
            System.out.println("code_n: " + e.getCodeN());
            System.out.println("message: " + e.getMsg());
            e.printStackTrace();
        }
    }

    /** Streaming chat call; prints each fragment, plus usage from the final one. */
    private static void testStreamChat(MaasService service, Api.ChatReq req) {
        Stream<Api.ChatResp> fragments = null;
        try {
            fragments = service.streamChat(req);
        } catch (MaasException e) {
            e.printStackTrace();
        }
        assert fragments != null;

        // Errors can also surface while the stream is being consumed.
        try {
            fragments.forEach(resp -> {
                System.out.println(resp.getChoice().getMessage().getContent());
                // The last fragment carries the full response metadata
                // (usage, role, finish_reason, etc.).
                if (resp.getUsage().isInitialized()) {
                    System.out.println(resp.getUsage());
                }
            });
        } catch (RuntimeException e) {
            Throwable cause = e.getCause();
            if (cause instanceof MaasException) {
                MaasException me = (MaasException) cause;
                System.out.println("code: " + me.getCode());
                System.out.println("code_n: " + me.getCodeN());
                System.out.println("message: " + me.getMsg());
            }
            System.out.println("caught: " + e);
        }
    }
}
主要参考 OpenAI 和 HuggingFace
Parameters 记录可选控制参数,具体哪些参数可用依赖模型服务(模型详情页会描述哪些参数可用)
字段 | 类型 | 描述 | 默认值 |
---|---|---|---|
model (required) | object |
| None |
messages (required) | list |
| None |
stream | boolean | 是否流式返回。如果为 true,则按 SSE 协议返回数据 | false |
parameters.max_new_tokens | integer | 最多新生成 token 数(不包含 prompt 的 token 数目) | 2000(依赖模型默认配置) |
parameters.temperature | number | 采样温度,(0, 1.0] | 1.0 |
parameters.top_p | number | 核采样,[0, 1.0] | 1.0 |
字段 | 类型 | 描述 |
---|---|---|
req_id | string | 请求 id |
choice | object |
|
usage | object |
|
error(optional) | object |
|
在 stream 模式下,基于 SSE (Server-Sent Events) 协议返回生成内容,每次返回结果为生成的部分内容片段:
内容片段按照生成的先后顺序返回,完整的结果需要调用者拼接才能得到;
如果流式请求开始时就出现错误(如参数错误),HTTP返回非200,方法调用也会直接返回错误;
如果流式过程中出现错误,HTTP 依然会返回 200, 错误信息会在一个片段返回。