
ChatGLM2-Pro API Usage Guide

Last updated: 2024.04.01 16:07:17

First published: 2023.09.19 18:20:43

ChatGLM is a large language model released by Zhipu AI and Tsinghua University's KEG Lab. It supports an 8K context window and excels at tasks such as content creation, text understanding, and information extraction. After pre-training on a massive corpus of Chinese and English tokens and alignment with human preferences, it improves on the first-generation model by 16%, 36%, and 280% on MMLU, C-Eval, and GSM8K respectively, and topped the Chinese-task C-Eval leaderboard.

This document describes in detail how to use the ChatGLM2-Pro SDK and API.

API Host

Host: maas-api.ml-platform-cn-beijing.volces.com
Region: cn-beijing

API SDK

Access is provided through a unified SDK (AK/SK bypass authentication is required; for the Volcano Engine authentication logic, refer to the related documentation).

Note

Before calling, update the following:

  1. Set the environment variables VOLC_ACCESSKEY and VOLC_SECRETKEY (or modify them in the code).

  2. Replace the model name placeholder ${YOUR_MODEL_NAME} (or set model.endpoint_id; see the API Specification for details).

Golang

// Usage:
//
// 1. go get -u github.com/volcengine/volc-sdk-golang
// 2. VOLC_ACCESSKEY=XXXXX VOLC_SECRETKEY=YYYYY go run main.go
package main

import (
    "encoding/json"
    "errors"
    "fmt"
    "os"

    "github.com/volcengine/volc-sdk-golang/service/maas"
    "github.com/volcengine/volc-sdk-golang/service/maas/models/api"
)

func main() {
    r := maas.NewInstance("maas-api.ml-platform-cn-beijing.volces.com", "cn-beijing")

    // fetch ak & sk from environment variables
    r.SetAccessKey(os.Getenv("VOLC_ACCESSKEY"))
    r.SetSecretKey(os.Getenv("VOLC_SECRETKEY"))

    req := &api.ChatReq{
        Model: &api.Model{
            Name: "chatglm2-pro",
        },
        Messages: []*api.Message{
            {
                Role:    maas.ChatRoleOfUser,
                Content: "天为什么这么蓝?",
            },
            {
                Role:    maas.ChatRoleOfAssistant,
                Content: "因为有你",
            },
            {
                Role:    maas.ChatRoleOfUser,
                Content: "花儿为什么这么香?",
            },
        },
        Parameters: &api.Parameters{
            MaxNewTokens: 1024, // maximum number of new tokens in the output
            Temperature: 0.8, // controls randomness and creativity of the generated text; larger values mean more randomness, range 0~1
            TopP: 0.8, // controls diversity of output tokens; larger values yield a richer set of tokens, range 0~1
            TopK: 16, // sample from the k tokens with the highest predicted probability, range 0-1024; 0 disables top-k
        },
    }
    TestChat(r, req)
    TestStreamChat(r, req)
}

func TestChat(r *maas.MaaS, req *api.ChatReq) {
    got, status, err := r.Chat(req)
    if err != nil {
        errVal := &api.Error{}
        if errors.As(err, &errVal) { // the returned error is always of type *api.Error
            fmt.Printf("meet maas error=%v, status=%d\n", errVal, status)
        }
        return
    }
    fmt.Println("chat answer", mustMarshalJson(got))
}

func TestStreamChat(r *maas.MaaS, req *api.ChatReq) {
    ch, err := r.StreamChat(req)

    if err != nil {
        errVal := &api.Error{}
        if errors.As(err, &errVal) { // the returned error is always of type *api.Error
            fmt.Println("meet maas error", errVal.Error())
        }
        return
    }

    for resp := range ch {
        if resp.Error != nil {
            // it is possible that error occurs during response processing
            fmt.Println(mustMarshalJson(resp.Error))
            return
        }
        fmt.Println(mustMarshalJson(resp))
        // last response may contain `usage`
        if resp.Usage != nil {
            // last message, will return full response including usage, role, finish_reason, etc.
            fmt.Println(mustMarshalJson(resp.Usage))
        }
    }
}

func mustMarshalJson(v interface{}) string {
    s, _ := json.Marshal(v)
    return string(s)
}

Python

Note

Currently only Python >= 3.5 is supported.

'''
Usage:

1. python3 -m pip install --user volcengine
2. VOLC_ACCESSKEY=XXXXX VOLC_SECRETKEY=YYYYY python main.py
'''
import os
from volcengine.maas import MaasService, MaasException, ChatRole


def test_chat(maas, req):
    try:
        resp = maas.chat(req)
        print(resp)
        print(resp.choice.message.content)
    except MaasException as e:
        print(e)

    
def test_stream_chat(maas, req):
    try:
        resps = maas.stream_chat(req)
        for resp in resps:
            print(resp)
            print(resp.choice.message.content)
    except MaasException as e:
        print(e)


if __name__ == '__main__':
    maas = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing')
    
    maas.set_ak(os.getenv("VOLC_ACCESSKEY"))
    maas.set_sk(os.getenv("VOLC_SECRETKEY"))
    
    # document: "https://www.volcengine.com/docs/82379/1099475"
    req = {
        "model": {
            "name": "chatglm2-pro",
        },
        "parameters": {
            "max_new_tokens": 1024,  # 输出文本的最大tokens限制
            "temperature": 0.8,  # 用于控制生成文本的随机性和创造性,Temperature值越大随机性越大,取值范围0~1
            "top_p": 0.8,  # 用于控制输出tokens的多样性,TopP值越大输出的tokens类型越丰富,取值范围0~1
            "top_k": 16,  # 选择预测值最大的k个token进行采样,取值范围0-1024,0表示不生效
        },
        "messages": [
            {
                "role": ChatRole.USER,
                "content": "天为什么这么蓝?"
            }, {
                "role": ChatRole.ASSISTANT,
                "content": "因为有你"
            }, {
                "role": ChatRole.USER,
                "content": "花儿为什么这么香?"
            },
        ]
    }
    
    test_chat(maas, req)
    test_stream_chat(maas, req)

Java

/*
# pom.xml
<dependency>
    <groupId>com.volcengine</groupId>
    <artifactId>volc-sdk-java</artifactId>
    <version>LATEST</version>
</dependency>
*/

package com.volcengine.example.maas;

import com.volcengine.helper.Const;
import com.volcengine.model.maas.api.Api;
import com.volcengine.service.maas.MaasException;
import com.volcengine.service.maas.MaasService;
import com.volcengine.service.maas.impl.MaasServiceImpl;

import java.util.stream.Stream;

public class ChatDemo {
    public static void main(String[] args) {
        MaasService maasService = new MaasServiceImpl("maas-api.ml-platform-cn-beijing.volces.com", "cn-beijing");

        // fetch ak & sk from environment variables
        maasService.setAccessKey(System.getenv("VOLC_ACCESSKEY"));
        maasService.setSecretKey(System.getenv("VOLC_SECRETKEY"));

        Api.ChatReq req = Api.ChatReq.newBuilder()
                .setModel(Api.Model.newBuilder()
                        .setName("chatglm2-pro")
                )
                .setParameters(Api.Parameters.newBuilder()
                        .setMaxNewTokens(1024) // maximum number of new tokens in the output
                        .setTemperature(0.8) // controls randomness and creativity of the generated text; larger values mean more randomness, range 0~1
                        .setTopP(0.8) // controls diversity of output tokens; larger values yield a richer set of tokens, range 0~1
                        .setTopK(16) // sample from the k tokens with the highest predicted probability, range 0-1024; 0 disables top-k
                )
                .addMessages(Api.Message.newBuilder().setRole(Const.MaasChatRoleOfUser).setContent("天为什么这么蓝?"))
                .addMessages(Api.Message.newBuilder().setRole(Const.MaasChatRoleOfAssistant).setContent("因为有你"))
                .addMessages(Api.Message.newBuilder().setRole(Const.MaasChatRoleOfUser).setContent("花儿为什么这么香?"))
                .build();

        testChat(maasService, req);
        testStreamChat(maasService, req);
    }

    private static void testChat(MaasService maasService, Api.ChatReq req) {
        try {
            Api.ChatResp resp = maasService.chat(req);
            System.out.println(resp.getChoice().getMessage().getContent());
            System.out.println(resp.getUsage());
        } catch (MaasException e) {
            System.out.println("code: " + e.getCode());
            System.out.println("code_n: " + e.getCodeN());
            System.out.println("message: " + e.getMsg());
            e.printStackTrace();
        }
    }

    private static void testStreamChat(MaasService maasService, Api.ChatReq req) {
        Stream<Api.ChatResp> resps = null;
        try {
            resps = maasService.streamChat(req);
        } catch (MaasException e) {
            e.printStackTrace();
        }


        assert resps != null;

        // it is possible that error occurs during response processing
        try {
            resps.forEach(resp -> {
                System.out.println(resp.getChoice().getMessage().getContent());

                // last message, will return full response including usage, role, finish_reason, etc.
                if (resp.getUsage().isInitialized()) {
                    System.out.println(resp.getUsage());
                }
            });
        } catch (RuntimeException e) {
            Throwable cause = e.getCause();
            if (cause instanceof MaasException) {
                System.out.println("code: " + ((MaasException) cause).getCode());
                System.out.println("code_n: " + ((MaasException) cause).getCodeN());
                System.out.println("message: " + ((MaasException) cause).getMsg());
            }
            System.out.println("caught: " + e);
        }
    }
}


API Specification

Chat

Modeled mainly on the OpenAI and HuggingFace APIs.
Parameters holds optional control parameters; which parameters are available depends on the model service (the model details page describes the supported parameters).

Input

Field | Type | Description | Default

model (required)

object

{
    "name": "gpt-3.5.turbo"
    // "version": "1.0"
    // "endpoint_id": "mse-xxx-yyy"
}
  1. name: selects the serving endpoint by model name;

  2. version: optional; the model version. If only the model name is specified, the default version's endpoint is used;

  3. endpoint_id: optional; the ID of the model serving endpoint. If specified, requests are routed to that endpoint.

Default: None

messages (required)

list

[
  {
    "role": "user",
    "content": "how to learn python?"
  }
]
  1. role: the message role; user / assistant / system are currently supported

  2. content: the message content

Messages form a list recording the chat context in order (the first entry is the earliest turn, the last is the most recent), e.g. Q1, A1, Q2, A2, Q3 (the input should contain an odd number of messages)

Default: None

stream

boolean

Whether to stream the response. If true, data is returned using the SSE protocol.

Default: false

parameters.max_new_tokens

integer

Maximum number of newly generated tokens (not counting the prompt tokens). Usually set instead of max_tokens; range [1, 4096].

Default: 2000 (depends on the model's default configuration)

parameters.temperature

number

Sampling temperature, range [0, 1.0].

Default: 1.0

parameters.top_p

number

Nucleus sampling (top-p), range [0, 1.0].

Default: 1.0
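
Putting the Input fields together, the snippet below is a minimal sketch of a complete non-streaming request body built as a Python dict. The values are illustrative only, and the commented endpoint_id is a hypothetical placeholder.

import json

# Sketch of a request body combining the Input fields described above.
# Parameter availability depends on the model service.
request_body = {
    "model": {
        "name": "chatglm2-pro",
        # "endpoint_id": "mse-xxx-yyy",  # optional: route to a specific serving endpoint
    },
    "messages": [
        {"role": "user", "content": "how to learn python?"},
    ],
    "stream": False,  # set to True to receive SSE fragments instead of a single response
    "parameters": {
        "max_new_tokens": 1024,
        "temperature": 0.8,
        "top_p": 0.8,
    },
}

print(json.dumps(request_body, ensure_ascii=False, indent=2))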

Output

Field | Type | Description

req_id

string

Request ID

choice

object

{
    "message": {
        "role": "assistant",
        "content": "Learning Python can be a fun and rewarding experience, and there are many resources available to help you get started. Here are some steps you can take to learn Python:\n\n1. Start by installing Python on your computer. You can download the latest version of Python from the official website at <https://www.python.org/downloads/>.\n2. Once you have Python installed, open a terminal or command prompt and enter `python` to start learning the language. This will open a Python interpreter, where you can start writing and running code.\n3. Learn the basics of Python by reading through the official Python documentation at <https://docs.python.org/3/>. This will cover topics such as variables, data types, control structures, functions, and modules.\n4. There are many online resources available to help you learn Python, including tutorials, courses, and forums. Some popular resources include Codecademy, edX, and Coursera.\n5. Practice writing and running code. The more you practice, the better you will become at Python. You can find many exercises and projects online to help you practice.\n6. Join a Python community. Python has a large and active community of developers, and joining one of these communities can be a great way to learn from others and get support. You can find communities on social media, online forums, and through online communities such as Stack Overflow.\n\nOverall, learning Python requires practice and dedication, but with the right resources and approach, it can be a fun and rewarding experience."
    },
    "finish_reason": "stop",
}
  1. message: same as described above

  2. finish_reason: the reason generation ended

    1. stop means generation completed normally

    2. length means the specified maximum token count (max_tokens / max_new_tokens) was reached

usage

object

{
    "prompt_tokens": 18,
    "completion_tokens": 317,
    "total_tokens": 335
}
  1. prompt_tokens: number of tokens in the prompt

  2. completion_tokens: number of generated tokens

  3. total_tokens: total number of tokens

In stream mode, only the last output frame contains the usage field

error (optional)

object

{
    "code": "UnauthorizedCode",
    "code_n": 123456,
    "message": "You are not authorized"
}
  1. code: the error code;

  2. code_n: the numeric error code;

  3. message: the error message.

If there is no error, the error field is empty.
If an error occurs, all other fields are empty and only the error field is returned.
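
As a sketch of how a caller might branch on these Output fields, the function below assumes resp is the parsed JSON response body described above (the function name is illustrative, not part of the SDK).

def handle_response(resp: dict) -> str:
    # If error is set, all other fields are empty (see the note above).
    if resp.get("error"):
        err = resp["error"]
        raise RuntimeError(f'{err["code"]} ({err["code_n"]}): {err["message"]}')
    # finish_reason "length" means generation stopped at max_tokens / max_new_tokens.
    if resp["choice"]["finish_reason"] == "length":
        print("warning: output was truncated at the token limit")
    print("token usage:", resp["usage"]["total_tokens"])
    return resp["choice"]["message"]["content"]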

In stream mode, generated content is returned over SSE (Server-Sent Events), and each event carries a partial fragment of the generated content:

  • Fragments are returned in generation order; the caller must concatenate them to obtain the complete result;
  • If an error occurs at the start of a streaming request (for example, a parameter error), the HTTP status is non-200 and the method call returns the error directly;
  • If an error occurs mid-stream, the HTTP status is still 200 and the error information is returned in a fragment.
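
To complement these notes, here is a minimal sketch of concatenating streamed fragments into the full answer using the Python SDK shown earlier; mid-stream error frames are assumed to surface as MaasException, as in the Python example above.

import os
from volcengine.maas import MaasService, MaasException, ChatRole

maas = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing')
maas.set_ak(os.getenv("VOLC_ACCESSKEY"))
maas.set_sk(os.getenv("VOLC_SECRETKEY"))

req = {
    "model": {"name": "chatglm2-pro"},
    "messages": [{"role": ChatRole.USER, "content": "how to learn python?"}],
}

fragments = []
try:
    for resp in maas.stream_chat(req):
        # fragments arrive in generation order; the caller concatenates them
        fragments.append(resp.choice.message.content)
except MaasException as e:
    # a mid-stream error still returns HTTP 200; the SDK raises it here
    print(e)

print("".join(fragments))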