部分模型支持处理PDF格式的文档,会通过视觉功能来理解整个文档的上下文。当传入PDF文档时,大模型会将文件分页处理成多图,然后分析解读对应的文本、图片等信息,并结合这些信息完成文档理解相关任务。
前提条件
API 接口
Responses API
文档输入方式
支持文档传入方式如下:
本地文件上传:支持 Files API 上传(推荐)和 Base64 编码传入两种方式。
文件 URL 传入:适用于文件已存在公网可访问 URL 的场景,文件大小不能超过 50 MB。
本地文件上传
Files API 上传(推荐) 建议优先使用 Files API 上传本地文件,不仅可以支持最大 512MB 文件的处理,还可以避免请求时重新上传内容,减少预处理导致的时延,同时可在多次请求中重复使用,节省公网下载时延。其中文件预处理的原理,参见附:文件预处理 。
该方式上传的文件默认存储 7 天,存储有效期取值范围为1-30天。 如果需要实时获取分析内容,或者要规避复杂任务引发的客户端超时失败问题,可采用流式输出的方式,具体示例见流式输出 。 代码示例:
上传PDF文件获取File ID。
# Upload the local PDF through the Files API with purpose=user_data.
curl "https://ark.cn-beijing.volces.com/api/v3/files" \
  --header "Authorization: Bearer ${ARK_API_KEY}" \
  --form 'purpose=user_data' \
  --form 'file=@/Users/doc/demo.pdf'
在Responses API中引用File ID。
# Send a Responses API request that references the uploaded PDF by its File ID.
curl "https://ark.cn-beijing.volces.com/api/v3/responses" \
  --header "Authorization: Bearer ${ARK_API_KEY}" \
  --header 'Content-Type: application/json' \
  --data '{
    "model": "doubao-seed-1-6-251015",
    "input": [
      {
        "role": "user",
        "content": [
          {
            "type": "input_file",
            "file_id": "file-20251018****"
          },
          {
            "type": "input_text",
            "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。"
          }
        ]
      }
    ]
  }'
import asyncio
import os
from volcenginesdkarkruntime import AsyncArk
client = AsyncArk(
    base_url='https://ark.cn-beijing.volces.com/api/v3',
    api_key=os.getenv('ARK_API_KEY')
)


async def main():
    """Upload a local PDF via the Files API and ask the model to transcribe it.

    The file is uploaded with purpose ``user_data``, the script waits for the
    file to finish processing, and then a Responses API request references the
    file by its ID. The response object is printed.
    """
    # upload pdf file
    print("Upload pdf file")
    # Open the PDF in a context manager so the handle is closed after the upload.
    # replace with your local pdf path
    with open("/Users/doc/demo.pdf", "rb") as pdf:
        file = await client.files.create(
            file=pdf,
            purpose="user_data"
        )
    print(f"File uploaded: {file.id}")
    # Wait for the file to finish processing
    await client.files.wait_for_processing(file.id)
    print(f"File processed: {file.id}")
    response = await client.responses.create(
        model="doubao-seed-1-6-251015",
        input=[
            {"role": "user", "content": [
                {
                    "type": "input_file",
                    "file_id": file.id  # ref pdf file id
                },
                {
                    "type": "input_text",
                    "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。"
                }
            ]},
        ],
    )
    print(response)


if __name__ == "__main__":
    asyncio.run(main())
package main

import (
	"context"
	"fmt"
	"os"
	"time"

	"github.com/volcengine/volcengine-go-sdk/service/arkruntime"
	"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model/file"
	"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model/responses"
	"github.com/volcengine/volcengine-go-sdk/volcengine"
)

// main uploads a local PDF through the Files API, polls until the file is no
// longer in the processing state, then sends a non-streaming Responses API
// request that references the file by ID and prints the response.
//
// The "io" import was dropped: nothing in this program uses it, and Go refuses
// to compile a file with an unused import.
func main() {
	client := arkruntime.NewClientWithApiKey(os.Getenv("ARK_API_KEY"))
	ctx := context.Background()

	fmt.Println("----- upload file data -----")
	data, err := os.Open("/Users/doc/demo.pdf")
	if err != nil {
		fmt.Printf("read file error: %v\n", err)
		return
	}
	// Release the file handle once main returns.
	defer data.Close()

	fileInfo, err := client.UploadFile(ctx, &file.UploadFileRequest{
		File:    data,
		Purpose: file.PurposeUserData,
	})
	if err != nil {
		fmt.Printf("upload file error: %v\n", err)
		return
	}

	// Wait for the file to finish processing
	for fileInfo.Status == file.StatusProcessing {
		fmt.Println("Waiting for file to be processed...")
		time.Sleep(2 * time.Second)
		fileInfo, err = client.RetrieveFile(ctx, fileInfo.ID) // update file info
		if err != nil {
			fmt.Printf("get file status error: %v\n", err)
			return
		}
	}
	fmt.Printf("File processing completed: %s, status: %s\n", fileInfo.ID, fileInfo.Status)

	// One user message carrying the file reference followed by the text prompt.
	inputMessage := &responses.ItemInputMessage{
		Role: responses.MessageRole_user,
		Content: []*responses.ContentItem{
			{
				Union: &responses.ContentItem_File{
					File: &responses.ContentItemFile{
						Type:   responses.ContentItemType_input_file,
						FileId: volcengine.String(fileInfo.ID),
					},
				},
			},
			{
				Union: &responses.ContentItem_Text{
					Text: &responses.ContentItemText{
						Type: responses.ContentItemType_input_text,
						Text: "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。",
					},
				},
			},
		},
	}
	createResponsesReq := &responses.ResponsesRequest{
		Model: "doubao-seed-1-6-251015",
		Input: &responses.ResponsesInput{
			Union: &responses.ResponsesInput_ListValue{
				ListValue: &responses.InputItemList{ListValue: []*responses.InputItem{{
					Union: &responses.InputItem_InputMessage{
						InputMessage: inputMessage,
					},
				}}},
			},
		},
		Caching: &responses.ResponsesCaching{Type: responses.CacheType_enabled.Enum()},
	}

	resp, err := client.CreateResponses(ctx, createResponsesReq)
	if err != nil {
		// This is a non-streaming call, so report it as a response error.
		fmt.Printf("response error: %v\n", err)
		return
	}
	fmt.Println(resp)
}
package com.ark.example;
import com.volcengine.ark.runtime.model.files.FileMeta;
import com.volcengine.ark.runtime.model.files.UploadFileRequest;
import com.volcengine.ark.runtime.model.responses.content.InputContentItemFile;
import com.volcengine.ark.runtime.service.ArkService;
import com.volcengine.ark.runtime.model.responses.request.*;
import com.volcengine.ark.runtime.model.responses.item.ItemEasyMessage;
import com.volcengine.ark.runtime.model.responses.constant.ResponsesConstants;
import com.volcengine.ark.runtime.model.responses.item.MessageContent;
import com.volcengine.ark.runtime.model.responses.content.InputContentItemText;
import com.volcengine.ark.runtime.model.responses.response.ResponseObject;
import java.io.File;
import java.util.concurrent.TimeUnit;
public class demo {
    /**
     * Uploads a local PDF through the Files API, polls until the file leaves the
     * "processing" state, then sends a Responses API request that references the
     * file by ID and prints the response.
     *
     * <p>If polling fails or is interrupted, the example stops instead of sending
     * a request for a file that has not finished processing. The service executor
     * is shut down on every exit path.
     */
    public static void main(String[] args) {
        String apiKey = System.getenv("ARK_API_KEY");
        ArkService service = ArkService.builder().apiKey(apiKey).build();
        try {
            System.out.println("===== Upload File Example=====");
            // upload a file for responses
            FileMeta fileMeta = service.uploadFile(
                    UploadFileRequest.builder()
                            .file(new File("/Users/doc/demo.pdf")) // replace with your file path
                            .purpose("user_data")
                            .build());
            System.out.println("Uploaded file Meta: " + fileMeta);
            System.out.println("status:" + fileMeta.getStatus());
            try {
                // Constant-first equals keeps the check safe if the status is null.
                while ("processing".equals(fileMeta.getStatus())) {
                    System.out.println("Waiting for file to be processed...");
                    TimeUnit.SECONDS.sleep(2);
                    fileMeta = service.retrieveFile(fileMeta.getId());
                }
            } catch (InterruptedException e) {
                // Preserve the interrupt for callers and stop the example.
                Thread.currentThread().interrupt();
                System.err.println("get file status error:" + e.getMessage());
                return;
            } catch (Exception e) {
                System.err.println("get file status error:" + e.getMessage());
                return;
            }
            System.out.println("Uploaded file Meta: " + fileMeta);
            CreateResponsesRequest request = CreateResponsesRequest.builder()
                    .model("doubao-seed-1-6-251015")
                    .input(ResponsesInput.builder().addListItem(
                            ItemEasyMessage.builder().role(ResponsesConstants.MESSAGE_ROLE_USER).content(
                                    MessageContent.builder()
                                            .addListItem(InputContentItemFile.InputContentItemFileBuilder.anInputContentItemFile().fileId(fileMeta.getId()).build())
                                            .addListItem(InputContentItemText.builder().text("按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。").build())
                                            .build()
                            ).build()
                    ).build())
                    .build();
            ResponseObject resp = service.createResponse(request);
            System.out.println(resp);
        } finally {
            service.shutdownExecutor();
        }
    }
}
import os
import time
from openai import OpenAI
# The API key is read from the ARK_API_KEY environment variable.
api_key = os.getenv('ARK_API_KEY')
client = OpenAI(
    base_url='https://ark.cn-beijing.volces.com/api/v3',
    api_key=api_key,
)

# Upload inside a context manager so the file handle is closed after the request.
with open("/Users/doc/demo.pdf", "rb") as pdf:
    file = client.files.create(
        file=pdf,
        purpose="user_data"
    )

# Wait for the file to finish processing
while file.status == "processing":
    time.sleep(2)
    file = client.files.retrieve(file.id)
print(f"File processed: {file}")

# Reference the uploaded file by ID alongside the text prompt.
response = client.responses.create(
    model="doubao-seed-1-6-251015",
    input=[
        {
            "role": "user",
            "content": [
                {
                    "type": "input_file",
                    "file_id": file.id,
                },
                {
                    "type": "input_text",
                    "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。",
                },
            ]
        }
    ]
)
print(response)
输出示例:
{
"text": [
{
"type": "heading",
"content": "1 Introduction"
},
{
"type": "paragraph",
"content": "Diffusion models [3–5] learn to reverse a process that incrementally corrupts data with noise, effectively decomposing a complex distribution into a hierarchy of simplified representations. This coarse-to-fine generative approach has proven remarkably successful across a wide range of applications, including image and video synthesis [6] as well as solving complex challenges in natural sciences [7]."
},
...
{
"type": "heading",
"content": "3 Seed Diffusion"
},
{
"type": "paragraph",
"content": "As the first experimental model in our Seed Diffusion series, Seed Diffusion Preview is specifically focused on code generation, thus adopting the data pipeline (code/code-related data only) and processing methodology of the open-sourced Seed Coder project [20]. The architecture is a standard dense Transformer, and we intentionally omit complex components such as LongCoT reasoning in this initial version to first establish a strong and efficient performance baseline. This section introduces its key components and training strategies."
}
]
}
Base64 编码传入 将本地文件转换为 Base64 编码字符串,然后提交给大模型。该方式适用于文档体积较小的情况,文件不能超过 50 MB,请求体不能超过 64 MB。
注意
将文档转换为Base64编码字符串,然后遵循data:{mime_type};base64,{base64_data}格式拼接,传入模型。
{mime_type}:文件的媒体类型,需要与文件格式mime_type对应(application/pdf)。
{base64_data}:文件经过Base64编码后的字符串。
# Strip newlines from the encoded data: GNU base64 wraps its output, and a raw
# newline inside the JSON string would make the request body invalid.
# When using file_data, the filename parameter is required. (JSON has no comment
# syntax, so this note lives here rather than inside the request body.)
BASE64_FILE=$(base64 < demo.pdf | tr -d '\n') && curl https://ark.cn-beijing.volces.com/api/v3/responses \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $ARK_API_KEY" \
-d @- <<EOF
{
  "model": "doubao-seed-1-6-251015",
  "input": [
    {
      "role": "user",
      "content": [
        {
          "type": "input_file",
          "file_data": "data:application/pdf;base64,$BASE64_FILE",
          "filename": "demo.pdf"
        },
        {
          "type": "input_text",
          "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。"
        }
      ]
    }
  ]
}
EOF
import os
from volcenginesdkarkruntime import Ark
import base64
# Build the Ark client; the API key is read from the ARK_API_KEY environment variable.
api_key = os.environ.get('ARK_API_KEY')
client = Ark(
    api_key=api_key,
    base_url='https://ark.cn-beijing.volces.com/api/v3',
)
# Convert local files to Base64-encoded strings.
def encode_file(file_path):
    """Return the contents of the file at *file_path* as a Base64 text string.

    The file is read in binary mode and the Base64 bytes are decoded as UTF-8.
    """
    with open(file_path, "rb") as read_file:
        return base64.b64encode(read_file.read()).decode('utf-8')
# Encode the local PDF, then pass it as a data URL together with its filename.
base64_file = encode_file("/Users/doc/demo.pdf")
file_part = {
    "type": "input_file",
    "file_data": f"data:application/pdf;base64,{base64_file}",
    "filename": "demo.pdf",
}
prompt_part = {
    "type": "input_text",
    "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。",
}
response = client.responses.create(
    model="doubao-seed-1-6-251015",
    input=[{"role": "user", "content": [file_part, prompt_part]}],
)
print(response)
package main
import (
"context"
"encoding/base64"
"fmt"
"os"
"github.com/samber/lo"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model/responses"
)
// main Base64-encodes a local PDF, embeds it as a data URL in a Responses API
// request together with a text prompt, and prints the response.
func main() {
	// Convert local files to Base64-encoded strings.
	pdfBytes, readErr := os.ReadFile("/Users/doc/demo.pdf")
	if readErr != nil {
		fmt.Printf("read file error: %v\n", readErr)
		return
	}
	encoded := base64.StdEncoding.EncodeToString(pdfBytes)
	dataURL := fmt.Sprintf("data:application/pdf;base64,%s", encoded)

	arkClient := arkruntime.NewClientWithApiKey(
		os.Getenv("ARK_API_KEY"),
		arkruntime.WithBaseUrl("https://ark.cn-beijing.volces.com/api/v3"),
	)

	// File part: the encoded document plus its filename.
	filePart := &responses.ContentItem{
		Union: &responses.ContentItem_File{
			File: &responses.ContentItemFile{
				Type:     responses.ContentItemType_input_file,
				FileData: lo.ToPtr(dataURL),
				Filename: lo.ToPtr("demo.pdf"),
			},
		},
	}
	// Text part: the instruction sent along with the document.
	textPart := &responses.ContentItem{
		Union: &responses.ContentItem_Text{
			Text: &responses.ContentItemText{
				Type: responses.ContentItemType_input_text,
				Text: "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。",
			},
		},
	}
	userMessage := &responses.ItemInputMessage{
		Role:    responses.MessageRole_user,
		Content: []*responses.ContentItem{filePart, textPart},
	}
	request := &responses.ResponsesRequest{
		Model: "doubao-seed-1-6-251015",
		Input: &responses.ResponsesInput{
			Union: &responses.ResponsesInput_ListValue{
				ListValue: &responses.InputItemList{ListValue: []*responses.InputItem{{
					Union: &responses.InputItem_InputMessage{
						InputMessage: userMessage,
					},
				}}},
			},
		},
	}

	result, callErr := arkClient.CreateResponses(context.Background(), request)
	if callErr != nil {
		fmt.Printf("response error: %v", callErr)
		return
	}
	fmt.Println(result)
}
package com.ark.example;
import com.volcengine.ark.runtime.model.responses.content.InputContentItemFile;
import com.volcengine.ark.runtime.model.responses.content.InputContentItemImage;
import com.volcengine.ark.runtime.model.responses.content.InputContentItemText;
import com.volcengine.ark.runtime.model.responses.item.ItemEasyMessage;
import com.volcengine.ark.runtime.service.ArkService;
import com.volcengine.ark.runtime.model.responses.request.*;
import com.volcengine.ark.runtime.model.responses.response.ResponseObject;
import com.volcengine.ark.runtime.model.responses.constant.ResponsesConstants;
import com.volcengine.ark.runtime.model.responses.item.MessageContent;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.io.IOException;
public class demo {
    /**
     * Reads the file at {@code filePath} and returns its bytes as a Base64 string.
     *
     * @throws IOException if the file cannot be read
     */
    private static String encodeFile(String filePath) throws IOException {
        byte[] fileBytes = Files.readAllBytes(Paths.get(filePath));
        return Base64.getEncoder().encodeToString(fileBytes);
    }

    /**
     * Base64-encodes a local PDF, sends it as a data URL in a Responses API
     * request together with a text prompt, and prints the response.
     *
     * <p>If the file cannot be encoded, the example stops instead of sending a
     * request with empty file data. The service executor is shut down on every
     * exit path.
     */
    public static void main(String[] args) {
        String apiKey = System.getenv("ARK_API_KEY");
        ArkService arkService = ArkService.builder().apiKey(apiKey).baseUrl("https://ark.cn-beijing.volces.com/api/v3").build();
        try {
            // Convert local files to Base64-encoded strings.
            String base64Data;
            try {
                base64Data = "data:application/pdf;base64," + encodeFile("/Users/doc/demo.pdf");
            } catch (IOException e) {
                System.err.println("encode error: " + e.getMessage());
                return;
            }
            CreateResponsesRequest request = CreateResponsesRequest.builder()
                    .model("doubao-seed-1-6-251015")
                    .input(ResponsesInput.builder().addListItem(
                            ItemEasyMessage.builder().role(ResponsesConstants.MESSAGE_ROLE_USER).content(
                                    MessageContent.builder()
                                            .addListItem(InputContentItemFile.InputContentItemFileBuilder.anInputContentItemFile().fileData(base64Data).fileName("demo.pdf").build())
                                            .addListItem(InputContentItemText.builder().text("按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。").build())
                                            .build()
                            ).build()
                    ).build())
                    .build();
            ResponseObject resp = arkService.createResponse(request);
            System.out.println(resp);
        } finally {
            arkService.shutdownExecutor();
        }
    }
}
import os
from openai import OpenAI
import base64
# Build the OpenAI-compatible client; the API key is read from ARK_API_KEY.
api_key = os.environ.get('ARK_API_KEY')
client = OpenAI(
    api_key=api_key,
    base_url='https://ark.cn-beijing.volces.com/api/v3',
)
# Convert local files to Base64-encoded strings.
def encode_file(file_path):
    """Return the contents of the file at *file_path* as a Base64 text string.

    The file is read in binary mode and the Base64 bytes are decoded as UTF-8.
    """
    with open(file_path, "rb") as read_file:
        return base64.b64encode(read_file.read()).decode('utf-8')
# Encode the local PDF, then pass it as a data URL together with its filename.
base64_file = encode_file("/Users/doc/demo.pdf")
file_part = {
    "type": "input_file",
    "file_data": f"data:application/pdf;base64,{base64_file}",
    "filename": "demo.pdf",
}
prompt_part = {
    "type": "input_text",
    "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。",
}
response = client.responses.create(
    model="doubao-seed-1-6-251015",
    input=[{"role": "user", "content": [file_part, prompt_part]}],
)
print(response)
文件 URL 传入 如果文档已存在公网可访问 URL,可以在 Responses API 请求中直接填入文档的公网 URL,文件不能超过50 MB。
# Send a Responses API request that points at the PDF by its public URL.
curl "https://ark.cn-beijing.volces.com/api/v3/responses" \
  --header "Authorization: Bearer ${ARK_API_KEY}" \
  --header 'Content-Type: application/json' \
  --data '{
    "model": "doubao-seed-1-6-251015",
    "input": [
      {
        "role": "user",
        "content": [
          {
            "type": "input_file",
            "file_url": "https://ark-project.tos-cn-beijing.volces.com/doc_pdf/demo.pdf"
          },
          {
            "type": "input_text",
            "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。"
          }
        ]
      }
    ]
  }'
import os
from volcenginesdkarkruntime import Ark
# Build the Ark client; the API key is read from the ARK_API_KEY environment variable.
api_key = os.environ.get('ARK_API_KEY')
client = Ark(
    api_key=api_key,
    base_url='https://ark.cn-beijing.volces.com/api/v3',
)

# The document is referenced by URL, followed by the text prompt.
pdf_part = {
    "type": "input_file",
    "file_url": "https://ark-project.tos-cn-beijing.volces.com/doc_pdf/demo.pdf",
}
prompt_part = {
    "type": "input_text",
    "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。",
}
response = client.responses.create(
    model="doubao-seed-1-6-251015",
    input=[{"role": "user", "content": [pdf_part, prompt_part]}],
)
print(response)
package main
import (
"context"
"fmt"
"os"
"github.com/samber/lo"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model/responses"
)
// main sends a Responses API request that references a PDF by URL together
// with a text prompt, and prints the response.
func main() {
	arkClient := arkruntime.NewClientWithApiKey(
		// Get API Key:https://console.volcengine.com/ark/region:ark+cn-beijing/apikey
		os.Getenv("ARK_API_KEY"),
		arkruntime.WithBaseUrl("https://ark.cn-beijing.volces.com/api/v3"),
	)

	// File part: the document, referenced by URL.
	filePart := &responses.ContentItem{
		Union: &responses.ContentItem_File{
			File: &responses.ContentItemFile{
				Type:    responses.ContentItemType_input_file,
				FileUrl: lo.ToPtr("https://ark-project.tos-cn-beijing.volces.com/doc_pdf/demo.pdf"),
			},
		},
	}
	// Text part: the instruction sent along with the document.
	textPart := &responses.ContentItem{
		Union: &responses.ContentItem_Text{
			Text: &responses.ContentItemText{
				Type: responses.ContentItemType_input_text,
				Text: "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。",
			},
		},
	}
	userMessage := &responses.ItemInputMessage{
		Role:    responses.MessageRole_user,
		Content: []*responses.ContentItem{filePart, textPart},
	}
	request := &responses.ResponsesRequest{
		Model: "doubao-seed-1-6-251015",
		Input: &responses.ResponsesInput{
			Union: &responses.ResponsesInput_ListValue{
				ListValue: &responses.InputItemList{ListValue: []*responses.InputItem{{
					Union: &responses.InputItem_InputMessage{
						InputMessage: userMessage,
					},
				}}},
			},
		},
	}

	result, callErr := arkClient.CreateResponses(context.Background(), request)
	if callErr != nil {
		fmt.Printf("response error: %v", callErr)
		return
	}
	fmt.Println(result)
}
package com.ark.example;
import com.volcengine.ark.runtime.model.responses.content.InputContentItemFile;
import com.volcengine.ark.runtime.model.responses.content.InputContentItemImage;
import com.volcengine.ark.runtime.model.responses.content.InputContentItemText;
import com.volcengine.ark.runtime.model.responses.item.ItemEasyMessage;
import com.volcengine.ark.runtime.service.ArkService;
import com.volcengine.ark.runtime.model.responses.request.*;
import com.volcengine.ark.runtime.model.responses.response.ResponseObject;
import com.volcengine.ark.runtime.model.responses.constant.ResponsesConstants;
import com.volcengine.ark.runtime.model.responses.item.MessageContent;
public class demo {
    /**
     * Sends a Responses API request that references a PDF by URL together with
     * a text prompt, and prints the response.
     */
    public static void main(String[] args) {
        // The base URL for model invocation
        ArkService arkService = ArkService.builder()
                .apiKey(System.getenv("ARK_API_KEY"))
                .baseUrl("https://ark.cn-beijing.volces.com/api/v3")
                .build();

        String pdfUrl = "https://ark-project.tos-cn-beijing.volces.com/doc_pdf/demo.pdf";
        String prompt = "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。";

        // Message content: the file reference first, then the text instruction.
        MessageContent content = MessageContent.builder()
                .addListItem(InputContentItemFile.InputContentItemFileBuilder.anInputContentItemFile().fileUrl(pdfUrl).build())
                .addListItem(InputContentItemText.builder().text(prompt).build())
                .build();
        ItemEasyMessage userMessage = ItemEasyMessage.builder()
                .role(ResponsesConstants.MESSAGE_ROLE_USER)
                .content(content)
                .build();

        CreateResponsesRequest request = CreateResponsesRequest.builder()
                .model("doubao-seed-1-6-251015")
                .input(ResponsesInput.builder().addListItem(userMessage).build())
                .build();

        ResponseObject resp = arkService.createResponse(request);
        System.out.println(resp);
        arkService.shutdownExecutor();
    }
}
import os
from openai import OpenAI
# Build the OpenAI-compatible client; the API key is read from ARK_API_KEY.
api_key = os.environ.get('ARK_API_KEY')
client = OpenAI(
    api_key=api_key,
    base_url='https://ark.cn-beijing.volces.com/api/v3',
)

# The document is referenced by URL, followed by the text prompt.
pdf_part = {
    "type": "input_file",
    "file_url": "https://ark-project.tos-cn-beijing.volces.com/doc_pdf/demo.pdf",
}
prompt_part = {
    "type": "input_text",
    "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。",
}
response = client.responses.create(
    model="doubao-seed-1-6-251015",
    input=[{"role": "user", "content": [pdf_part, prompt_part]}],
)
print(response)
流式输出
流式输出支持内容动态实时呈现,既能够缓解用户等待焦虑,又可以规避复杂任务因长时间推理引发的客户端超时失败问题,保障请求流程顺畅。
import asyncio
import os
from volcenginesdkarkruntime import AsyncArk
from volcenginesdkarkruntime.types.responses.response_completed_event import ResponseCompletedEvent
from volcenginesdkarkruntime.types.responses.response_reasoning_summary_text_delta_event import ResponseReasoningSummaryTextDeltaEvent
from volcenginesdkarkruntime.types.responses.response_output_item_added_event import ResponseOutputItemAddedEvent
from volcenginesdkarkruntime.types.responses.response_text_delta_event import ResponseTextDeltaEvent
from volcenginesdkarkruntime.types.responses.response_text_done_event import ResponseTextDoneEvent
client = AsyncArk(
    base_url='https://ark.cn-beijing.volces.com/api/v3',
    api_key=os.getenv('ARK_API_KEY')
)


async def main():
    """Upload a local PDF, then stream the model's answer event by event.

    The file is uploaded with purpose ``user_data``, the script waits for the
    file to finish processing, and a streaming Responses API request then
    references the file by its ID. Reasoning-summary and output-text deltas
    are printed as they arrive; usage is printed when the response completes.
    """
    # upload pdf file
    print("Upload pdf file")
    # Open the PDF in a context manager so the handle is closed after the upload.
    # replace with your local pdf path
    with open("/Users/doc/demo.pdf", "rb") as pdf:
        file = await client.files.create(
            file=pdf,
            purpose="user_data"
        )
    print(f"File uploaded: {file.id}")
    # Wait for the file to finish processing
    await client.files.wait_for_processing(file.id)
    print(f"File processed: {file.id}")
    stream = await client.responses.create(
        model="doubao-seed-1-6-251015",
        input=[
            {"role": "user", "content": [
                {
                    "type": "input_file",
                    "file_id": file.id  # ref pdf file id
                },
                {
                    "type": "input_text",
                    "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。"
                }
            ]},
        ],
        caching={
            "type": "enabled",
        },
        store=True,
        stream=True
    )
    async for event in stream:
        if isinstance(event, ResponseReasoningSummaryTextDeltaEvent):
            print(event.delta, end="")
        if isinstance(event, ResponseOutputItemAddedEvent):
            print("\noutPutItem " + event.type + " start:")
        if isinstance(event, ResponseTextDeltaEvent):
            print(event.delta, end="")
        if isinstance(event, ResponseTextDoneEvent):
            print("\noutPutTextDone.")
        if isinstance(event, ResponseCompletedEvent):
            print("Response Completed. Usage = " + event.response.usage.model_dump_json())


if __name__ == "__main__":
    asyncio.run(main())
package main
import (
"context"
"fmt"
"io"
"os"
"time"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model/file"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model/responses"
"github.com/volcengine/volcengine-go-sdk/volcengine"
)
// main uploads a local PDF through the Files API, polls until the file is no
// longer in the processing state, then opens a streaming Responses API request
// that references the file by ID. Each received event is passed to handleEvent,
// and the response ID is printed whenever an event carries a response.
func main() {
	client := arkruntime.NewClientWithApiKey(os.Getenv("ARK_API_KEY"))
	ctx := context.Background()

	fmt.Println("----- upload file data -----")
	data, err := os.Open("/Users/doc/demo.pdf")
	if err != nil {
		fmt.Printf("read file error: %v\n", err)
		return
	}
	// Release the file handle once main returns.
	defer data.Close()

	fileInfo, err := client.UploadFile(ctx, &file.UploadFileRequest{
		File:    data,
		Purpose: file.PurposeUserData,
	})
	if err != nil {
		fmt.Printf("upload file error: %v\n", err)
		return
	}

	// Wait for the file to finish processing
	for fileInfo.Status == file.StatusProcessing {
		fmt.Println("Waiting for file to be processed...")
		time.Sleep(2 * time.Second)
		fileInfo, err = client.RetrieveFile(ctx, fileInfo.ID) // update file info
		if err != nil {
			fmt.Printf("get file status error: %v\n", err)
			return
		}
	}
	fmt.Printf("File processing completed: %s, status: %s\n", fileInfo.ID, fileInfo.Status)

	// One user message carrying the file reference followed by the text prompt.
	inputMessage := &responses.ItemInputMessage{
		Role: responses.MessageRole_user,
		Content: []*responses.ContentItem{
			{
				Union: &responses.ContentItem_File{
					File: &responses.ContentItemFile{
						Type:   responses.ContentItemType_input_file,
						FileId: volcengine.String(fileInfo.ID),
					},
				},
			},
			{
				Union: &responses.ContentItem_Text{
					Text: &responses.ContentItemText{
						Type: responses.ContentItemType_input_text,
						Text: "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。",
					},
				},
			},
		},
	}
	createResponsesReq := &responses.ResponsesRequest{
		Model: "doubao-seed-1-6-251015",
		Input: &responses.ResponsesInput{
			Union: &responses.ResponsesInput_ListValue{
				ListValue: &responses.InputItemList{ListValue: []*responses.InputItem{{
					Union: &responses.InputItem_InputMessage{
						InputMessage: inputMessage,
					},
				}}},
			},
		},
		Caching: &responses.ResponsesCaching{Type: responses.CacheType_enabled.Enum()},
	}

	resp, err := client.CreateResponsesStream(ctx, createResponsesReq)
	if err != nil {
		fmt.Printf("stream error: %v\n", err)
		return
	}

	var responseId string
	for {
		event, err := resp.Recv()
		if err == io.EOF {
			// End of stream.
			break
		}
		if err != nil {
			fmt.Printf("stream error: %v\n", err)
			return
		}
		handleEvent(event)
		if responseEvent := event.GetResponse(); responseEvent != nil {
			responseId = responseEvent.GetResponse().GetId()
			// Trailing newline keeps the ID from running into the next output.
			fmt.Printf("Response ID: %s\n", responseId)
		}
	}
}
// handleEvent prints the content of the streaming events this example cares
// about: reasoning-summary deltas, the aggregated reasoning text, output-text
// deltas, and the aggregated output text. All other event types are ignored.
//
// Deltas are written with fmt.Print rather than the builtin print, so they go
// to standard output like the rest of the program's output (the builtin print
// writes to standard error).
func handleEvent(event *responses.Event) {
	switch event.GetEventType() {
	case responses.EventType_response_reasoning_summary_text_delta.String():
		fmt.Print(event.GetReasoningText().GetDelta())
	case responses.EventType_response_reasoning_summary_text_done.String(): // aggregated reasoning text
		fmt.Printf("\nAggregated reasoning text: %s\n", event.GetReasoningText().GetText())
	case responses.EventType_response_output_text_delta.String():
		fmt.Print(event.GetText().GetDelta())
	case responses.EventType_response_output_text_done.String(): // aggregated output text
		fmt.Printf("\nAggregated output text: %s\n", event.GetTextDone().GetText())
	default:
		return
	}
}
package com.ark.example;
import com.volcengine.ark.runtime.model.files.FileMeta;
import com.volcengine.ark.runtime.model.files.UploadFileRequest;
import com.volcengine.ark.runtime.model.responses.content.InputContentItemFile;
import com.volcengine.ark.runtime.service.ArkService;
import com.volcengine.ark.runtime.model.responses.request.*;
import com.volcengine.ark.runtime.model.responses.item.ItemEasyMessage;
import com.volcengine.ark.runtime.model.responses.constant.ResponsesConstants;
import com.volcengine.ark.runtime.model.responses.item.MessageContent;
import com.volcengine.ark.runtime.model.responses.content.InputContentItemText;
import com.volcengine.ark.runtime.model.responses.event.functioncall.FunctionCallArgumentsDoneEvent;
import com.volcengine.ark.runtime.model.responses.event.outputitem.OutputItemAddedEvent;
import com.volcengine.ark.runtime.model.responses.event.outputitem.OutputItemDoneEvent;
import com.volcengine.ark.runtime.model.responses.event.outputtext.OutputTextDeltaEvent;
import com.volcengine.ark.runtime.model.responses.event.outputtext.OutputTextDoneEvent;
import com.volcengine.ark.runtime.model.responses.event.reasoningsummary.ReasoningSummaryTextDeltaEvent;
import com.volcengine.ark.runtime.model.responses.event.response.ResponseCompletedEvent;
import java.io.File;
import java.util.concurrent.TimeUnit;
public class demo {
    /**
     * Uploads a local PDF through the Files API, polls until the file leaves the
     * "processing" state, then streams a Responses API request that references
     * the file by ID, printing events as they arrive.
     *
     * <p>If polling fails or is interrupted, the example stops instead of sending
     * a request for a file that has not finished processing. The service executor
     * is shut down on every exit path.
     */
    public static void main(String[] args) {
        String apiKey = System.getenv("ARK_API_KEY");
        ArkService service = ArkService.builder().apiKey(apiKey).build();
        try {
            System.out.println("===== Upload File Example=====");
            // upload a file for responses
            FileMeta fileMeta = service.uploadFile(
                    UploadFileRequest.builder()
                            .file(new File("/Users/doc/demo.pdf")) // replace with your file path
                            .purpose("user_data")
                            .build());
            System.out.println("Uploaded file Meta: " + fileMeta);
            System.out.println("status:" + fileMeta.getStatus());
            try {
                // Constant-first equals keeps the check safe if the status is null.
                while ("processing".equals(fileMeta.getStatus())) {
                    System.out.println("Waiting for file to be processed...");
                    TimeUnit.SECONDS.sleep(2);
                    fileMeta = service.retrieveFile(fileMeta.getId());
                }
            } catch (InterruptedException e) {
                // Preserve the interrupt for callers and stop the example.
                Thread.currentThread().interrupt();
                System.err.println("get file status error:" + e.getMessage());
                return;
            } catch (Exception e) {
                System.err.println("get file status error:" + e.getMessage());
                return;
            }
            System.out.println("Uploaded file Meta: " + fileMeta);
            CreateResponsesRequest request = CreateResponsesRequest.builder()
                    .model("doubao-seed-1-6-251015")
                    .stream(true)
                    .input(ResponsesInput.builder().addListItem(
                            ItemEasyMessage.builder().role(ResponsesConstants.MESSAGE_ROLE_USER).content(
                                    MessageContent.builder()
                                            .addListItem(InputContentItemFile.InputContentItemFileBuilder.anInputContentItemFile().fileId(fileMeta.getId()).build())
                                            .addListItem(InputContentItemText.builder().text("按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。").build())
                                            .build()
                            ).build()
                    ).build())
                    .build();
            // Consume the event stream on this thread, printing each event type of interest.
            service.streamResponse(request)
                    .doOnError(Throwable::printStackTrace)
                    .blockingForEach(event -> {
                        if (event instanceof ReasoningSummaryTextDeltaEvent) {
                            System.out.print(((ReasoningSummaryTextDeltaEvent) event).getDelta());
                        }
                        if (event instanceof OutputItemAddedEvent) {
                            System.out.println("\nOutputItem " + (((OutputItemAddedEvent) event).getItem().getType()) + " Start: ");
                        }
                        if (event instanceof OutputTextDeltaEvent) {
                            System.out.print(((OutputTextDeltaEvent) event).getDelta());
                        }
                        if (event instanceof OutputTextDoneEvent) {
                            System.out.println("\nOutputText End.");
                        }
                        if (event instanceof OutputItemDoneEvent) {
                            System.out.println("\nOutputItem " + ((OutputItemDoneEvent) event).getItem().getType() + " End.");
                        }
                        if (event instanceof FunctionCallArgumentsDoneEvent) {
                            System.out.println("\nFunctionCall Arguments: " + ((FunctionCallArgumentsDoneEvent) event).getArguments());
                        }
                        if (event instanceof ResponseCompletedEvent) {
                            System.out.println("\nResponse Completed. Usage = " + ((ResponseCompletedEvent) event).getResponse().getUsage());
                        }
                    });
        } finally {
            service.shutdownExecutor();
        }
    }
}
import os
import time
from openai import OpenAI
# The API key is read from the ARK_API_KEY environment variable.
api_key = os.getenv('ARK_API_KEY')
client = OpenAI(
    base_url='https://ark.cn-beijing.volces.com/api/v3',
    api_key=api_key,
)

# Upload inside a context manager so the file handle is closed after the request.
with open("/Users/doc/demo.pdf", "rb") as pdf:
    file = client.files.create(
        file=pdf,
        purpose="user_data"
    )

# Wait for the file to finish processing
while file.status == "processing":
    time.sleep(2)
    file = client.files.retrieve(file.id)
print(f"File processed: {file}")

# stream=True makes the call return an iterable of events.
response = client.responses.create(
    model="doubao-seed-1-6-251015",
    input=[
        {
            "role": "user",
            "content": [
                {
                    "type": "input_file",
                    "file_id": file.id,
                },
                {
                    "type": "input_text",
                    "text": "按段落给出文档中的文字内容,以JSON格式输出,包括段落类型(type)、文字内容(content)信息。",
                },
            ]
        }
    ],
    stream=True
)

for event in response:
    if event.type == "response.reasoning_summary_text.delta":
        print(event.delta, end="")
    if event.type == "response.output_item.added":
        print("\noutPutItem " + event.type + " start:")
    if event.type == "response.output_text.delta":
        print(event.delta, end="")
    # The "text done" message belongs to the output_text.done event, matching
    # the AsyncArk streaming example (ResponseTextDoneEvent).
    if event.type == "response.output_text.done":
        print("\noutPutTextDone.")
    if event.type == "response.completed":
        print("\nResponse Completed. Usage = " + event.response.usage.model_dump_json())
附:文件预处理
PDF 文件会被分页处理成多张图片。预处理时不会对拆分出的图片做分辨率缩放,以确保图片能够完整且清晰地保留 PDF 文件中的原始信息。作为模型输入时,会按照 input.content.detail 参数的 auto 行为自动缩放。