如何通过编程方式使用FFmpeg转换视频？能否脱离命令行调用FFmpeg库？

如何通过编程方式使用FFmpeg转换视频？能否脱离命令行调用FFmpeg库？

阿华AIGC实验室

2026-5-20

当然可以！完全不用依赖命令行调用FFmpeg，不管是C++还是Java，都能直接通过API/封装库实现你需要的功能——比如从视频和PNG生成带叠加层的GIF。下面分两种语言给你具体方案：

C++ 直接调用FFmpeg原生API

FFmpeg本身是C语言编写的，C++可以无缝调用它的原生API，虽然代码量比命令行多，但灵活性拉满。核心步骤大概是这样：

初始化FFmpeg组件：设置日志级别，新版本FFmpeg中av_register_all()已废弃，无需额外调用初始化格式/编码器的通用方法
打开输入源：分别打开视频文件和PNG图片，解析流信息、获取对应解码器
帧处理与叠加：读取视频帧和PNG帧，用libswscale统一像素格式，再将PNG像素叠加到视频帧的指定位置（可处理透明度）
初始化输出编码器：配置GIF编码器参数（帧率、调色板、尺寸等），创建输出上下文
编码并写入文件：将处理后的每一帧编码为GIF帧，写入输出文件

给你一段简化的核心示例代码（省略了部分错误处理，实际开发一定要补充）：

#include <cstddef>
#include <cstdint>
#include <iostream>

// FFmpeg is a C library: its public headers live under lib<name>/ and must be
// wrapped in extern "C" when they are included from C++.
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libswscale/swscale.h>
}

namespace {

// Owns every FFmpeg object used by the conversion so that each early return
// releases everything through the destructor.
struct Pipeline {
    AVFormatContext *video_fmt_ctx = nullptr;
    AVFormatContext *png_fmt_ctx = nullptr;
    AVFormatContext *output_fmt_ctx = nullptr;
    AVCodecContext *video_codec_ctx = nullptr;
    AVCodecContext *png_codec_ctx = nullptr;
    AVCodecContext *output_codec_ctx = nullptr;
    SwsContext *sws_video = nullptr;  // decoded video -> RGB24
    SwsContext *sws_png = nullptr;    // decoded overlay -> RGBA at video size
    SwsContext *sws_out = nullptr;    // blended RGB24 -> RGB8 for the GIF encoder
    AVFrame *video_frame = nullptr;
    AVFrame *video_rgb = nullptr;
    AVFrame *png_frame = nullptr;
    AVFrame *png_rgba = nullptr;
    AVFrame *output_frame = nullptr;
    AVPacket *pkt = nullptr;
    int video_stream_idx = -1;
    int png_stream_idx = -1;

    ~Pipeline() {
        sws_freeContext(sws_video);
        sws_freeContext(sws_png);
        sws_freeContext(sws_out);
        av_frame_free(&video_frame);
        av_frame_free(&video_rgb);
        av_frame_free(&png_frame);
        av_frame_free(&png_rgba);
        av_frame_free(&output_frame);
        av_packet_free(&pkt);
        avcodec_free_context(&video_codec_ctx);
        avcodec_free_context(&png_codec_ctx);
        avcodec_free_context(&output_codec_ctx);
        avformat_close_input(&video_fmt_ctx);
        avformat_close_input(&png_fmt_ctx);
        if (output_fmt_ctx) {
            if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
                avio_closep(&output_fmt_ctx->pb);
            }
            avformat_free_context(output_fmt_ctx);
        }
    }
};

// Opens `path`, picks its best video stream and opens a decoder for it.
// Returns 0 on success or a negative AVERROR code.
int open_video_decoder(const char *path, AVFormatContext **fmt_ctx,
                       AVCodecContext **dec_ctx, int *stream_idx) {
    int ret = avformat_open_input(fmt_ctx, path, nullptr, nullptr);
    if (ret < 0) {
        return ret;
    }
    ret = avformat_find_stream_info(*fmt_ctx, nullptr);
    if (ret < 0) {
        return ret;
    }
    ret = av_find_best_stream(*fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
    if (ret < 0) {
        return ret;
    }
    *stream_idx = ret;

    const AVCodecParameters *par = (*fmt_ctx)->streams[*stream_idx]->codecpar;
    const AVCodec *decoder = avcodec_find_decoder(par->codec_id);
    if (!decoder) {
        return AVERROR_DECODER_NOT_FOUND;
    }
    *dec_ctx = avcodec_alloc_context3(decoder);
    if (!*dec_ctx) {
        return AVERROR(ENOMEM);
    }
    ret = avcodec_parameters_to_context(*dec_ctx, par);
    if (ret < 0) {
        return ret;
    }
    return avcodec_open2(*dec_ctx, decoder, nullptr);
}

// Allocates a reference-counted frame with the given geometry and format.
// The encoder needs format/width/height on every frame it is sent.
AVFrame *alloc_picture(AVPixelFormat fmt, int width, int height) {
    AVFrame *frame = av_frame_alloc();
    if (!frame) {
        return nullptr;
    }
    frame->format = fmt;
    frame->width = width;
    frame->height = height;
    if (av_frame_get_buffer(frame, 0) < 0) {
        av_frame_free(&frame);
        return nullptr;
    }
    return frame;
}

// Decodes the first picture of the selected stream into `frame`.
// Returns 0 on success or a negative AVERROR code.
int decode_first_frame(AVFormatContext *fmt_ctx, AVCodecContext *dec_ctx,
                       int stream_idx, AVPacket *pkt, AVFrame *frame) {
    for (;;) {
        int ret = av_read_frame(fmt_ctx, pkt);
        if (ret < 0) {
            break;  // end of input: fall through and flush the decoder
        }
        if (pkt->stream_index != stream_idx) {
            av_packet_unref(pkt);
            continue;
        }
        ret = avcodec_send_packet(dec_ctx, pkt);
        av_packet_unref(pkt);
        if (ret < 0) {
            return ret;
        }
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret != AVERROR(EAGAIN)) {
            return ret;  // 0 when a frame was produced, <0 on a real error
        }
    }
    // The decoder may still be holding the picture; flushing forces it out.
    const int ret = avcodec_send_packet(dec_ctx, nullptr);
    if (ret < 0) {
        return ret;
    }
    return avcodec_receive_frame(dec_ctx, frame);
}

// Alpha-blends a packed RGBA overlay onto a packed RGB24 frame of the same size.
void blend_overlay(AVFrame *dst_rgb24, const AVFrame *overlay_rgba) {
    for (int y = 0; y < dst_rgb24->height; ++y) {
        uint8_t *dst = dst_rgb24->data[0] +
                       static_cast<std::ptrdiff_t>(y) * dst_rgb24->linesize[0];
        const uint8_t *src = overlay_rgba->data[0] +
                             static_cast<std::ptrdiff_t>(y) * overlay_rgba->linesize[0];
        for (int x = 0; x < dst_rgb24->width; ++x, dst += 3, src += 4) {
            const unsigned alpha = src[3];
            for (int c = 0; c < 3; ++c) {
                // Rounded integer form of (1 - a) * dst + a * src.
                dst[c] = static_cast<uint8_t>(
                    (dst[c] * (255u - alpha) + src[c] * alpha + 127u) / 255u);
            }
        }
    }
}

// Sends `frame` to the encoder (nullptr flushes it) and writes every packet
// that becomes available. Returns 0 on success or a negative AVERROR code.
int encode_and_write(Pipeline &p, AVStream *out_stream, AVFrame *frame) {
    int ret = avcodec_send_frame(p.output_codec_ctx, frame);
    if (ret < 0) {
        return ret;
    }
    for (;;) {
        ret = avcodec_receive_packet(p.output_codec_ctx, p.pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            return 0;
        }
        if (ret < 0) {
            return ret;
        }
        p.pkt->stream_index = out_stream->index;
        // The muxer may have changed the stream time base in write_header.
        av_packet_rescale_ts(p.pkt, p.output_codec_ctx->time_base, out_stream->time_base);
        ret = av_interleaved_write_frame(p.output_fmt_ctx, p.pkt);
        av_packet_unref(p.pkt);
        if (ret < 0) {
            return ret;
        }
    }
}

// Converts the decoded frame in p.video_frame to RGB24, blends the overlay,
// converts to RGB8 and hands the result to the encoder.
int compose_and_encode(Pipeline &p, const AVStream *in_stream, AVStream *out_stream,
                       int64_t *next_pts) {
    const int height = p.video_codec_ctx->height;

    int ret = sws_scale(p.sws_video, p.video_frame->data, p.video_frame->linesize, 0,
                        height, p.video_rgb->data, p.video_rgb->linesize);
    if (ret < 0) {
        return ret;
    }

    blend_overlay(p.video_rgb, p.png_rgba);

    // The encoder may still reference the previous picture's buffer, so get a
    // private one before overwriting it.
    ret = av_frame_make_writable(p.output_frame);
    if (ret < 0) {
        return ret;
    }
    ret = sws_scale(p.sws_out, p.video_rgb->data, p.video_rgb->linesize, 0, height,
                    p.output_frame->data, p.output_frame->linesize);
    if (ret < 0) {
        return ret;
    }

    // Decoded timestamps are in the input stream's time base; the encoder
    // expects them in its own time base and strictly increasing.
    int64_t pts = *next_pts;
    const int64_t src_ts = p.video_frame->best_effort_timestamp;
    if (src_ts != AV_NOPTS_VALUE) {
        pts = av_rescale_q(src_ts, in_stream->time_base, p.output_codec_ctx->time_base);
        if (pts < *next_pts) {
            pts = *next_pts;
        }
    }
    p.output_frame->pts = pts;
    *next_pts = pts + 1;

    return encode_and_write(p, out_stream, p.output_frame);
}

// Pulls every frame currently available from the video decoder and encodes it.
int drain_decoder(Pipeline &p, const AVStream *in_stream, AVStream *out_stream,
                  int64_t *next_pts) {
    for (;;) {
        int ret = avcodec_receive_frame(p.video_codec_ctx, p.video_frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            return 0;
        }
        if (ret < 0) {
            return ret;
        }
        ret = compose_and_encode(p, in_stream, out_stream, next_pts);
        av_frame_unref(p.video_frame);
        if (ret < 0) {
            return ret;
        }
    }
}

// Runs the whole input.mp4 + overlay.png -> output.gif conversion.
// Returns 0 on success or a negative AVERROR code.
int run(Pipeline &p) {
    int ret = open_video_decoder("input.mp4", &p.video_fmt_ctx, &p.video_codec_ctx,
                                 &p.video_stream_idx);
    if (ret < 0) {
        return ret;
    }
    ret = open_video_decoder("overlay.png", &p.png_fmt_ctx, &p.png_codec_ctx,
                             &p.png_stream_idx);
    if (ret < 0) {
        return ret;
    }

    const int width = p.video_codec_ctx->width;
    const int height = p.video_codec_ctx->height;

    p.pkt = av_packet_alloc();
    p.video_frame = av_frame_alloc();
    p.png_frame = av_frame_alloc();
    p.video_rgb = alloc_picture(AV_PIX_FMT_RGB24, width, height);
    p.png_rgba = alloc_picture(AV_PIX_FMT_RGBA, width, height);
    p.output_frame = alloc_picture(AV_PIX_FMT_RGB8, width, height);
    if (!p.pkt || !p.video_frame || !p.png_frame || !p.video_rgb || !p.png_rgba ||
        !p.output_frame) {
        return AVERROR(ENOMEM);
    }

    // Decode the overlay once and scale it to the video size. RGBA keeps the
    // alpha channel that the blend step needs.
    ret = decode_first_frame(p.png_fmt_ctx, p.png_codec_ctx, p.png_stream_idx, p.pkt,
                             p.png_frame);
    if (ret < 0) {
        return ret;
    }
    p.sws_png = sws_getContext(p.png_frame->width, p.png_frame->height,
                               static_cast<AVPixelFormat>(p.png_frame->format),
                               width, height, AV_PIX_FMT_RGBA,
                               SWS_BILINEAR, nullptr, nullptr, nullptr);
    p.sws_video = sws_getContext(width, height, p.video_codec_ctx->pix_fmt,
                                 width, height, AV_PIX_FMT_RGB24,
                                 SWS_BILINEAR, nullptr, nullptr, nullptr);
    // Created once instead of once per frame.
    p.sws_out = sws_getContext(width, height, AV_PIX_FMT_RGB24,
                               width, height, AV_PIX_FMT_RGB8,
                               SWS_BILINEAR, nullptr, nullptr, nullptr);
    if (!p.sws_png || !p.sws_video || !p.sws_out) {
        return AVERROR(EINVAL);
    }
    ret = sws_scale(p.sws_png, p.png_frame->data, p.png_frame->linesize, 0,
                    p.png_frame->height, p.png_rgba->data, p.png_rgba->linesize);
    if (ret < 0) {
        return ret;
    }

    // Set up the GIF output.
    ret = avformat_alloc_output_context2(&p.output_fmt_ctx, nullptr, "gif", "output.gif");
    if (ret < 0) {
        return ret;
    }
    const AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_GIF);
    if (!encoder) {
        return AVERROR_ENCODER_NOT_FOUND;
    }
    AVStream *out_stream = avformat_new_stream(p.output_fmt_ctx, nullptr);
    if (!out_stream) {
        return AVERROR(ENOMEM);
    }
    p.output_codec_ctx = avcodec_alloc_context3(encoder);
    if (!p.output_codec_ctx) {
        return AVERROR(ENOMEM);
    }

    const AVStream *in_stream = p.video_fmt_ctx->streams[p.video_stream_idx];
    AVRational fps = in_stream->r_frame_rate;
    if (fps.num <= 0 || fps.den <= 0) {
        fps = av_make_q(25, 1);  // fallback when the container reports no rate
    }
    p.output_codec_ctx->width = width;
    p.output_codec_ctx->height = height;
    p.output_codec_ctx->pix_fmt = AV_PIX_FMT_RGB8;  // 8-bit format accepted by the GIF encoder
    p.output_codec_ctx->time_base = av_inv_q(fps);
    p.output_codec_ctx->framerate = fps;

    ret = avcodec_open2(p.output_codec_ctx, encoder, nullptr);
    if (ret < 0) {
        return ret;
    }
    ret = avcodec_parameters_from_context(out_stream->codecpar, p.output_codec_ctx);
    if (ret < 0) {
        return ret;
    }
    out_stream->time_base = p.output_codec_ctx->time_base;

    ret = avio_open(&p.output_fmt_ctx->pb, "output.gif", AVIO_FLAG_WRITE);
    if (ret < 0) {
        return ret;
    }
    ret = avformat_write_header(p.output_fmt_ctx, nullptr);
    if (ret < 0) {
        return ret;
    }

    // Decode, blend and encode every video frame.
    int64_t next_pts = 0;
    while ((ret = av_read_frame(p.video_fmt_ctx, p.pkt)) >= 0) {
        if (p.pkt->stream_index == p.video_stream_idx) {
            ret = avcodec_send_packet(p.video_codec_ctx, p.pkt);
            if (ret >= 0) {
                ret = drain_decoder(p, in_stream, out_stream, &next_pts);
            }
        }
        av_packet_unref(p.pkt);
        if (ret < 0) {
            return ret;
        }
    }
    if (ret != AVERROR_EOF) {
        return ret;
    }

    // Flush the frames still buffered in the decoder, then in the encoder.
    ret = avcodec_send_packet(p.video_codec_ctx, nullptr);
    if (ret < 0) {
        return ret;
    }
    ret = drain_decoder(p, in_stream, out_stream, &next_pts);
    if (ret < 0) {
        return ret;
    }
    ret = encode_and_write(p, out_stream, nullptr);
    if (ret < 0) {
        return ret;
    }

    return av_write_trailer(p.output_fmt_ctx);
}

}  // namespace

// Converts input.mp4 to output.gif with overlay.png alpha-blended onto every
// frame. Exits with 0 on success and 1 on any FFmpeg error.
int main() {
    av_log_set_level(AV_LOG_INFO);

    Pipeline pipeline;
    const int ret = run(pipeline);
    if (ret < 0) {
        char msg[AV_ERROR_MAX_STRING_SIZE] = {0};
        av_strerror(ret, msg, sizeof(msg));
        std::cerr << "conversion failed: " << msg << std::endl;
        return 1;
    }
    return 0;
}

Java 借助封装库实现

Java无法像C++那样直接调用C语言API（需要借助JNI之类的桥接层），推荐用JavaCV——它是FFmpeg和OpenCV的Java封装，上手简单，封装了大部分FFmpeg的功能。步骤如下：

引入依赖：如果用Maven，直接添加JavaCV的依赖（注意匹配FFmpeg版本）
读取输入源：用FFmpegFrameGrabber读取视频，用OpenCV工具加载PNG图片
帧叠加处理：将PNG调整为视频尺寸，用addWeighted按固定权重混合两幅图像（得到整体半透明的叠加效果；addWeighted不会读取PNG自身的Alpha通道，如需逐像素透明度要另行按Alpha混合）
写入GIF：用FFmpegFrameRecorder配置GIF参数，写入处理后的帧

给你一段示例代码：

import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.bytedeco.javacv.FFmpegFrameRecorder;
import org.bytedeco.javacv.Frame;
import org.bytedeco.javacv.OpenCVFrameConverter;
import org.bytedeco.opencv.opencv_core.Mat;
import org.bytedeco.opencv.opencv_imgcodecs;

import static org.bytedeco.opencv.global.opencv_core.addWeighted;
import static org.bytedeco.opencv.global.opencv_imgproc.resize;

public class VideoToGifWithOverlay {
    public static void main(String[] args) throws Exception {
        // 读取视频文件
        FFmpegFrameGrabber videoGrabber = new FFmpegFrameGrabber("input.mp4");
        videoGrabber.start();

        // 读取并调整PNG叠加层尺寸
        Mat overlayMat = opencv_imgcodecs.imread("overlay.png", opencv_imgcodecs.IMREAD_UNCHANGED);
        resize(overlayMat, overlayMat, new org.bytedeco.opencv.opencv_core.Size(videoGrabber.getImageWidth(), videoGrabber.getImageHeight()));

        OpenCVFrameConverter.ToMat converter = new OpenCVFrameConverter.ToMat();

        // 初始化GIF录制器
        FFmpegFrameRecorder recorder = new FFmpegFrameRecorder("output.gif", videoGrabber.getImageWidth(), videoGrabber.getImageHeight());
        recorder.setFormat("gif");
        recorder.setFrameRate(videoGrabber.getFrameRate());
        recorder.setPixelFormat(0); // 对应RGB8格式
        recorder.start();

        Frame videoFrame;
        while ((videoFrame = videoGrabber.grabFrame()) != null) {
            if (videoFrame.image != null) {
                Mat videoMat = converter.convert(videoFrame);
                // 叠加PNG：参数依次为源帧、源权重、叠加帧、叠加权重、偏移量
                addWeighted(videoMat, 0.8, overlayMat, 0.2, 0, videoMat);
                // 转换回帧并写入
                recorder.record(converter.convert(videoMat));
                videoMat.release();
            }
        }

        // 收尾工作
        recorder.stop();
        videoGrabber.stop();
        overlayMat.release();
    }
}

一些注意事项

库配置：C++需要链接FFmpeg的各个库（avformat、avcodec、avutil、swscale；Windows下对应的DLL文件名带主版本号，如avformat-60.dll）或静态编译；JavaCV的Maven依赖会自动处理本地库加载
像素格式：不同输入的像素格式可能不同，必须统一后再处理，否则会出现画面错乱
GIF优化：生成GIF时可以调整帧率、调色板数量，或配合使用palettegen/paletteuse滤镜来减小体积、提升画质

内容的提问来源于stack exchange，提问作者Mehdi Haghgoo

火山引擎最新活动

方舟 Coding Plan

模型自由，工具不限，最新支持 DeepSeek-V4 系列与 GLM-5.1，受邀下单叠加9.5折

ArkClaw

7×24在线专属智能伙伴

Seedance 2.0 全面开放 API

创作无限可能，一键生成电影级 AI 视频

新用户特惠专场

大模型19元起，AI应用9.9元畅享，新人首购爆款尽享优惠