Swift中如何将TTS语音保存为MP3格式？已通过AVSpeechSynthesizer生成CAF文件，需改为保存为MP3

阿华AIGC实验室

2026-4-30

嘿，我来帮你搞定这个问题！Swift里的AVSpeechSynthesizer确实没法直接输出MP3格式，因为它的合成输出是PCM音频数据，默认存储为无压缩的CAF文件。不过我们可以分两步走：先把TTS内容保存为CAF，再通过系统原生的转码工具把它转换成MP3。下面是具体的实现步骤和代码：

在Swift中将TTS内容保存为MP3的方法

核心思路

AVSpeechSynthesizer本身不支持直接生成MP3，所以我们需要：

先将合成的语音保存为CAF格式的临时文件
尝试使用AVAssetExportSession将CAF文件导出为MP3格式（注意：苹果系统框架只内置了MP3解码器，没有MP3编码器，导出前必须检查supportedFileTypes中是否包含.mp3）

步骤1：用AVSpeechSynthesizer保存CAF文件

如果你已经实现了CAF文件的保存，可以直接跳到步骤2。这里给出完整的合成并保存CAF的代码：

import AVFoundation

/// Synthesizes text with `AVSpeechSynthesizer`, captures the generated PCM
/// audio into a temporary CAF file, and then hands that file to
/// `convertCAFtoMP3(cafURL:outputMP3URL:)`.
///
/// Audio is captured with `AVSpeechSynthesizer.write(_:toBufferCallback:)`
/// (iOS 13 / macOS 10.15 and later). `speak(_:)` only plays the utterance
/// through the audio output and never hands buffers to the delegate, so it
/// cannot be used to produce a file.
///
/// Keep a strong reference to the service until the conversion has finished;
/// the synthesizer is owned by the service and stops when it is deallocated.
class TTSService: NSObject, AVSpeechSynthesizerDelegate {
    private let synthesizer = AVSpeechSynthesizer()
    /// Open CAF file for the synthesis in progress; created on the first buffer.
    private var audioFile: AVAudioFile?
    /// Where the temporary CAF file of the synthesis in progress is written.
    private var pendingCAFURL: URL?
    /// Final MP3 destination; non-nil only while a synthesis is in progress.
    private var targetMP3URL: URL?

    /// Entry point: synthesizes `text` and converts the result to `outputMP3URL`.
    ///
    /// - Parameters:
    ///   - text: The text to synthesize.
    ///   - outputMP3URL: Destination of the final MP3 file.
    func synthesizeTextToMP3(text: String, outputMP3URL: URL) {
        targetMP3URL = outputMP3URL
        audioFile = nil
        // The temporary CAF file lives in the app's temporary directory.
        pendingCAFURL = FileManager.default.temporaryDirectory
            .appendingPathComponent("\(UUID().uuidString).caf")

        let utterance = AVSpeechUtterance(string: text)
        utterance.voice = AVSpeechSynthesisVoice(language: "zh-CN") // Replace with another language if needed.
        utterance.rate = 0.5 // Speech rate, range 0.0-1.0.

        synthesizer.delegate = self
        // write(_:toBufferCallback:) renders the utterance into buffers instead of playing it.
        synthesizer.write(utterance) { [weak self] buffer in
            guard let self = self else { return }
            self.speechSynthesizer(self.synthesizer, didSpeakAudioBuffer: buffer)
        }
    }

    /// Appends one synthesized buffer to the temporary CAF file.
    ///
    /// This is not an `AVSpeechSynthesizerDelegate` callback; it is invoked
    /// from the `write(_:toBufferCallback:)` closure. A zero-length buffer is
    /// treated as the end-of-stream marker and triggers the conversion.
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didSpeakAudioBuffer buffer: AVAudioBuffer) {
        guard let pcmBuffer = buffer as? AVAudioPCMBuffer else { return }
        guard pcmBuffer.frameLength > 0 else {
            finishSynthesis()
            return
        }

        if audioFile == nil {
            guard let cafURL = pendingCAFURL else { return }
            do {
                // Use the buffer's own format: the synthesizer decides sample rate and
                // channel count, and a mismatching file format makes writes fail.
                let format = pcmBuffer.format
                audioFile = try AVAudioFile(forWriting: cafURL,
                                            settings: format.settings,
                                            commonFormat: format.commonFormat,
                                            interleaved: format.isInterleaved)
            } catch {
                print("创建CAF临时文件失败：\(error.localizedDescription)")
                // Do not retry (and log again) for every following buffer.
                pendingCAFURL = nil
                return
            }
        }

        do {
            try audioFile?.write(from: pcmBuffer)
        } catch {
            print("写入CAF文件失败：\(error.localizedDescription)")
        }
    }

    /// Delegate callback for a finished utterance. Kept as a fallback trigger in
    /// case no zero-length buffer is delivered; finalization is idempotent.
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        finishSynthesis()
    }

    /// Closes the CAF file and starts the MP3 conversion, at most once per synthesis.
    private func finishSynthesis() {
        // targetMP3URL doubles as the "in progress" token, which makes this idempotent.
        guard let mp3URL = targetMP3URL else { return }
        let cafURL = audioFile?.url
        targetMP3URL = nil
        pendingCAFURL = nil
        // Release the AVAudioFile so it is closed before the file is read back.
        audioFile = nil

        guard let finishedCAFURL = cafURL else {
            print("语音合成未生成任何音频数据")
            return
        }
        convertCAFtoMP3(cafURL: finishedCAFURL, outputMP3URL: mp3URL)
    }
}

步骤2：将CAF转码为MP3

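使用系统的AVAssetExportSession尝试完成转码。需要注意：Passthrough预设只是原样拷贝音频数据，不会重新编码；而且苹果系统没有内置MP3编码器，所以在把outputFileType设置为.mp3之前，必须先确认supportedFileTypes中包含它，否则会抛出异常导致崩溃：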

extension TTSService {
    /// Tries to export the CAF file at `cafURL` to an MP3 file at `outputMP3URL`
    /// with `AVAssetExportSession`, reporting the outcome via `print`.
    ///
    /// Apple's frameworks can decode MP3 but ship no MP3 encoder, and the
    /// passthrough preset copies samples without re-encoding, so MP3 export is
    /// frequently unavailable. Assigning an unsupported `outputFileType` raises
    /// an Objective-C exception, so support is checked first. When the export
    /// cannot run or fails, the CAF file is left in place.
    ///
    /// - Parameters:
    ///   - cafURL: The source CAF file; deleted after a successful export.
    ///   - outputMP3URL: Destination of the exported file; any existing file there is replaced.
    private func convertCAFtoMP3(cafURL: URL, outputMP3URL: URL) {
        let audioAsset = AVAsset(url: cafURL)

        // The passthrough preset keeps the source audio data as-is.
        guard let exportSession = AVAssetExportSession(asset: audioAsset, presetName: AVAssetExportPresetPassthrough) else {
            print("创建MP3导出会话失败")
            return
        }

        // Setting an unsupported file type would crash with NSInvalidArgumentException.
        guard exportSession.supportedFileTypes.contains(.mp3) else {
            print("MP3转码失败：当前导出会话不支持MP3格式，已保留CAF文件：\(cafURL.path)")
            return
        }

        // The export fails if a file already exists at the destination.
        if FileManager.default.fileExists(atPath: outputMP3URL.path) {
            do {
                try FileManager.default.removeItem(at: outputMP3URL)
            } catch {
                print("删除已存在的MP3文件失败：\(error.localizedDescription)")
            }
        }

        exportSession.outputURL = outputMP3URL
        exportSession.outputFileType = .mp3

        exportSession.exportAsynchronously {
            DispatchQueue.main.async {
                switch exportSession.status {
                case .completed:
                    print("MP3文件生成成功！路径：\(outputMP3URL.path)")
                    // The temporary CAF file is no longer needed once the export succeeded.
                    do {
                        try FileManager.default.removeItem(at: cafURL)
                    } catch {
                        print("删除临时CAF文件失败：\(error.localizedDescription)")
                    }
                case .failed:
                    print("MP3转码失败：\(exportSession.error?.localizedDescription ?? "未知错误")")
                case .cancelled:
                    print("MP3转码被取消")
                default:
                    break
                }
            }
        }
    }
}

使用示例

// Destination: "tts_output.mp3" inside the app's Documents directory.
let outputMP3URL = FileManager.default
    .urls(for: .documentDirectory, in: .userDomainMask)
    .first!
    .appendingPathComponent("tts_output.mp3")

// Create the service and start synthesizing the sample sentence.
let ttsService = TTSService()
let sampleText = "你好，这是Swift生成的TTS转MP3示例"
ttsService.synthesizeTextToMP3(text: sampleText, outputMP3URL: outputMP3URL)

注意事项

写入App自身沙盒内的Documents目录不需要在Info.plist中声明任何权限；NSDocumentsFolderUsageDescription只用于macOS上访问用户“文稿”文件夹的场景，如果你的App确实要访问该文件夹，才需要添加这项描述
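AVAssetExportSession没有audioSettings属性，无法直接设置比特率、采样率等参数；如果需要自定义这些编码参数，可以改用AVAssetWriter（在AVAssetWriterInput的outputSettings中设置AVEncoderBitRateKey等）或AVAudioConverter，但它们同样只能编码AAC等系统支持的格式，不能编码MP3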
如果你确实需要MP3输出或更灵活的编码控制，需要集成第三方编码库如LAME，因为系统原生API不提供MP3编码；如果对格式没有硬性要求，原生支持的AAC（.m4a）已经能满足大部分需求

内容的提问来源于stack exchange，提问作者ppol2918