Python音频降噪脚本故障:合并WAV文件而非抵消噪声求修复
问题分析与修复方案
嘿,你的问题根源其实很直观——在combination()函数里,你把两个音频数据做了加法运算,而不是预期的减法,这就是为什么生成的是合并音频而非降噪后的结果。不过除了这个核心错误,还有几个细节需要调整,才能让降噪效果正常工作:
1. 修正核心运算逻辑
把加法改成减法的同时,要注意音频数据是int16(16位有符号整数)类型,直接加减一旦超出-32768~32767的范围就会发生数值回绕(溢出),产生明显的失真。建议先转换为更高精度的int32类型完成运算,再把结果裁剪回int16的合法区间。
2. 确保音频参数一致性
必须先检查两个输入音频的采样率、通道数、总长度是否完全一致,否则逐帧运算会出现错位或报错。
3. 修复绘图的硬编码问题
你所有绘图的标题和标签都是固定的speech1.wav,改成对应实际文件名的动态内容,能更清晰地区分不同波形图的含义。
完整修复后的代码
import pyaudio
import wave
import matplotlib.pyplot as plt
import numpy as np
import scipy.io.wavfile
import scipy.signal as sp


def _plot_waveform(path, title):
    """Read the WAV file at *path* and display its waveform titled *title*."""
    rate, data = scipy.io.wavfile.read(path)
    # Sample k occurs at k / rate seconds, so the axis ends at (len - 1) / rate.
    times = np.arange(len(data)) / rate
    plt.title(title)
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.plot(times, data)
    plt.show()


def _record(output_filename, label, *, channels=2, rate=44100, chunk=1024,
            seconds=5):
    """Record 16-bit audio from the default input device into a WAV file.

    Args:
        output_filename: Path of the WAV file to create (overwritten if present).
        label: Human-readable description used in the progress messages.
        channels: Number of input channels to capture.
        rate: Sampling rate in Hz.
        chunk: Number of frames read from the stream per call.
        seconds: Recording duration in seconds.
    """
    sample_format = pyaudio.paInt16
    audio = pyaudio.PyAudio()
    try:
        sample_width = audio.get_sample_size(sample_format)
        stream = audio.open(format=sample_format, channels=channels,
                            rate=rate, input=True, frames_per_buffer=chunk)
        try:
            print(f"recording {label}...")
            frames = [stream.read(chunk)
                      for _ in range(int(rate / chunk * seconds))]
            print(f"finished recording {label}")
        finally:
            # Release the input stream even if a read fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # The context manager closes the file even if a write raises.
    with wave.open(output_filename, 'wb') as wave_file:
        wave_file.setnchannels(channels)
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(rate)
        wave_file.writeframes(b''.join(frames))


def _subtract_noise(noisy, noise):
    """Return ``noisy - noise`` as int16, saturating instead of wrapping.

    The subtraction is done in int32 so that the difference of two 16-bit
    samples cannot overflow before it is clipped back to the int16 range.
    """
    difference = noisy.astype(np.int32) - noise.astype(np.int32)
    limits = np.iinfo(np.int16)
    return np.clip(difference, limits.min, limits.max).astype(np.int16)


def ambient():
    """Record the ambient noise to ``ambientnoise.wav`` and plot it."""
    output_filename = "ambientnoise.wav"
    _record(output_filename, "ambient noise")
    _plot_waveform(output_filename, f"Waveform of {output_filename}")


def voice():
    """Record voice over ambient noise to ``ambientwithvoice.wav`` and plot it."""
    output_filename = "ambientwithvoice.wav"
    _record(output_filename, "voice with ambient noise")
    _plot_waveform(output_filename, f"Waveform of {output_filename}")


def combination():
    """Subtract the ambient-noise recording from the voice recording.

    Reads ``ambientnoise.wav`` and ``ambientwithvoice.wav``, writes the
    sample-wise difference to ``filtered.wav`` and plots the result.

    Raises:
        ValueError: If the two files differ in sampling rate, or in shape
            (channel count or number of frames).
    """
    noise_rate, noise = scipy.io.wavfile.read('ambientnoise.wav')
    noisy_rate, noisy = scipy.io.wavfile.read('ambientwithvoice.wav')

    # Sample-wise subtraction is only meaningful for identically laid-out data.
    if noise_rate != noisy_rate:
        raise ValueError("两个音频文件的采样率不一致!")
    if noise.shape != noisy.shape:
        raise ValueError("两个音频文件的通道数或长度不一致!")

    # NOTE(review): assumes both files hold int16 samples, as written by
    # ambient() and voice() -- confirm if other inputs are ever used.
    filtered = _subtract_noise(noisy, noise)
    scipy.io.wavfile.write('filtered.wav', noise_rate, filtered)

    _plot_waveform('filtered.wav', "Waveform of filtered.wav (Noise Reduced)")


# Example usage (uncomment as needed):
# ambient()
# voice()
# combination()
额外说明
这种简单的时域减法降噪只适用于环境噪声稳定、且两次录制的噪声波形完全同步(逐采样点对齐)的场景。对于随机噪声,两次录制得到的波形互不相关,直接相减不仅无法抵消,反而会让噪声功率叠加(大约增加3 dB)。如果噪声是动态变化的,或者录制时麦克风位置有移动,效果同样会大打折扣。如果需要更专业的降噪效果,可以尝试基于谱减法、维纳滤波的算法,或者使用noisereduce这类专门的音频降噪库。
本文的提问来源于Stack Exchange,提问作者:Markus




