使用MossFormer2_SE_48K进行音频降噪
| 输入列名 | 说明 |
|---|---|
| audio_path | 存放音频路径的列 |
| output_path | 存放降噪后音频路径的列 |
降噪成功时输出降噪后音频的路径,失败时输出 None。
如参数没有默认值,则为必填参数
| 参数名称 | 类型 | 默认值 | 描述 |
|---|---|---|---|
| model_path | str | /opt/las/models | 模型目录,LAS中固定为默认值 |
| model_name | str | MossFormer2_SE_48K | 模型名字,LAS中固定为默认值 |
| max_duration | str | 7200 | 音频时长超过该时长(以秒为单位)时,音频会切分后再执行降噪 |
| output_format | str | None | 降噪后音频的格式,默认与输入音频格式一致,可选值为 'flac'、'mp3'、'm4a'、'wav'、'ogg'、'aac' 等。 |
下面的代码展示了如何使用 daft 运行算子对音频进行降噪。
from __future__ import annotations import logging import os import ray import daft from daft import col from daft.las.functions.audio.audio_denoise import AudioDenoise from daft.las.functions.udf import las_udf def configure_logging(): logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S.%s".format(), ) logging.getLogger("tracing.span").setLevel(logging.WARNING) logging.getLogger("daft_io.stats").setLevel(logging.WARNING) logging.getLogger("DaftStatisticsManager").setLevel(logging.WARNING) logging.getLogger("DaftFlotillaScheduler").setLevel(logging.WARNING) logging.getLogger("DaftFlotillaDispatcher").setLevel(logging.WARNING) configure_logging() if __name__ == "__main__": TOS_INPUT_DIR_URL = os.getenv("TOS_INPUT_DIR_URL", "las-cn-beijing-public-online.tos-cn-beijing.volces.com") TOS_OUTPUT_DIR = os.getenv("TOS_OUTPUT_DIR", "las-cn-beijing-public-online") samples = { "input_path": [os.path.join(f"https://{TOS_INPUT_DIR_URL}", "public/shared_audio_dataset/黑神话悟空对话.mp3")], "output_path": [os.path.join(f"tos://{TOS_OUTPUT_DIR}", "public/output/黑神话悟空对话_denoised.mp3")], } # 输出至output_path需要设置tos access_key和secret_key等认证信息,其中火山环境中TOS_ENDPOINT可使用内网地址以提升上传下载速度 # os.environ["TOS_ACCESS_KEY"] = os.getenv("TOS_ACCESS_KEY", "aksk") # os.environ["TOS_SECRET_KEY"] = os.getenv("TOS_SECRET_KEY", "aksk") # os.environ["TOS_ENDPOINT"] = os.getenv("TOS_ENDPOINT", "https://tos-cn-beijing.volces.com") # os.environ["TOS_REGION"] = os.getenv("TOS_REGION", "cn-beijing") ray.init(dashboard_host="0.0.0.0", runtime_env={"worker_process_setup_hook": configure_logging}) daft.context.set_runner_ray() daft.set_execution_config(actor_udf_ready_timeout=600) daft.set_execution_config(min_cpu_per_task=0) df_samples = daft.from_pydict(samples) df = df_samples.with_column( "result_path", las_udf( AudioDenoise, construct_args={ "model_path": "/opt/las/models", }, num_gpus=1, concurrency=1, batch_size=2, )(col("input_path"), 
col("output_path")), ) df.show(max_width=120, format="grid") # ┌────────────────────────────────────┬───────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┐ # │ audio_path │ output_path │ result_path │ # ╞════════════════════════════════════╪═══════════════════════════════════════════════════════════════════╪════════════════════════════════════════════════════════════════════╡ # │ tos://xxxxx/黑神话悟空对话.mp3 │ tos://xxxxx/output/黑神话悟空对话_denoised.mp3 │ tos://xxxxx/output/黑神话悟空对话_denoised.mp3 │ # └────────────────────────────────────┴───────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────┘