| MusicGen-Style | 3.3B | 风格迁移 | 最长 30 秒 |
EnCodec 编解码器
EnCodec 用于压缩和解压缩音频,在相同码率下可提供比传统编解码器(如 MP3、Opus)更高的音质。
import torch
import torchaudio
from audiocraft.data.audio_utils import convert_audio
from audiocraft.models import CompressionModel

# Load the pretrained EnCodec compression model.
encoder = CompressionModel.get_pretrained('facebook/encodec_32khz')

# Load the audio and convert it to the sample rate / channel count the
# model was trained on.
audio, sr = torchaudio.load("input.wav")
audio = convert_audio(audio, sr, encoder.sample_rate, encoder.channels)

# Encode: the model expects a batch shaped [B, C, T] and returns the
# discrete codes together with an optional rescaling factor.
with torch.no_grad():
    codes, scale = encoder.encode(audio.unsqueeze(0))

# Decode the codes back into a waveform.
with torch.no_grad():
    decoded = encoder.decode(codes, scale)
EnCodec 音频编解码
音频压缩
import math

import torch
import torchaudio
from audiocraft.data.audio_utils import convert_audio
from audiocraft.models import CompressionModel

encoder = CompressionModel.get_pretrained('facebook/encodec_24khz')

# Encode: resample/remix to what the model expects, then add a batch
# dimension because encode() takes [B, C, T].
audio, sr = torchaudio.load("music.wav")
audio = convert_audio(audio, sr, encoder.sample_rate, encoder.channels)
with torch.no_grad():
    codes, scale = encoder.encode(audio.unsqueeze(0))

# Quantized bitstream: encode() already returns the quantized codebook
# indices, so no separate quantize step exists. Each index needs
# log2(cardinality) bits; the tensor's int64 storage width would
# overstate the compressed size.
quantized = codes
compressed_size = math.ceil(
    quantized.numel() * math.log2(encoder.cardinality) / 8
)  # bytes

# Decode back to a waveform.
with torch.no_grad():
    decoded_audio = encoder.decode(quantized, scale)
常见工作流
工作流 1:音乐应用
import streamlit as st
from audiocraft.models import MusicGen


@st.cache_resource
def load_model():
    """Return the MusicGen model, cached by Streamlit as a shared resource."""
    return MusicGen.get_pretrained('facebook/musicgen-small')


model = load_model()
prompt = st.text_input("描述你想要生成的音乐:")

if st.button("生成"):
    with st.spinner("生成中..."):
        model.set_generation_params(duration=8)
        wav = model.generate([prompt])
    # Take the rate from the model instead of hard-coding 32000 so the
    # player stays correct if a different checkpoint is loaded.
    st.audio(wav[0, 0].cpu().numpy(), sample_rate=model.sample_rate)
工作流 2:音效库生成
import torchaudio
from audiocraft.models import AudioGen

model = AudioGen.get_pretrained('facebook/audiogen-medium')
model.set_generation_params(duration=3)

sfx_descriptions = [
    "explosion, deep boom",
    "rain on window",
    "keyboard typing fast",
    "door creaking open",
]

for i, desc in enumerate(sfx_descriptions):
    wav = model.generate([desc])
    # generate() returns [batch, channels, samples]; torchaudio.save needs
    # a 2-D [channels, samples] tensor, so index only the batch dimension.
    # AudioGen runs at 16 kHz, so the rate must come from the model rather
    # than a hard-coded 32000.
    torchaudio.save(
        f"sfx_{i:03d}.wav", wav[0].cpu(), sample_rate=model.sample_rate
    )

print(f"生成了 {len(sfx_descriptions)} 个音效")
工作流 3:批处理
import torchaudio
from audiocraft.models import MusicGen
from tqdm import tqdm

model = MusicGen.get_pretrained('facebook/musicgen-small')
# The generation parameters are the same for every prompt, so set them once.
model.set_generation_params(duration=10)

prompts = [
    "jazz, saxophone solo, relaxed",
    "electronic, synth pad, upbeat",
    "classical, piano, melancholic",
]

for i, prompt in enumerate(tqdm(prompts)):
    # tqdm.write keeps the message from breaking the progress bar.
    tqdm.write(f"生成中: {prompt}")
    wav = model.generate([prompt])
    # torchaudio.save needs a 2-D [channels, samples] tensor.
    torchaudio.save(
        f"batch_{i}.wav", wav[0].cpu(), sample_rate=model.sample_rate
    )
高级用法
混合条件生成
# Combine several conditions: a text description, a style prompt and a
# reference melody. Melody conditioning goes through generate_with_chroma();
# generate() accepts text descriptions only. This requires a melody-capable
# checkpoint such as 'facebook/musicgen-melody'.
wav = model.generate_with_chroma(
    # One description per generated sample; the style text is appended to
    # the base description so both conditions apply to the same sample.
    descriptions=[f"rock song, electric guitar, drums, {style_prompt}"],
    melody_wavs=melody,  # melody condition, [C, T] or [B, C, T]
    # NOTE(review): assumes `melody` is already at the model's sample rate;
    # pass the melody file's real sample rate here if it differs.
    melody_sample_rate=model.sample_rate,
)
温度和采样控制
# High randomness (more creative output).
creative_sampling = {"temperature": 1.5, "top_k": 500}
model.set_generation_params(**creative_sampling)

# Low randomness (closer to deterministic output).
stable_sampling = {"temperature": 0.1, "top_k": 50}
model.set_generation_params(**stable_sampling)
自定义模型权重
# Load a custom fine-tuned MusicGen checkpoint and generate one clip with it.
custom_checkpoint = 'path/to/custom_musicgen'
model = MusicGen.get_pretrained(custom_checkpoint)
model.set_generation_params(duration=15)

custom_prompts = ["custom style music"]
wav = model.generate(custom_prompts)
Gradio 演示
import gradio as gr
from audiocraft.models import MusicGen

model = MusicGen.get_pretrained('facebook/musicgen-small')


def generate_music(prompt, duration):
    """Generate one clip for ``prompt`` lasting ``duration`` seconds.

    Returns:
        A ``(sample_rate, samples)`` tuple. ``gr.Audio`` expects the sample
        rate first and the NumPy audio data second.
    """
    model.set_generation_params(duration=duration)
    wav = model.generate([prompt])
    return model.sample_rate, wav[0, 0].cpu().numpy()


demo = gr.Interface(
    fn=generate_music,
    inputs=["text", gr.Slider(1, 30, value=8)],
    outputs=gr.Audio(label="Generated Music"),
    title="MusicGen 音乐生成器",
)
demo.launch()
性能优化
GPU 加速
import torch
from audiocraft.models import MusicGen

# Make sure CUDA is used when it is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# MusicGen is a plain wrapper object rather than an nn.Module, so it has no
# .to() method; the target device is chosen when the model is loaded.
model = MusicGen.get_pretrained('facebook/musicgen-small', device=device)
内存优化
# Use the small checkpoint to reduce memory usage.
model = MusicGen.get_pretrained('facebook/musicgen-small')

# Process the prompts in batches of two so each generate() call stays small.
batches = []
for start in range(0, len(prompts), 2):
    batches.append(prompts[start:start + 2])

for batch in batches:
    wavs = model.generate(batch)
常见问题
| 问题 | 解决方案 |
| CUDA OOM | 使用小模型(musicgen-small),缩短时长 |
| 生成的音乐质量差 | 丰富描述文本,加入乐器名称和风格 |
| 旋律条件不生效 | 确保旋律采样率与模型采样率一致(32kHz) |
| 生成慢 | 使用 GPU 加速,检查批大小 |
| 音频有噪声 | 尝试不同的 temperature/top_k 参数 |
资源链接
- GitHub: https://github.com/facebookresearch/audiocraft
- 论文(MusicGen): https://arxiv.org/abs/2306.05284
- HuggingFace: https://huggingface.co/facebook/musicgen-small
- 在线演示: https://huggingface.co/spaces/facebook/MusicGen
评论区