You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
43 lines
1.3 KiB
43 lines
1.3 KiB
4 months ago
|
import librosa
|
||
|
import librosa.display
|
||
|
import matplotlib.pyplot as plt
|
||
|
import numpy as np
|
||
|
import torch
|
||
|
from torchlibrosa.augmentation import SpecAugmentation
|
||
|
|
||
|
# Load the audio file; sr=None asks librosa to keep the file's own sampling rate.
file_path = 'test.wav'
y, sr = librosa.load(file_path, sr=None)

# Short-Time Fourier Transform (STFT) of the waveform, with librosa's default settings.
D = librosa.stft(y)

# Magnitude spectrogram converted to decibels, referenced to its largest value.
magnitude = np.abs(D)
spectrogram = librosa.amplitude_to_db(magnitude, ref=np.max)
|
||
|
|
||
|
# Draw the spectrogram of the original signal (time on x, log-frequency on y)
# and write it to a PNG file.
fig, ax = plt.subplots(figsize=(10, 6))
img = librosa.display.specshow(spectrogram, sr=sr, x_axis='time', y_axis='log', ax=ax)
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set_title('origin spectrogram')
fig.savefig('origin_spectrogram.png')
|
||
|
|
||
|
|
||
|
# Convert to a PyTorch tensor laid out the way SpecAugmentation expects.
# librosa produces the spectrogram as (freq_bins, time_steps), whereas
# torchlibrosa documents its input as (batch, channels, time_steps, freq_bins).
# Without the transpose, the "time" stripes are cut along the frequency axis
# and the "frequency" stripes along the time axis.
# torch.from_numpy shares memory with its source array, so the transposed data
# is copied first; any in-place masking then cannot alter `spectrogram`.
n_freq_bins, n_frames = spectrogram.shape
spectrogram_tensor = torch.from_numpy(spectrogram.T.copy()).unsqueeze(0).unsqueeze(0)

# Apply SpecAugmentation: 2 time stripes and 2 frequency stripes.
# The stripe widths are clamped to the spectrogram size so that very short
# clips (fewer than 64 frames) do not request a stripe wider than the axis.
# NOTE(review): the masked stripes are presumably filled with 0, which on this
# dB scale (ref=np.max) is the peak level, so they render as the brightest
# colour rather than as silence - confirm this is the intended look.
spec_augmenter = SpecAugmentation(
    time_drop_width=min(64, n_frames),
    time_stripes_num=2,
    freq_drop_width=min(8, n_freq_bins),
    freq_stripes_num=2,
)
augmented_spectrogram_tensor = spec_augmenter(spectrogram_tensor)

# Convert the augmented tensor back to a NumPy array and restore librosa's
# (freq_bins, time_steps) layout so it can be plotted like the original.
augmented_spectrogram = augmented_spectrogram_tensor.squeeze(0).squeeze(0).numpy().T
|
||
|
|
||
|
|
||
|
|
||
|
# Draw the spectrogram after SpecAugmentation (time on x, log-frequency on y)
# and write it to a PNG file.
fig, ax = plt.subplots(figsize=(10, 6))
img = librosa.display.specshow(augmented_spectrogram, sr=sr, x_axis='time', y_axis='log', ax=ax)
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set_title('after spec augment spectrogram')
fig.savefig('spec_augment.png')
|