"""Plot a spectrogram of an audio file before and after SpecAugment masking.

Loads ``test.wav``, computes its dB-scaled STFT magnitude, saves a plot of it,
applies torchlibrosa's ``SpecAugmentation`` (time and frequency stripe
masking), and saves a plot of the augmented result.
"""

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import torch
from torchlibrosa.augmentation import SpecAugmentation


def _save_spectrogram_plot(spec_db, sr, title, out_path):
    """Draw a dB spectrogram on a log-frequency axis and save it to ``out_path``.

    Args:
        spec_db: 2-D array passed straight to ``librosa.display.specshow``
            (frequency bins on the first axis, time frames on the second).
        sr: Sampling rate used to label the axes.
        title: Figure title.
        out_path: Destination image path.
    """
    plt.figure(figsize=(10, 6))
    librosa.display.specshow(spec_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.savefig(out_path)


# Load the audio file; sr=None keeps the file's native sampling rate.
file_path = 'test.wav'
y, sr = librosa.load(file_path, sr=None)

# Short-Time Fourier Transform (STFT) of the signal.
D = librosa.stft(y)

# Magnitude spectrogram in dB relative to its peak value.
# librosa lays this out as (freq_bins, time_frames).
spectrogram = librosa.amplitude_to_db(np.abs(D), ref=np.max)

# Plot the spectrogram of the original signal.
_save_spectrogram_plot(
    spectrogram, sr, 'origin spectrogram', 'origin_spectrogram.png'
)

# Convert to a PyTorch tensor shaped (batch, channels, time_steps, freq_bins),
# which is the layout SpecAugmentation documents for its input. The transpose
# moves time ahead of frequency so that the time_* and freq_* settings act on
# the axes they are named for. The copy keeps the tensor from sharing memory
# with `spectrogram`, so masking cannot overwrite the original array.
spectrogram_tensor = (
    torch.from_numpy(spectrogram.T.copy()).unsqueeze(0).unsqueeze(0)
)

# Apply SpecAugmentation: up to 2 time stripes (max width 64 frames) and up to
# 2 frequency stripes (max width 8 bins).
# NOTE(review): torchlibrosa appears to fill dropped stripes with 0, which is
# the peak level on this dB-relative-to-max scale - confirm that this is the
# intended mask value for visualisation.
spec_augmenter = SpecAugmentation(
    time_drop_width=64,
    time_stripes_num=2,
    freq_drop_width=8,
    freq_stripes_num=2,
)
augmented_spectrogram_tensor = spec_augmenter(spectrogram_tensor)

# Convert the augmented tensor back to a numpy array and restore the
# (freq_bins, time_frames) layout that specshow expects.
augmented_spectrogram = (
    augmented_spectrogram_tensor.squeeze(0).squeeze(0).numpy().T
)

# Plot the spectrogram after SpecAugmentation.
_save_spectrogram_plot(
    augmented_spectrogram,
    sr,
    'after spec augment spectrogram',
    'spec_augment.png',
)