"""Visualise the effect of SpecAugment on a single audio file.

Loads a WAV file, computes its dB-scaled STFT magnitude spectrogram, applies
torchlibrosa's ``SpecAugmentation`` (time and frequency masking) and saves the
spectrogram before and after augmentation as PNG images.
"""
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import torch
from torchlibrosa.augmentation import SpecAugmentation


def _save_spectrogram_plot(spec_db, sample_rate, title, out_path):
    """Draw a dB spectrogram laid out as (freq_bins, frames) and save it to *out_path*."""
    fig = plt.figure(figsize=(10, 6))
    librosa.display.specshow(spec_db, sr=sample_rate, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.savefig(out_path)
    # Close the figure once it is on disk so figures do not accumulate in
    # pyplot's global state.
    plt.close(fig)


# Load the audio file; sr=None keeps the file's native sampling rate.
file_path = '00_BRUSH.wav'
y, sr = librosa.load(file_path, sr=None)

# Short-Time Fourier Transform (STFT) of the signal.
# librosa lays the result out as (freq_bins, frames).
D = librosa.stft(y)

# Magnitude spectrogram in dB, relative to the peak magnitude (peak == 0 dB).
spectrogram = librosa.amplitude_to_db(np.abs(D), ref=np.max)

# Plot the spectrogram of the original signal.
_save_spectrogram_plot(spectrogram, sr, 'origin spectrogram', 'origin_spectrogram.png')

# Convert to a PyTorch tensor for torchlibrosa.
# SpecAugmentation expects (batch, channels, time_steps, freq_bins), whereas
# librosa produced (freq_bins, frames): transpose first, otherwise the time
# mask is applied along frequency and the frequency mask along time.
# The explicit copy detaches the tensor from `spectrogram`
# (torch.from_numpy shares memory), so masking cannot overwrite the original.
spectrogram_tensor = (
    torch.from_numpy(spectrogram.T.copy()).unsqueeze(0).unsqueeze(0)
)

# Apply SpecAugmentation: 2 time stripes (width < 64 frames) and
# 2 frequency stripes (width < 8 bins).
# NOTE(review): torchlibrosa appears to fill the masked stripes with 0; on this
# dB scale 0 is the peak level, so stripes will render as the loudest colour
# rather than as silence -- confirm this is the intended visualisation.
spec_augmenter = SpecAugmentation(
    time_drop_width=64,
    time_stripes_num=2,
    freq_drop_width=8,
    freq_stripes_num=2,
)
augmented_spectrogram_tensor = spec_augmenter(spectrogram_tensor)

# Convert the augmented spectrogram back to a numpy array and restore the
# (freq_bins, frames) layout that librosa.display.specshow expects.
augmented_spectrogram = (
    augmented_spectrogram_tensor.squeeze(0).squeeze(0).numpy().T
)

# Plot the spectrogram after SpecAugmentation.
_save_spectrogram_plot(
    augmented_spectrogram, sr, 'after spec augment spectrogram', 'spec_augment.png'
)