forked from p13590867/p94fxhifk
parent
6910979d90
commit
8a873cc430
@ -0,0 +1,101 @@
|
|||||||
|
import os
|
||||||
|
import random
|
||||||
|
|
||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow.keras.models import Model
|
||||||
|
|
||||||
|
|
||||||
|
# Name of the Keras layer whose output is used as the feature vector; it is
# looked up with model.get_layer(layer_name) when the intermediate model is built.
layer_name = 'global_max_pooling2d'
|
||||||
|
#model = tf.keras.models.load_model('models/resnet.h5')
|
||||||
|
#intermediate_layer_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)
|
||||||
|
|
||||||
|
|
||||||
|
# Load the audio data
|
||||||
|
def load_data(data_path):
    """Load an audio file and turn its non-silent part into a mel spectrogram.

    The file is loaded at 16 kHz, the intervals that ``librosa.effects.split``
    reports for ``top_db=20`` are kept and joined back to back, and the mel
    spectrogram of the joined signal is computed with ``hop_length=256``.

    Args:
        data_path: Path of the audio file to read.

    Returns:
        The mel spectrogram as a ``float32`` array with one extra axis added
        in front and one at the end (``ps[np.newaxis, ..., np.newaxis]``).

    Raises:
        AssertionError: If fewer than 8000 samples (0.5 s at 16 kHz) remain
            after the silent parts are removed.
    """
    wav, sr = librosa.load(data_path, sr=16000)
    intervals = librosa.effects.split(wav, top_db=20)

    # Join the kept slices in one vectorised call instead of extending a
    # Python list sample by sample.
    segments = [wav[start:end] for start, end in intervals]
    if segments:
        wav_output = np.concatenate(segments)
    else:
        # np.concatenate refuses an empty sequence; an empty signal lets the
        # length check below report the problem in the usual way.
        wav_output = np.empty(0, dtype=wav.dtype)

    if len(wav_output) < 8000:
        # Raised explicitly (rather than via an ``assert`` statement) so the
        # check still runs under ``python -O``; the exception type is kept
        # for callers that already catch AssertionError.
        raise AssertionError("有效音频小于0.5s")

    ps = librosa.feature.melspectrogram(y=wav_output, sr=sr, hop_length=256).astype(np.float32)
    ps = ps[np.newaxis, ..., np.newaxis]
    return ps
|
||||||
|
|
||||||
|
|
||||||
|
def infer(audio_path, model=None):
    """Run the feature-extraction model on one audio file.

    Args:
        audio_path: Path of the audio file; it is preprocessed by
            ``load_data``.
        model: Optional object exposing ``predict``. When omitted, the
            module-level ``intermediate_layer_model`` is used, so that name
            must have been assigned before the call.

    Returns:
        Whatever ``predict`` returns for the preprocessed input.
    """
    data = load_data(audio_path)
    if model is None:
        # Backward-compatible default: existing callers rely on the global
        # that the script section assigns before calling infer().
        model = intermediate_layer_model
    return model.predict(data)
|
||||||
|
|
||||||
|
|
||||||
|
def _cosine_similarity(vec_a, vec_b):
    """Return the cosine of the angle between two feature vectors."""
    return np.dot(vec_a, vec_b) / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b))


if __name__ == '__main__':
    # For every entry of dataset/predict: load the model stored under
    # models/<entry>/resnet.h5, extract the feature of the reference
    # recording, and compare it with randomly drawn recordings of that entry.
    #
    # A single pair of recordings can be compared the same way: take
    # infer(path)[0] for both files and threshold their cosine similarity
    # at 0.92.
    predict_root = 'dataset/predict'
    person2 = 'dataset/wctest.wav'  # reference recording compared against every entry
    predict_num = 20                # number of random draws per entry
    sample_threshold = 0.92         # per-recording decision threshold
    average_threshold = 0.925       # threshold applied to the mean similarity

    for name in os.listdir(predict_root):
        dist_sum = 0
        print(name)
        model_path = "models" + '/' + name + '/resnet.h5'
        print(model_path + " is running")
        model = tf.keras.models.load_model(model_path)
        # infer() reads this module-level name, so it has to be rebound for
        # each freshly loaded model.
        intermediate_layer_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

        feature2 = infer(person2)[0]

        person_dir = os.path.join(predict_root, name)
        print(person_dir)
        files = os.listdir(person_dir)
        if not files:
            # Nothing to draw from; the random draw below would raise
            # ValueError on an empty list.
            print("%s contains no files, skipped" % person_dir)
            continue

        # Draws are made with replacement, so a directory holding fewer than
        # predict_num files is still sampled predict_num times.
        for _ in range(predict_num):
            person1 = os.path.join(person_dir, random.choice(files))
            feature1 = infer(person1)[0]
            dist = _cosine_similarity(feature1, feature2)
            dist_sum = dist + dist_sum
            if dist > sample_threshold:
                print("%s 和 %s 为同一个人,相似度为:%f" % (person1, person2, dist))
            else:
                print("%s 和 %s 不是同一个人,相似度为:%f" % (person1, person2, dist))

        dist_avg = dist_sum / predict_num
        if dist_avg > average_threshold:
            print("为同一个人,相似度为:%f" % dist_avg)
        else:
            print("不是同一个人,相似度为:%f" % dist_avg)
|
||||||
|
|
||||||
|
# dist_sum = 0
|
||||||
|
# for file in files:
|
||||||
|
# person1 = os.path.join('dataset/audio', file)
|
||||||
|
# feature1 = infer(person1)[0]
|
||||||
|
# dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
|
||||||
|
# dist_sum = dist + dist_sum
|
||||||
|
# if dist > 0.92:
|
||||||
|
# print("%s 和 %s 为同一个人,相似度为:%f" % (person1, person2, dist))
|
||||||
|
# else:
|
||||||
|
# print("%s 和 %s 不是同一个人,相似度为:%f" % (person1, person2, dist))
|
||||||
|
#
|
||||||
|
# num = len(files)
|
||||||
|
# dist_avg = dist_sum / num
|
||||||
|
# print(dist_avg)
|
||||||
|
# if dist_avg > 0.925:
|
||||||
|
# print("为同一个人,相似度为:%f" % (dist_avg))
|
||||||
|
# else:
|
||||||
|
# print("不是同一个人,相似度为:%f" % (dist_avg))
|
@ -0,0 +1,27 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_data_function(example):
    """Parse one serialized example into its ``data`` and ``label`` features.

    Args:
        example: A serialized example as accepted by
            ``tf.io.parse_single_example``.

    Returns:
        The result of ``tf.io.parse_single_example`` for the schema below.
    """
    # [May need adjusting] 16384 is the product of the mel-spectrogram shape
    # dimensions that was used when the records were written.
    flat_spectrogram = tf.io.FixedLenFeature([16384], tf.float32)
    class_id = tf.io.FixedLenFeature([], tf.int64)
    schema = {'data': flat_spectrogram, 'label': class_id}
    return tf.io.parse_single_example(example, schema)
|
||||||
|
|
||||||
|
|
||||||
|
def train_reader_tfrecord(data_path, num_epochs, batch_size):
    """Build the training dataset from a TFRecord file.

    Args:
        data_path: Passed to ``tf.data.TFRecordDataset``.
        num_epochs: Passed to ``repeat`` as ``count``.
        batch_size: Passed to ``batch``.

    Returns:
        The dataset after the map, shuffle, repeat, batch and prefetch steps
        have been applied in that order.
    """
    records = tf.data.TFRecordDataset(data_path)
    parsed = records.map(_parse_data_function)
    shuffled = parsed.shuffle(buffer_size=1000)
    repeated = shuffled.repeat(count=num_epochs)
    batched = repeated.batch(batch_size=batch_size)
    return batched.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
|
||||||
|
|
||||||
|
|
||||||
|
def test_reader_tfrecord(data_path, batch_size):
    """Build the evaluation dataset from a TFRecord file.

    Args:
        data_path: Passed to ``tf.data.TFRecordDataset``.
        batch_size: Passed to ``batch``.

    Returns:
        The parsed records grouped into batches; no shuffle or repeat step
        is applied.
    """
    return (
        tf.data.TFRecordDataset(data_path)
        .map(_parse_data_function)
        .batch(batch_size=batch_size)
    )
|
Loading…
Reference in new issue