forked from p13590867/p94fxhifk
parent
6910979d90
commit
8a873cc430
@ -0,0 +1,101 @@
|
||||
import os
|
||||
import random
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import Model
|
||||
|
||||
|
||||
# Name of the model layer whose output is used as the feature vector
# (looked up with model.get_layer() when the intermediate model is built).
layer_name = 'global_max_pooling2d'
|
||||
#model = tf.keras.models.load_model('models/resnet.h5')
|
||||
#intermediate_layer_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)
|
||||
|
||||
|
||||
# 读取音频数据
|
||||
def load_data(data_path):
    """Load an audio file and turn it into a mel-spectrogram model input.

    The audio is loaded at 16 kHz, the intervals returned by
    ``librosa.effects.split(top_db=20)`` are joined into one signal, and a
    mel spectrogram (hop length 256) is computed from the result.

    Args:
        data_path: Path of the audio file to read.

    Returns:
        The float32 mel spectrogram with one extra axis of length 1 added
        at the front and one at the end.

    Raises:
        AssertionError: If fewer than 8000 samples (0.5 s at 16 kHz) remain
            after the split intervals are joined.
    """
    wav, sr = librosa.load(data_path, sr=16000)
    intervals = librosa.effects.split(wav, top_db=20)

    # Join the kept slices inside NumPy instead of copying the samples one
    # by one into a Python list.
    segments = [wav[start:end] for start, end in intervals]
    if segments:
        wav_output = np.concatenate(segments)
    else:
        # np.concatenate rejects an empty list; fall through to the length
        # check below so the caller still gets the usual error.
        wav_output = np.zeros(0, dtype=wav.dtype)

    # Explicit raise instead of `assert` so the check is not stripped by
    # `python -O`; AssertionError is kept so existing callers are unaffected.
    if len(wav_output) < 8000:
        raise AssertionError("有效音频小于0.5s")

    ps = librosa.feature.melspectrogram(y=wav_output, sr=sr, hop_length=256).astype(np.float32)
    return ps[np.newaxis, ..., np.newaxis]
|
||||
|
||||
|
||||
def infer(audio_path, model=None):
    """Run the feature-extraction model on one audio file.

    Args:
        audio_path: Path of the audio file; it is preprocessed by
            ``load_data``.
        model: Optional object exposing ``predict``. When omitted, the
            module-level ``intermediate_layer_model`` is used, so that name
            must have been assigned before the call.

    Returns:
        The value returned by ``predict`` for the preprocessed audio.
    """
    data = load_data(audio_path)
    if model is None:
        # Backward-compatible default: the script entry point binds this
        # global before calling infer().
        model = intermediate_layer_model
    return model.predict(data)
|
||||
|
||||
|
||||
def _cosine_similarity(vec_a, vec_b):
    """Return the dot product of two vectors divided by the product of their norms."""
    return np.dot(vec_a, vec_b) / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b))


if __name__ == '__main__':
    # For every sub-directory of dataset/predict, load the matching model
    # from models/<name>/resnet.h5, then compare randomly drawn files of
    # that directory against one fixed reference recording.
    predict_root = 'dataset/predict'
    person2 = 'dataset/wctest.wav'
    # Number of random draws (with replacement) per directory.
    predict_num = 20

    for speaker in os.listdir(predict_root):
        print(speaker)
        model_path = "models" + '/' + speaker + '/resnet.h5'
        print(model_path + " is running")
        model = tf.keras.models.load_model(model_path)
        # infer() reads this module-level name, so it is rebound for every
        # model that gets loaded.
        intermediate_layer_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

        # The reference feature depends on the model, so it is recomputed
        # once per directory rather than once overall.
        feature2 = infer(person2)[0]

        speaker_dir = os.path.join(predict_root, speaker)
        print(speaker_dir)
        files = os.listdir(speaker_dir)
        if not files:
            # Drawing a random file from an empty directory would raise;
            # report it and move on to the next directory.
            print("%s 中没有音频文件,跳过" % speaker_dir)
            continue

        dist_sum = 0
        for _ in range(predict_num):
            person1 = os.path.join(speaker_dir, random.choice(files))
            feature1 = infer(person1)[0]
            dist = _cosine_similarity(feature1, feature2)
            dist_sum += dist
            if dist > 0.92:
                print("%s 和 %s 为同一个人,相似度为:%f" % (person1, person2, dist))
            else:
                print("%s 和 %s 不是同一个人,相似度为:%f" % (person1, person2, dist))

        # Final verdict for the directory uses the mean similarity and a
        # slightly stricter threshold than the per-file check.
        dist_avg = dist_sum / predict_num
        if dist_avg > 0.925:
            print("为同一个人,相似度为:%f" % dist_avg)
        else:
            print("不是同一个人,相似度为:%f" % dist_avg)
|
||||
|
||||
# dist_sum = 0
|
||||
# for file in files:
|
||||
# person1 = os.path.join('dataset/audio', file)
|
||||
# feature1 = infer(person1)[0]
|
||||
# dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
|
||||
# dist_sum = dist + dist_sum
|
||||
# if dist > 0.92:
|
||||
# print("%s 和 %s 为同一个人,相似度为:%f" % (person1, person2, dist))
|
||||
# else:
|
||||
# print("%s 和 %s 不是同一个人,相似度为:%f" % (person1, person2, dist))
|
||||
#
|
||||
# num = len(files)
|
||||
# dist_avg = dist_sum / num
|
||||
# print(dist_avg)
|
||||
# if dist_avg > 0.925:
|
||||
# print("为同一个人,相似度为:%f" % (dist_avg))
|
||||
# else:
|
||||
# print("不是同一个人,相似度为:%f" % (dist_avg))
|
@ -0,0 +1,27 @@
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def _parse_data_function(example):
    """Parse one serialized example into a dict with 'data' and 'label'.

    Args:
        example: A single serialized record, as yielded by the TFRecord
            dataset this function is mapped over.

    Returns:
        The result of ``tf.io.parse_single_example`` for the feature
        description below.
    """
    # [May need adjusting] 16384 is the product of the dimensions of the
    # configured mel-spectrogram shape.
    flat_spectrogram = tf.io.FixedLenFeature([16384], tf.float32)
    class_label = tf.io.FixedLenFeature([], tf.int64)
    schema = {'data': flat_spectrogram, 'label': class_label}
    return tf.io.parse_single_example(example, schema)
|
||||
|
||||
|
||||
def train_reader_tfrecord(data_path, num_epochs, batch_size):
    """Build the training dataset from TFRecord data.

    Records are parsed, shuffled with a buffer of 1000, repeated
    ``num_epochs`` times, batched, and prefetched.

    Args:
        data_path: Passed to ``tf.data.TFRecordDataset``.
        num_epochs: Repeat count for the dataset.
        batch_size: Number of parsed records per batch.

    Returns:
        The resulting ``tf.data`` dataset.
    """
    dataset = tf.data.TFRecordDataset(data_path).map(_parse_data_function)
    dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.repeat(count=num_epochs)
    dataset = dataset.batch(batch_size=batch_size)
    return dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
|
||||
|
||||
|
||||
def test_reader_tfrecord(data_path, batch_size):
    """Build the evaluation dataset from TFRecord data.

    Records are parsed and batched in file order; nothing is shuffled or
    repeated.

    Args:
        data_path: Passed to ``tf.data.TFRecordDataset``.
        batch_size: Number of parsed records per batch.

    Returns:
        The resulting ``tf.data`` dataset.
    """
    records = tf.data.TFRecordDataset(data_path)
    return records.map(_parse_data_function).batch(batch_size=batch_size)
|
Loading…
Reference in new issue