You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

486 lines
28 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#项目一些参数配置
import os
import sys
sys.path.append(os.getcwd())
import torch
import multiprocessing
from datetime import datetime
from utils import create_logger
# Project root directory (the directory that contains this file)
root_path = os.path.abspath(os.path.dirname(__file__))
# Whether to run in incremental (sampled) data mode instead of on the full data set
samples_mode = True
# Sub-directory name used when building mode-specific data/result paths
samples_mode_flag = 'sample' if samples_mode else 'full'
# Number of users to draw when sampling
samples_user_nums = 10_000
# Flag: whether recall should be evaluated
need_metric_recall = True
# Offline model-validation mode: only the training data set is used
offline_mode = True
# Shared base directory for all data files
data_path = root_path + '/data/'
# Mode-specific data directory; the sub-directory is selected by samples_mode_flag
data_parent_path = data_path + samples_mode_flag + '/'

# NOTE: data_parent_path ends with '/', so os.path.join(data_parent_path, name)
# yields exactly data_parent_path + name.

# Shixun (practical-training project) item tables and embeddings
shixuns_data_path = os.path.join(data_parent_path, 'shixuns.csv')
shixuns_embed_path = os.path.join(data_parent_path, 'shixuns_emb.csv')
shixuns_bert_em_path = os.path.join(data_parent_path, 'shixuns_bert_emb.csv')
shixun_merge_emb_path = os.path.join(data_parent_path, 'shixun_merage_emb.csv')
cold_start_shixuns_data_path = os.path.join(data_parent_path, 'cold_start_shixuns.csv')
cold_start_shixuns_parent_path = data_path + 'cold_start_data/shixun/' + samples_mode_flag + '/'

# Subject (course) item tables and embeddings
subjects_data_path = os.path.join(data_parent_path, 'subjects.csv')
subjects_embed_path = os.path.join(data_parent_path, 'subjects_emb.csv')
subjects_bert_em_path = os.path.join(data_parent_path, 'subjects_bert_emb.csv')
subjects_merge_emb_path = os.path.join(data_parent_path, 'subjects_merage_emb.csv')
cold_start_subjects_data_path = os.path.join(data_parent_path, 'cold_start_subjects.csv')
cold_start_subjects_parent_path = data_path + 'cold_start_data/subject/' + samples_mode_flag + '/'

# myshixuns / mysubjects tables and their train/test splits
# (presumably user-item interaction records; verify against the data loaders)
myshixuns_data_path = os.path.join(data_parent_path, 'myshixuns.csv')
myshixuns_data = os.path.join(data_parent_path, 'myshixuns_full.csv')
myshixuns_train_data = os.path.join(data_parent_path, 'myshixuns_train.csv')
myshixuns_test_data = os.path.join(data_parent_path, 'myshixuns_test.csv')
mysubjects_data_path = os.path.join(data_parent_path, 'mysubjects.csv')
# NOTE(review): file name has no underscore before "full", unlike 'myshixuns_full.csv'
mysubjects_data = os.path.join(data_parent_path, 'mysubjectsfull.csv')
mysubjects_train_data = os.path.join(data_parent_path, 'mysubjects_train.csv')
mysubjects_test_data = os.path.join(data_parent_path, 'mysubjects_test.csv')
myshixuns_train_data_baseline = os.path.join(data_parent_path, 'myshixuns_train_baseline.csv')
myshixuns_test_data_baseline = os.path.join(data_parent_path, 'myshixuns_test_baseline.csv')
mysubjects_train_data_baseline = os.path.join(data_parent_path, 'mysubjects_train_baseline.csv')
mysubjects_test_data_baseline = os.path.join(data_parent_path, 'mysubjects_test_baseline.csv')
users_data_path = os.path.join(data_parent_path, 'users.csv')

# Mode-specific output directories (each value keeps its trailing '/')
shixun_save_path = root_path + '/results/shixun/' + samples_mode_flag + '/'
subject_save_path = root_path + '/results/subject/' + samples_mode_flag + '/'
shixun_features_save_path = root_path + '/features/shixun/' + samples_mode_flag + '/'
subject_features_save_path = root_path + '/features/subject/' + samples_mode_flag + '/'
shixun_model_save_path = root_path + '/models/shixun/' + samples_mode_flag + '/'
subject_model_save_path = root_path + '/models/subject/' + samples_mode_flag + '/'
myshixuns_save_path = root_path + '/data/shixun/' + samples_mode_flag + '/'
mysubjects_save_path = root_path + '/data/subject/' + samples_mode_flag + '/'

# User-defined dictionary file
user_dict_path = os.path.join(data_path, 'user_dict.txt')
# Keywords extracted from the shixun data
shixuns_keywords_path = os.path.join(data_parent_path, 'shixuns_keywords.txt')
# Keywords extracted from the course (subject) data
subjects_keywords_path = os.path.join(data_parent_path, 'subjects_keywords.txt')
# Outputs produced while training the practical-project (shixun) recommendation models.
# shixun_save_path / shixun_features_save_path end with '/', so os.path.join(base, name)
# yields exactly base + name.

# Lookup, similarity and interaction pickles
shixun_id_to_name_dict_data = os.path.join(shixun_save_path, 'shixun_id_to_name_dict.pkl')
shixun_itemcf_i2i_sim_data_baseline = os.path.join(shixun_save_path, 'itemcf_i2i_sim_baseline.pkl')
shixun_itemcf_recall_baseline_dict = os.path.join(shixun_save_path, 'itemcf_recall_baseline_dict.pkl')
shixun_itemcf_i2i_sim_data = os.path.join(shixun_save_path, 'itemcf_i2i_sim.pkl')
shixun_user_item_time_dict_data = os.path.join(shixun_save_path, 'user_item_time_dict.pkl')
shixun_user_item_dict_data = os.path.join(shixun_save_path, 'user_item_dict.pkl')
shixun_item_user_time_dict = os.path.join(shixun_save_path, 'item_user_time_dict.pkl')
shixun_usercf_u2u_sim_data = os.path.join(shixun_save_path, 'usercf_u2u_sim.pkl')
shixun_emb_i2i_sim_data = os.path.join(shixun_save_path, 'emb_i2i_sim.pkl')
shixun_wordemb_i2i_sim_data = os.path.join(shixun_save_path, 'wordemb_i2i_sim.pkl')

# Recall result pickles
shixun_itemcf_recall_dict = os.path.join(shixun_save_path, 'itemcf_recall_dict.pkl')
shixun_usercf_recall_dict = os.path.join(shixun_save_path, 'usercf_recall_dict.pkl')
shixun_item_embedding_recall_dict = os.path.join(shixun_save_path, 'item_embedding_recall_dict.pkl')
shixun_pinsage_recall_dict = os.path.join(shixun_save_path, 'pinsage_recall_dict.pkl')
shixun_youtubednn_recall_dict = os.path.join(shixun_save_path, 'youtubednn_recall_dict.pkl')
shixun_youtubednn_usercf_recall_dict = os.path.join(shixun_save_path, 'youtubednn_usercf_recall_dict.pkl')
shixun_dssm_usercf_recall_dict = os.path.join(shixun_save_path, 'dssm_usercf_recall_dict.pkl')
shixun_cold_start_recall_dict = os.path.join(shixun_save_path, 'cold_start_recall_dict.pkl')
shixun_final_recall_items_dict = os.path.join(shixun_save_path, 'final_recall_items_dict.pkl')

# Ranking result pickles
shixun_xdeepfm_rank_dict = os.path.join(shixun_save_path, 'xdeepfm_rank_dict.pkl')
shixun_difm_rank_dict = os.path.join(shixun_save_path, 'difm_rank_dict.pkl')
shixun_bst_rank_dict = os.path.join(shixun_save_path, 'bst_rank_dict.pkl')
shixun_din_rank_dict = os.path.join(shixun_save_path, 'din_rank_dict.pkl')

# Word2Vec embedding dictionaries
shixun_item_w2v_emb_dict = os.path.join(shixun_save_path, 'item_w2v_emb_dict.pkl')
shixun_user_w2v_emb_dict = os.path.join(shixun_save_path, 'user_w2v_emb_dict.pkl')

# Feature tables (CSV) stored under the shixun features directory
shixun_train_user_item_feats = os.path.join(shixun_features_save_path, 'train_user_item_feats_df.csv')
shixun_val_user_item_feats = os.path.join(shixun_features_save_path, 'val_user_item_feats_df.csv')
shixun_test_user_item_feats = os.path.join(shixun_features_save_path, 'test_user_item_feats_df.csv')
shixun_all_user_item_feats = os.path.join(shixun_features_save_path, 'all_user_item_feats_df.csv')

# Item embedding dictionaries
shixuns_emb_dict = os.path.join(shixun_save_path, 'shixuns_emb_dict.pkl')
shixuns_bert_emb_dict = os.path.join(shixun_save_path, 'shixuns_bert_emb_dict.pkl')

# YoutubeDNN artifacts
shixun_youtube_item_emb_dict = os.path.join(shixun_save_path, 'youtube_item_emb_dict.pkl')
shixun_youtube_user_emb_dict = os.path.join(shixun_save_path, 'youtube_user_emb_dict.pkl')
shixun_user_embedding_index_dict = os.path.join(shixun_save_path, 'user_embedding_index_dict.pkl')
shixun_youtube_user_embedding_index_dict = os.path.join(shixun_save_path, 'youtube_user_embedding_index_dict.pkl')
shixun_youtube_item_embedding_index_dict = os.path.join(shixun_save_path, 'youtube_item_embedding_index_dict.pkl')
shixun_youtubednn_train_input_data = os.path.join(shixun_save_path, 'youtubednn_train_input.pkl')
shixun_youtubednn_train_label_data = os.path.join(shixun_save_path, 'youtubednn_train_label.pkl')
shixun_youtubednn_test_input_data = os.path.join(shixun_save_path, 'youtubednn_test_input.pkl')
shixun_youtubednn_test_label_data = os.path.join(shixun_save_path, 'youtubednn_test_label.pkl')
shixun_youtubednn_train_set_data = os.path.join(shixun_save_path, 'youtubednn_train_set.pkl')
shixun_youtubednn_test_set_data = os.path.join(shixun_save_path, 'youtubednn_test_set.pkl')
shixun_youtube_user_embedding_data = os.path.join(shixun_save_path, 'youtube_user_embedding_data.pkl')
shixun_youtube_item_embedding_data = os.path.join(shixun_save_path, 'youtube_item_embedding_data.pkl')

# Cold-start artifacts
shixun_cold_start_user_shixun_dict = os.path.join(shixun_save_path, 'cold_start_user_shixun_dict.pkl')

# DSSM artifacts
shixun_dssm_train_set_data = os.path.join(shixun_save_path, 'dssm_train_set.pkl')
shixun_dssm_test_set_data = os.path.join(shixun_save_path, 'dssm_test_set.pkl')
shixun_dssm_train_input_data = os.path.join(shixun_save_path, 'dssm_train_input.pkl')
shixun_dssm_train_label_data = os.path.join(shixun_save_path, 'dssm_train_label.pkl')
shixun_dssm_test_input_data = os.path.join(shixun_save_path, 'dssm_test_input.pkl')
shixun_dssm_test_label_data = os.path.join(shixun_save_path, 'dssm_test_label.pkl')
shixun_dssm_user_embedding_data = os.path.join(shixun_save_path, 'dssm_user_embedding_data.pkl')
shixun_dssm_item_embedding_data = os.path.join(shixun_save_path, 'dssm_item_embedding_data.pkl')
shixun_dssm_item_embedding_index_dict = os.path.join(shixun_save_path, 'dssm_item_embedding_index_dict.pkl')
shixun_dssm_user_embedding_index_dict = os.path.join(shixun_save_path, 'dssm_user_embedding_index_dict.pkl')
shixun_dssm_user_emb_dict = os.path.join(shixun_save_path, 'dssm_user_emb_dict.pkl')
shixun_dssm_item_emb_dict = os.path.join(shixun_save_path, 'dssm_item_emb_dict.pkl')
shixun_dssm_recall_dict = os.path.join(shixun_save_path, 'dssm_recall_dict.pkl')

# FM artifacts
shixun_fm_train_set_data = os.path.join(shixun_save_path, 'fm_train_set.pkl')
shixun_fm_test_set_data = os.path.join(shixun_save_path, 'fm_test_set.pkl')
shixun_fm_train_input_data = os.path.join(shixun_save_path, 'fm_train_input.pkl')
shixun_fm_train_label_data = os.path.join(shixun_save_path, 'fm_train_label.pkl')
shixun_fm_test_input_data = os.path.join(shixun_save_path, 'fm_test_input.pkl')
shixun_fm_test_label_data = os.path.join(shixun_save_path, 'fm_test_label.pkl')
shixun_fm_user_embedding_data = os.path.join(shixun_save_path, 'fm_user_embedding_data.pkl')
shixun_fm_item_embedding_data = os.path.join(shixun_save_path, 'fm_item_embedding_data.pkl')
shixun_fm_item_embedding_index_dict = os.path.join(shixun_save_path, 'fm_item_embedding_index_dict.pkl')
shixun_fm_user_embedding_index_dict = os.path.join(shixun_save_path, 'fm_user_embedding_index_dict.pkl')
shixun_fm_user_emb_dict = os.path.join(shixun_save_path, 'fm_user_emb_dict.pkl')
shixun_fm_item_emb_dict = os.path.join(shixun_save_path, 'fm_item_emb_dict.pkl')
shixun_fm_recall_dict = os.path.join(shixun_save_path, 'fm_recall_dict.pkl')

# MIND artifacts (the train/test set paths are defined once, here)
shixun_mind_recall0_dict = os.path.join(shixun_save_path, 'mind_recall0_dict.pkl')
shixun_mind_recall1_dict = os.path.join(shixun_save_path, 'mind_recall1_dict.pkl')
shixun_mind_user_emb0_dict = os.path.join(shixun_save_path, 'mind_user_emb0_dict.pkl')
shixun_mind_user_emb1_dict = os.path.join(shixun_save_path, 'mind_user_emb1_dict.pkl')
shixun_mind_item_emb_dict = os.path.join(shixun_save_path, 'mind_item_emb_dict.pkl')
shixun_mind_user_embedding0_index_dict = os.path.join(shixun_save_path, 'mind_user_embedding0_index_dict.pkl')
shixun_mind_user_embedding1_index_dict = os.path.join(shixun_save_path, 'mind_user_embedding1_index_dict.pkl')
shixun_mind_item_embedding_index_dict = os.path.join(shixun_save_path, 'mind_item_embedding_index_dict.pkl')
shixun_mind_train_input_data = os.path.join(shixun_save_path, 'mind_train_input.pkl')
shixun_mind_train_label_data = os.path.join(shixun_save_path, 'mind_train_label.pkl')
shixun_mind_test_input_data = os.path.join(shixun_save_path, 'mind_test_input.pkl')
shixun_mind_test_label_data = os.path.join(shixun_save_path, 'mind_test_label.pkl')
shixun_mind_train_set_data = os.path.join(shixun_save_path, 'mind_train_set.pkl')
shixun_mind_test_set_data = os.path.join(shixun_save_path, 'mind_test_set.pkl')
shixun_mind_user_embedding0_data = os.path.join(shixun_save_path, 'mind_user_embedding0_data.pkl')
shixun_mind_user_embedding1_data = os.path.join(shixun_save_path, 'mind_user_embedding1_data.pkl')
shixun_mind_item_embedding_data = os.path.join(shixun_save_path, 'mind_item_embedding_data.pkl')
# Outputs produced while training the practical-course (subject) recommendation models.
# subject_save_path / subject_features_save_path end with '/', so os.path.join(base, name)
# yields exactly base + name. Every name is assigned exactly once.

# Lookup, similarity and interaction pickles
subject_id_to_name_dict_data = os.path.join(subject_save_path, 'subject_id_to_name_dict0419.pkl')
subject_itemcf_i2i_sim_data_baseline = os.path.join(subject_save_path, 'itemcf_i2i_sim_baseline.pkl')
subject_itemcf_recall_baseline_dict = os.path.join(subject_save_path, 'itemcf_recall_baseline_dict.pkl')
subject_itemcf_i2i_sim_data = os.path.join(subject_save_path, 'itemcf_i2i_sim.pkl')
subject_user_item_time_dict_data = os.path.join(subject_save_path, 'user_item_time_dict.pkl')
subject_item_user_time_dict = os.path.join(subject_save_path, 'item_user_time_dict.pkl')
subject_usercf_u2u_sim_data = os.path.join(subject_save_path, 'usercf_u2u_sim.pkl')
subject_emb_i2i_sim_data = os.path.join(subject_save_path, 'emb_i2i_sim.pkl')

# Recall result pickles
subject_itemcf_recall_dict = os.path.join(subject_save_path, 'itemcf_recall_dict.pkl')
subject_usercf_recall_dict = os.path.join(subject_save_path, 'usercf_recall_dict.pkl')
subject_item_embedding_recall_dict = os.path.join(subject_save_path, 'item_embedding_recall_dict.pkl')
subject_youtubednn_recall_dict = os.path.join(subject_save_path, 'youtubednn_recall_dict.pkl')
subject_mind_recall_dict = os.path.join(subject_save_path, 'mind_recall_dict.pkl')
subject_youtubednn_usercf_recall_dict = os.path.join(subject_save_path, 'youtubednn_usercf_recall_dict.pkl')
subject_dssm_usercf_recall_dict = os.path.join(subject_save_path, 'dssm_usercf_recall_dict.pkl')
subject_cold_start_recall_dict = os.path.join(subject_save_path, 'cold_start_recall_dict.pkl')
subject_final_recall_items_dict = os.path.join(subject_save_path, 'final_recall_items_dict.pkl')

# Ranking result pickles
subject_xdeepfm_rank_dict = os.path.join(subject_save_path, 'xdeepfm_rank_dict.pkl')
subject_difm_rank_dict = os.path.join(subject_save_path, 'difm_rank_dict.pkl')
subject_bst_rank_dict = os.path.join(subject_save_path, 'bst_rank_dict.pkl')
subject_din_rank_dict = os.path.join(subject_save_path, 'din_rank_dict.pkl')

# Word2Vec embedding dictionaries
subject_item_w2v_emb_dict = os.path.join(subject_save_path, 'item_w2v_emb_dict.pkl')
subject_user_w2v_emb_dict = os.path.join(subject_save_path, 'user_w2v_emb_dict.pkl')

# Feature tables (CSV) stored under the subject features directory
subject_train_user_item_feats = os.path.join(subject_features_save_path, 'train_user_item_feats_df.csv')
subject_val_user_item_feats = os.path.join(subject_features_save_path, 'val_user_item_feats_df.csv')
subject_test_user_item_feats = os.path.join(subject_features_save_path, 'test_user_item_feats_df.csv')
subject_all_user_item_feats = os.path.join(subject_features_save_path, 'all_user_item_feats_df.csv')

# Item embedding dictionaries
subjects_emb_dict = os.path.join(subject_save_path, 'subjects_emb_dict.pkl')
subject_bert_emb_dict = os.path.join(subject_save_path, 'subjects_bert_emb_dict.pkl')

# YoutubeDNN and MIND artifacts (declared side by side)
subject_youtube_item_emb_dict = os.path.join(subject_save_path, 'youtube_item_emb_dict.pkl')
subject_mind_item_emb_dict = os.path.join(subject_save_path, 'mind_item_emb_dict.pkl')
subject_youtube_user_emb_dict = os.path.join(subject_save_path, 'youtube_user_emb_dict.pkl')
subject_mind_user_emb_dict = os.path.join(subject_save_path, 'mind_user_emb_dict.pkl')
subject_youtube_item_emb_data = os.path.join(subject_save_path, 'youtube_item_emb.pkl')
subject_youtube_user_emb_data = os.path.join(subject_save_path, 'youtube_user_emb.pkl')
subject_user_embedding_index_dict = os.path.join(subject_save_path, 'user_embedding_index_dict.pkl')
subject_youtube_user_embedding_index_dict = os.path.join(subject_save_path, 'youtube_user_embedding_index_dict.pkl')
subject_mind_user_embedding_index_dict = os.path.join(subject_save_path, 'mind_user_embedding_index_dict.pkl')
subject_youtube_item_embedding_index_dict = os.path.join(subject_save_path, 'youtube_item_embedding_index_dict.pkl')
subject_mind_item_embedding_index_dict = os.path.join(subject_save_path, 'mind_item_embedding_index_dict.pkl')
subject_youtubednn_train_input_data = os.path.join(subject_save_path, 'youtubednn_train_input.pkl')
subject_mind_train_input_data = os.path.join(subject_save_path, 'mind_train_input.pkl')
subject_youtubednn_train_label_data = os.path.join(subject_save_path, 'youtubednn_train_label.pkl')
subject_mind_train_label_data = os.path.join(subject_save_path, 'mind_train_label.pkl')
subject_youtubednn_test_input_data = os.path.join(subject_save_path, 'youtubednn_test_input.pkl')
subject_mind_test_input_data = os.path.join(subject_save_path, 'mind_test_input.pkl')
subject_youtubednn_test_label_data = os.path.join(subject_save_path, 'youtubednn_test_label.pkl')
subject_mind_test_label_data = os.path.join(subject_save_path, 'mind_test_label.pkl')
subject_youtubednn_train_set_data = os.path.join(subject_save_path, 'youtubednn_train_set.pkl')
subject_mind_train_set_data = os.path.join(subject_save_path, 'mind_train_set.pkl')
subject_youtubednn_test_set_data = os.path.join(subject_save_path, 'youtubednn_test_set.pkl')
subject_mind_test_set_data = os.path.join(subject_save_path, 'mind_test_set.pkl')
subject_youtube_user_embedding_data = os.path.join(subject_save_path, 'youtube_user_embedding_data.pkl')
subject_mind_user_embedding_data = os.path.join(subject_save_path, 'mind_user_embedding_data.pkl')
subject_youtube_item_embedding_data = os.path.join(subject_save_path, 'youtube_item_embedding_data.pkl')
subject_mind_item_embedding_data = os.path.join(subject_save_path, 'mind_item_embedding_data.pkl')

# Cold-start and PinSage artifacts
subject_cold_start_user_subject_dict = os.path.join(subject_save_path, 'cold_start_user_subject_dict.pkl')
subject_pinsage_recall_dict = os.path.join(subject_save_path, 'pinsage_recall_dict.pkl')

# MIND artifacts that carry a 0/1 suffix in their names
subject_mind_recall0_dict = os.path.join(subject_save_path, 'mind_recall0_dict.pkl')
subject_mind_recall1_dict = os.path.join(subject_save_path, 'mind_recall1_dict.pkl')
subject_mind_user_emb0_dict = os.path.join(subject_save_path, 'mind_user_emb0_dict.pkl')
subject_mind_user_emb1_dict = os.path.join(subject_save_path, 'mind_user_emb1_dict.pkl')
subject_mind_user_embedding0_index_dict = os.path.join(subject_save_path, 'mind_user_embedding0_index_dict.pkl')
subject_mind_user_embedding1_index_dict = os.path.join(subject_save_path, 'mind_user_embedding1_index_dict.pkl')
subject_mind_user_embedding0_data = os.path.join(subject_save_path, 'mind_user_embedding0_data.pkl')
subject_mind_user_embedding1_data = os.path.join(subject_save_path, 'mind_user_embedding1_data.pkl')

# DSSM artifacts
subject_dssm_train_set_data = os.path.join(subject_save_path, 'dssm_train_set.pkl')
subject_dssm_test_set_data = os.path.join(subject_save_path, 'dssm_test_set.pkl')
subject_dssm_train_input_data = os.path.join(subject_save_path, 'dssm_train_input.pkl')
subject_dssm_train_label_data = os.path.join(subject_save_path, 'dssm_train_label.pkl')
subject_dssm_test_input_data = os.path.join(subject_save_path, 'dssm_test_input.pkl')
subject_dssm_test_label_data = os.path.join(subject_save_path, 'dssm_test_label.pkl')
subject_dssm_user_embedding_data = os.path.join(subject_save_path, 'dssm_user_embedding_data.pkl')
subject_dssm_item_embedding_data = os.path.join(subject_save_path, 'dssm_item_embedding_data.pkl')
subject_dssm_item_embedding_index_dict = os.path.join(subject_save_path, 'dssm_item_embedding_index_dict.pkl')
subject_dssm_user_embedding_index_dict = os.path.join(subject_save_path, 'dssm_user_embedding_index_dict.pkl')
subject_dssm_user_emb_dict = os.path.join(subject_save_path, 'dssm_user_emb_dict.pkl')
subject_dssm_item_emb_dict = os.path.join(subject_save_path, 'dssm_item_emb_dict.pkl')
subject_dssm_recall_dict = os.path.join(subject_save_path, 'dssm_recall_dict.pkl')

# FM artifacts
subject_fm_train_set_data = os.path.join(subject_save_path, 'fm_train_set.pkl')
subject_fm_test_set_data = os.path.join(subject_save_path, 'fm_test_set.pkl')
subject_fm_train_input_data = os.path.join(subject_save_path, 'fm_train_input.pkl')
subject_fm_train_label_data = os.path.join(subject_save_path, 'fm_train_label.pkl')
subject_fm_test_input_data = os.path.join(subject_save_path, 'fm_test_input.pkl')
subject_fm_test_label_data = os.path.join(subject_save_path, 'fm_test_label.pkl')
subject_fm_user_embedding_data = os.path.join(subject_save_path, 'fm_user_embedding_data.pkl')
subject_fm_item_embedding_data = os.path.join(subject_save_path, 'fm_item_embedding_data.pkl')
subject_fm_item_embedding_index_dict = os.path.join(subject_save_path, 'fm_item_embedding_index_dict.pkl')
subject_fm_user_embedding_index_dict = os.path.join(subject_save_path, 'fm_user_embedding_index_dict.pkl')
subject_fm_user_emb_dict = os.path.join(subject_save_path, 'fm_user_emb_dict.pkl')
subject_fm_item_emb_dict = os.path.join(subject_save_path, 'fm_item_emb_dict.pkl')
subject_fm_recall_dict = os.path.join(subject_save_path, 'fm_recall_dict.pkl')
# Model files. shixun_model_save_path / subject_model_save_path end with '/', so
# os.path.join(base, name) yields exactly base + name.

# Word2Vec word vectors used by the faiss recall
shixun_faiss_w2v_path = os.path.join(shixun_model_save_path, 'faiss_word2vec.model')
subject_faiss_w2v_path = os.path.join(subject_model_save_path, 'faiss_word2vec.model')

# faiss models trained on item names
shixuns_fassi_model_path = os.path.join(shixun_model_save_path, 'item_content_fassi.model')
subjects_fassi_model_path = os.path.join(subject_model_save_path, 'item_content_fassi.model')

# PinSage models: the path is the model save directory itself
shixun_pinsage_model_path = shixun_model_save_path
subject_pinsage_model_path = subject_model_save_path

# faiss models built from YoutubeDNN training
shixun_youtube_item_faiss_model_path = os.path.join(shixun_model_save_path, 'youtube_item_faiss.model')
shixun_youtube_user_faiss_model_path = os.path.join(shixun_model_save_path, 'youtube_user_faiss.model')
subject_youtube_item_faiss_model_path = os.path.join(subject_model_save_path, 'youtube_item_faiss.model')
subject_youtube_user_faiss_model_path = os.path.join(subject_model_save_path, 'youtube_user_faiss.model')

# faiss models built from DSSM training
shixun_dssm_item_faiss_model_path = os.path.join(shixun_model_save_path, 'dssm_item_faiss.model')
shixun_dssm_user_faiss_model_path = os.path.join(shixun_model_save_path, 'dssm_user_faiss.model')
subject_dssm_item_faiss_model_path = os.path.join(subject_model_save_path, 'dssm_item_faiss.model')
subject_dssm_user_faiss_model_path = os.path.join(subject_model_save_path, 'dssm_user_faiss.model')

# faiss models built from FM training
shixun_fm_item_faiss_model_path = os.path.join(shixun_model_save_path, 'fm_item_faiss.model')
shixun_fm_user_faiss_model_path = os.path.join(shixun_model_save_path, 'fm_user_faiss.model')
subject_fm_item_faiss_model_path = os.path.join(subject_model_save_path, 'fm_item_faiss.model')
subject_fm_user_faiss_model_path = os.path.join(subject_model_save_path, 'fm_user_faiss.model')

# faiss models built from MIND training
shixun_mind_item_faiss_model_path = os.path.join(shixun_model_save_path, 'mind_item_faiss.model')
shixun_mind_user0_faiss_model_path = os.path.join(shixun_model_save_path, 'mind_user0_faiss.model')
shixun_mind_user1_faiss_model_path = os.path.join(shixun_model_save_path, 'mind_user1_faiss.model')
subject_mind_item_faiss_model_path = os.path.join(subject_model_save_path, 'mind_item_faiss.model')
subject_mind_user0_faiss_model_path = os.path.join(subject_model_save_path, 'mind_user0_faiss.model')
subject_mind_user1_faiss_model_path = os.path.join(subject_model_save_path, 'mind_user1_faiss.model')

# Word2Vec vectors generated from the sequences of items selected by users
shixuns_item_word2vec_model_path = os.path.join(shixun_model_save_path, 'item_word2vec.model')
subjects_item_word2vec_model_path = os.path.join(subject_model_save_path, 'item_word2vec.model')
shixuns_user_word2vec_faiss_model_path = os.path.join(shixun_model_save_path, 'user_word2vec_faiss.model')
subjects_user_word2vec_faiss_model_path = os.path.join(subject_model_save_path, 'user_word2vec_faiss.model')
# Wikipedia corpus file used to train word2vec
word2vec_input_path = '/home/plm_models/word2vec/wiki.txt'
# Where the word vectors trained by word2vec are saved
word2vec_model_path = '/home/plm_models/word2vec/wiki.model'
word2vec_dim = 100

# Embedding dimensionality
embedding_dim = 100

# ELMo pretrained language model
elmo_model_path = '/home/plm_models/elmo/zhs.model'

# BERT-family pretrained language models
bert_pretrain_path = '/home/plm_models/chinese-bert-wwm-ext'
roberta_pretrain_path = '/home/plm_models/chinese-roberta-wwm-ext'
ernie20_pretrain_path = '/home/plm_models/ernie-2.0'
bert_base_chinese = '/home/plm_models/bert-base-chinese'

# sentence-transformers pretrained language model
sentence_transformers_model_path = '/home/plm_models/paraphrase-multilingual-MiniLM-L12-v2'

# LTP model path
ltp_model_path = '/home/plm_models/ltp/v4/base2'
# Whether to use multiple GPUs
use_multi_gpu = False

# Identifiers for the two supported word-segmentation methods
JIEBA_TOKEN = 'jieba'
LTP_TOKEN = 'ltp'
# Path of the stop-words file, located under the project root
stop_words_path = f'{root_path}/stopwords/cn_stopwords.txt'
# HNSW parameters
# Size of the dynamic nearest-neighbour candidate list.
# NOTE(review): the original comment says "during search", but the name refers to
# index construction -- confirm which phase this value is passed to.
ef_construction = 3000
# Maximum number of neighbour nodes kept per node in the graph
M = 64

# Random seed
RANDOM_SEED = 2023
# Create the logger object; the log file lives under <root_path>/logs/ and is named
# after the current date (YYYY-MM-DD.log).
logger = create_logger(f'{root_path}/logs/{datetime.now().date()}.log')
# Use CUDA when it is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Number of CPU cores to use: leave two cores free, but never drop below one so that
# the value stays usable as a worker count on machines with two or fewer cores.
cpu_count = max(1, multiprocessing.cpu_count() - 2)
# Feature columns used by the LightGBM ranking models.
# The names are runtime column identifiers and are kept exactly as spelled.
shixun_lgb_cols = [
    'sim0', 'time_diff0', 'visit_diff0', 'myshixuns_diff0', 'challenges_diff0',
    'sim_max', 'sim_min', 'sim_sum', 'sim_mean', 'user_item_sim', 'score',
    'select_size', 'user_time_diff_mean', 'active_level', 'item_time_diff_mean',
    'user_num', 'hot_level', 'user_time_hob1', 'user_time_hob2',
    'visits_hbo', 'myshixuns_hbo', 'challenges_hbo', 'averge_star_hbo',
    'visits', 'trainee', 'myshixuns_count', 'challenges_count', 'averge_star',
    'created_at_ts', 'user_visits', 'grade', 'experience', 'is_trainee_hab',
]
subject_lgb_cols = [
    'sim0', 'time_diff0', 'visit_diff0', 'study_count_diff0', 'course_study_count_diff0',
    'passed_count_diff0', 'course_used_count_diff0', 'school_used_count_diff0',
    'challenge_count_diff0', 'evaluate_count_diff0', 'video_study_time_diff0',
    'study_pdf_attachment_count_diff0', 'averge_star_diff0',
    'sim_max', 'sim_min', 'sim_sum', 'sim_mean', 'user_item_sim', 'score',
    'select_size', 'user_time_diff_mean', 'active_level', 'item_time_diff_mean',
    'user_num', 'hot_level', 'user_time_hob1', 'user_time_hob2',
    'visits_hbo', 'study_count_hbo', 'course_study_count_hbo', 'passed_count_hbo',
    'course_used_count_hbo', 'school_used_count_hbo', 'challenge_count_hbo',
    'evaluate_count_hbo', 'video_study_time_hbo', 'study_pdf_attachment_count_hbo',
    'averge_star_hbo', 'visits', 'study_count', 'course_study_count', 'passed_count',
    'course_used_count', 'school_used_count', 'challenge_count', 'evaluate_count',
    'video_study_time', 'study_pdf_attachment_count', 'averge_star',
    'created_at_ts', 'user_visits', 'grade', 'experience', 'is_disciplines_hab',
]
# Dense features of the ranking models.
# The names are runtime column identifiers and are kept exactly as spelled.
shixun_rank_dense_fea = [
    'sim0', 'time_diff0', 'visit_diff0', 'myshixuns_diff0', 'challenges_diff0',
    'trainee_diff0', 'averge_star_diff0',
    'task_pass_diff0', 'sim_max', 'sim_min', 'sim_sum', 'sim_mean', 'score', 'rank',
    'select_size', 'user_time_diff_mean', 'active_level', 'item_time_diff_mean', 'seq_length',
    'user_num', 'hot_level', 'user_time_hob1', 'user_time_hob2', 'task_pass_hbo',
    'visits_hbo', 'myshixuns_hbo', 'challenges_hbo', 'averge_star_hbo',
    'visits', 'myshixuns_count', 'challenges_count', 'averge_star', 'task_pass',
    'logins', 'grade', 'edu_background', 'experience',
]
# NOTE(review): 'stages_shixuns_count_diff0' and 'stage_shixuns_count' differ in the
# "stage(s)" prefix -- confirm both match the actual feature-table columns.
subject_rank_dense_fea = [
    'sim0', 'time_diff0', 'visit_diff0', 'study_count_diff0', 'course_study_count_diff0',
    'passed_count_diff0', 'course_used_count_diff0', 'school_used_count_diff0',
    'stages_count_diff0',
    'challenge_count_diff0', 'evaluate_count_diff0', 'video_study_time_diff0',
    'stages_shixuns_count_diff0',
    'study_pdf_attachment_count_diff0', 'averge_star_diff0', 'stages_count',
    'stage_shixuns_count',
    'sim_max', 'sim_min', 'sim_sum', 'sim_mean', 'score', 'rank',
    'select_size', 'user_time_diff_mean', 'active_level', 'item_time_diff_mean', 'seq_length',
    'user_num', 'hot_level', 'user_time_hob1', 'user_time_hob2',
    'visits_hbo', 'study_count_hbo', 'course_study_count_hbo', 'passed_count_hbo',
    'course_used_count_hbo', 'school_used_count_hbo', 'challenge_count_hbo',
    'evaluate_count_hbo', 'video_study_time_hbo', 'study_pdf_attachment_count_hbo',
    'averge_star_hbo', 'visits', 'study_count', 'course_study_count', 'passed_count',
    'course_used_count', 'school_used_count', 'challenge_count', 'evaluate_count',
    'video_study_time', 'study_pdf_attachment_count', 'averge_star',
    'logins', 'grade', 'experience',
]
# Sparse features of the ranking models
shixun_rank_sparse_fea = [
    'user_id', 'shixun_id', 'gender', 'school_id', 'identity',
    'trainee', 'is_trainee_hab',
]
subject_rank_sparse_fea = [
    'user_id', 'subject_id', 'gender', 'school_id', 'identity',
    'edu_background', 'disciplines_id', 'is_disciplines_hab',
]

# Feature fields of the ranking models
shixun_rank_feats_columns = [
    'visits', 'myshixuns_count', 'challenges_count', 'averge_star',
]
subject_rank_feats_columns = [
    'visits', 'study_count', 'course_study_count', 'passed_count',
    'course_used_count', 'school_used_count', 'challenge_count', 'evaluate_count',
    'video_study_time', 'study_pdf_attachment_count', 'averge_star',
]
# Maximum length of a user's selected-shixun sequence
shixun_max_seq_len = 500

# Maximum length of a user's selected-course sequence
subject_max_seq_len = 300
# MySQL connection settings.
# NOTE(security): these credentials are committed in source. Every value can be
# overridden through the environment variable with the same upper-cased name; the
# literals below are kept only as backward-compatible defaults and should be rotated
# and removed from version control.
mysql_host = os.environ.get('MYSQL_HOST', "rm-bp13v5020p7828r5rso.mysql.rds.aliyuncs.com")
mysql_user = os.environ.get('MYSQL_USER', "testeducoder")
mysql_passwd = os.environ.get('MYSQL_PASSWD', "TEST@123")
# Environment values are strings, so the port is converted back to int
mysql_port = int(os.environ.get('MYSQL_PORT', 3306))
mysql_database = os.environ.get('MYSQL_DATABASE', "preeducoderweb")

# Second MySQL instance.
# NOTE(review): the original comment labels this "online" while the variable names
# say "test" -- confirm which environment this actually points to.
mysql_test_host = os.environ.get('MYSQL_TEST_HOST', "testeducoder-public.mysql.polardb.rds.aliyuncs.com")
mysql_test_user = os.environ.get('MYSQL_TEST_USER', "testeducoder")
mysql_test_passwd = os.environ.get('MYSQL_TEST_PASSWD', "TEST@123")
mysql_test_port = int(os.environ.get('MYSQL_TEST_PORT', 3306))
mysql_test_database = os.environ.get('MYSQL_TEST_DATABASE', "educoder_kg_search")
# Sample user / shixun / subject identifiers and names
# (presumably used for manual smoke tests; verify against callers)
test_user_id = 73547

test_shixun_id = 8222
test_shixun_name = 'Pytorch深度学习 - 多尺度目标检测'

test_subject_id = 4
test_subject_name = '数据结构与算法C语言'
# Parameters for learning-path recommendation
# NOTE(review): the 'sample' sub-directory is hard-coded here rather than derived from
# samples_mode_flag -- confirm this is intended.
study_path = root_path + '/study_path_rs/'
study_path_data = study_path + 'data/sample/'
kg_data = study_path + 'knowledge_forest_data/sample/'
initial_learning = study_path + 'knowledge_forest_data/initial_learning/'
# Neo4j connection settings.
# NOTE(security): these credentials are committed in source. Every value can be
# overridden through the environment variable with the same upper-cased name; the
# literals below are kept only as backward-compatible defaults and should be rotated
# and removed from version control.
neo4j_url = os.environ.get('NEO4J_URL', 'http://47.93.43.185:57474/')
neo4j_username = os.environ.get('NEO4J_USERNAME', 'neo4j')
neo4j_password = os.environ.get('NEO4J_PASSWORD', 'yongge123123')