# Project-wide configuration parameters.
import multiprocessing
import os
import sys
from datetime import datetime

# Append the current working directory to the module search path before the
# project-local import below.
sys.path.append(os.getcwd())

import torch

from utils import create_logger

# Project root directory (the directory that contains this file).
root_path = os.path.abspath(os.path.dirname(__file__))

# Data-set mode: True selects the 'sample' directories, False the 'full' ones.
samples_mode = True
samples_mode_flag = 'sample' if samples_mode else 'full'

# Number of users to sample.
samples_user_nums = 10000

# Flag for evaluating recall.
need_metric_recall = True

# Offline model-validation mode: only the training data set is used.
offline_mode = True

# Common data directory.
data_path = f'{root_path}/data/'

# Mode-specific data directory (depends on samples_mode_flag).
data_parent_path = f'{data_path}{samples_mode_flag}/'

# Shixun (practice project) item data.
shixuns_data_path = f'{data_parent_path}shixuns.csv'
shixuns_embed_path = f'{data_parent_path}shixuns_emb.csv'
shixuns_bert_em_path = f'{data_parent_path}shixuns_bert_emb.csv'
shixun_merge_emb_path = f'{data_parent_path}shixun_merage_emb.csv'
cold_start_shixuns_data_path = f'{data_parent_path}cold_start_shixuns.csv'
cold_start_shixuns_parent_path = f'{data_path}cold_start_data/shixun/{samples_mode_flag}/'

# Subject (course) item data.
subjects_data_path = f'{data_parent_path}subjects.csv'
subjects_embed_path = f'{data_parent_path}subjects_emb.csv'
subjects_bert_em_path = f'{data_parent_path}subjects_bert_emb.csv'
subjects_merge_emb_path = f'{data_parent_path}subjects_merage_emb.csv'
cold_start_subjects_data_path = f'{data_parent_path}cold_start_subjects.csv'
cold_start_subjects_parent_path = f'{data_path}cold_start_data/subject/{samples_mode_flag}/'

# User/shixun interaction data. These live in the same directory as
# data_parent_path (root_path + '/data/' + samples_mode_flag + '/').
myshixuns_data_path = f'{data_parent_path}myshixuns.csv'
myshixuns_data = f'{data_parent_path}myshixuns_full.csv'
myshixuns_train_data = f'{data_parent_path}myshixuns_train.csv'
myshixuns_test_data = f'{data_parent_path}myshixuns_test.csv'

# User/subject interaction data.
mysubjects_data_path = f'{data_parent_path}mysubjects.csv'
mysubjects_data = f'{data_parent_path}mysubjectsfull.csv'
mysubjects_train_data = f'{data_parent_path}mysubjects_train.csv'
mysubjects_test_data = f'{data_parent_path}mysubjects_test.csv'

# Baseline train/test splits.
myshixuns_train_data_baseline = f'{data_parent_path}myshixuns_train_baseline.csv'
myshixuns_test_data_baseline = f'{data_parent_path}myshixuns_test_baseline.csv'
mysubjects_train_data_baseline = f'{data_parent_path}mysubjects_train_baseline.csv'
mysubjects_test_data_baseline = f'{data_parent_path}mysubjects_test_baseline.csv'

users_data_path = f'{data_parent_path}users.csv'

# Output directories, each split by data-set mode.
shixun_save_path = f'{root_path}/results/shixun/{samples_mode_flag}/'
subject_save_path = f'{root_path}/results/subject/{samples_mode_flag}/'
shixun_features_save_path = f'{root_path}/features/shixun/{samples_mode_flag}/'
subject_features_save_path = f'{root_path}/features/subject/{samples_mode_flag}/'
shixun_model_save_path = f'{root_path}/models/shixun/{samples_mode_flag}/'
subject_model_save_path = f'{root_path}/models/subject/{samples_mode_flag}/'
myshixuns_save_path = f'{root_path}/data/shixun/{samples_mode_flag}/'
mysubjects_save_path = f'{root_path}/data/subject/{samples_mode_flag}/'

# User-defined dictionary file.
user_dict_path = os.path.join(data_path, 'user_dict.txt')

# Keywords extracted from the shixun data.
shixuns_keywords_path = os.path.join(data_parent_path, 'shixuns_keywords.txt')

# Keywords extracted from the subject (course) data.
subjects_keywords_path = os.path.join(data_parent_path, 'subjects_keywords.txt')

# Outputs produced while training the shixun (practice project)
# recommendation models.
shixun_id_to_name_dict_data = f'{shixun_save_path}shixun_id_to_name_dict.pkl'
shixun_itemcf_i2i_sim_data_baseline = f'{shixun_save_path}itemcf_i2i_sim_baseline.pkl'
shixun_itemcf_recall_baseline_dict = f'{shixun_save_path}itemcf_recall_baseline_dict.pkl'
shixun_itemcf_i2i_sim_data = f'{shixun_save_path}itemcf_i2i_sim.pkl'
shixun_user_item_time_dict_data = f'{shixun_save_path}user_item_time_dict.pkl'
# Shixun result files; every path below is built from shixun_save_path unless
# noted otherwise.

# Interaction dictionaries and similarity files.
shixun_user_item_dict_data = f'{shixun_save_path}user_item_dict.pkl'
shixun_item_user_time_dict = f'{shixun_save_path}item_user_time_dict.pkl'
shixun_usercf_u2u_sim_data = f'{shixun_save_path}usercf_u2u_sim.pkl'
shixun_emb_i2i_sim_data = f'{shixun_save_path}emb_i2i_sim.pkl'
shixun_wordemb_i2i_sim_data = f'{shixun_save_path}wordemb_i2i_sim.pkl'

# Recall dictionaries.
shixun_itemcf_recall_dict = f'{shixun_save_path}itemcf_recall_dict.pkl'
shixun_usercf_recall_dict = f'{shixun_save_path}usercf_recall_dict.pkl'
shixun_item_embedding_recall_dict = f'{shixun_save_path}item_embedding_recall_dict.pkl'
shixun_pinsage_recall_dict = f'{shixun_save_path}pinsage_recall_dict.pkl'
shixun_youtubednn_recall_dict = f'{shixun_save_path}youtubednn_recall_dict.pkl'
shixun_youtubednn_usercf_recall_dict = f'{shixun_save_path}youtubednn_usercf_recall_dict.pkl'
shixun_dssm_usercf_recall_dict = f'{shixun_save_path}dssm_usercf_recall_dict.pkl'
shixun_cold_start_recall_dict = f'{shixun_save_path}cold_start_recall_dict.pkl'
shixun_final_recall_items_dict = f'{shixun_save_path}final_recall_items_dict.pkl'

# Rank dictionaries.
shixun_xdeepfm_rank_dict = f'{shixun_save_path}xdeepfm_rank_dict.pkl'
shixun_difm_rank_dict = f'{shixun_save_path}difm_rank_dict.pkl'
shixun_bst_rank_dict = f'{shixun_save_path}bst_rank_dict.pkl'
shixun_din_rank_dict = f'{shixun_save_path}din_rank_dict.pkl'

# Word2Vec embedding dictionaries.
shixun_item_w2v_emb_dict = f'{shixun_save_path}item_w2v_emb_dict.pkl'
shixun_user_w2v_emb_dict = f'{shixun_save_path}user_w2v_emb_dict.pkl'

# User-item feature tables (stored under shixun_features_save_path).
shixun_train_user_item_feats = f'{shixun_features_save_path}train_user_item_feats_df.csv'
shixun_val_user_item_feats = f'{shixun_features_save_path}val_user_item_feats_df.csv'
shixun_test_user_item_feats = f'{shixun_features_save_path}test_user_item_feats_df.csv'
shixun_all_user_item_feats = f'{shixun_features_save_path}all_user_item_feats_df.csv'

# Item embedding dictionaries.
shixuns_emb_dict = f'{shixun_save_path}shixuns_emb_dict.pkl'
shixuns_bert_emb_dict = f'{shixun_save_path}shixuns_bert_emb_dict.pkl'

# YoutubeDNN-related files.
shixun_youtube_item_emb_dict = f'{shixun_save_path}youtube_item_emb_dict.pkl'
shixun_youtube_user_emb_dict = f'{shixun_save_path}youtube_user_emb_dict.pkl'
shixun_user_embedding_index_dict = f'{shixun_save_path}user_embedding_index_dict.pkl'
shixun_youtube_user_embedding_index_dict = f'{shixun_save_path}youtube_user_embedding_index_dict.pkl'
shixun_youtube_item_embedding_index_dict = f'{shixun_save_path}youtube_item_embedding_index_dict.pkl'
shixun_youtubednn_train_input_data = f'{shixun_save_path}youtubednn_train_input.pkl'
shixun_youtubednn_train_label_data = f'{shixun_save_path}youtubednn_train_label.pkl'
shixun_youtubednn_test_input_data = f'{shixun_save_path}youtubednn_test_input.pkl'
shixun_youtubednn_test_label_data = f'{shixun_save_path}youtubednn_test_label.pkl'
shixun_youtubednn_train_set_data = f'{shixun_save_path}youtubednn_train_set.pkl'
shixun_youtubednn_test_set_data = f'{shixun_save_path}youtubednn_test_set.pkl'
shixun_youtube_user_embedding_data = f'{shixun_save_path}youtube_user_embedding_data.pkl'
shixun_youtube_item_embedding_data = f'{shixun_save_path}youtube_item_embedding_data.pkl'

shixun_cold_start_user_shixun_dict = f'{shixun_save_path}cold_start_user_shixun_dict.pkl'

# MIND train/test sets.
shixun_mind_train_set_data = f'{shixun_save_path}mind_train_set.pkl'
shixun_mind_test_set_data = f'{shixun_save_path}mind_test_set.pkl'

# DSSM-related files.
shixun_dssm_train_set_data = f'{shixun_save_path}dssm_train_set.pkl'
shixun_dssm_test_set_data = f'{shixun_save_path}dssm_test_set.pkl'
shixun_dssm_train_input_data = f'{shixun_save_path}dssm_train_input.pkl'
shixun_dssm_train_label_data = f'{shixun_save_path}dssm_train_label.pkl'
shixun_dssm_test_input_data = f'{shixun_save_path}dssm_test_input.pkl'
shixun_dssm_test_label_data = f'{shixun_save_path}dssm_test_label.pkl'
shixun_dssm_user_embedding_data = f'{shixun_save_path}dssm_user_embedding_data.pkl'
shixun_dssm_item_embedding_data = f'{shixun_save_path}dssm_item_embedding_data.pkl'
shixun_dssm_item_embedding_index_dict = f'{shixun_save_path}dssm_item_embedding_index_dict.pkl'
# Shixun result files (continued); every path is built from shixun_save_path.

# DSSM-related files.
shixun_dssm_user_embedding_index_dict = f'{shixun_save_path}dssm_user_embedding_index_dict.pkl'
shixun_dssm_user_emb_dict = f'{shixun_save_path}dssm_user_emb_dict.pkl'
shixun_dssm_item_emb_dict = f'{shixun_save_path}dssm_item_emb_dict.pkl'
shixun_dssm_recall_dict = f'{shixun_save_path}dssm_recall_dict.pkl'

# FM-related files.
shixun_fm_train_set_data = f'{shixun_save_path}fm_train_set.pkl'
shixun_fm_test_set_data = f'{shixun_save_path}fm_test_set.pkl'
shixun_fm_train_input_data = f'{shixun_save_path}fm_train_input.pkl'
shixun_fm_train_label_data = f'{shixun_save_path}fm_train_label.pkl'
shixun_fm_test_input_data = f'{shixun_save_path}fm_test_input.pkl'
shixun_fm_test_label_data = f'{shixun_save_path}fm_test_label.pkl'
shixun_fm_user_embedding_data = f'{shixun_save_path}fm_user_embedding_data.pkl'
shixun_fm_item_embedding_data = f'{shixun_save_path}fm_item_embedding_data.pkl'
shixun_fm_item_embedding_index_dict = f'{shixun_save_path}fm_item_embedding_index_dict.pkl'
shixun_fm_user_embedding_index_dict = f'{shixun_save_path}fm_user_embedding_index_dict.pkl'
shixun_fm_user_emb_dict = f'{shixun_save_path}fm_user_emb_dict.pkl'
shixun_fm_item_emb_dict = f'{shixun_save_path}fm_item_emb_dict.pkl'
shixun_fm_recall_dict = f'{shixun_save_path}fm_recall_dict.pkl'

# MIND-related files (the 0/1 suffixes are part of the original names).
shixun_mind_recall0_dict = f'{shixun_save_path}mind_recall0_dict.pkl'
shixun_mind_recall1_dict = f'{shixun_save_path}mind_recall1_dict.pkl'
shixun_mind_user_emb0_dict = f'{shixun_save_path}mind_user_emb0_dict.pkl'
shixun_mind_user_emb1_dict = f'{shixun_save_path}mind_user_emb1_dict.pkl'
shixun_mind_item_emb_dict = f'{shixun_save_path}mind_item_emb_dict.pkl'
shixun_mind_user_embedding0_index_dict = f'{shixun_save_path}mind_user_embedding0_index_dict.pkl'
shixun_mind_user_embedding1_index_dict = f'{shixun_save_path}mind_user_embedding1_index_dict.pkl'
shixun_mind_item_embedding_index_dict = f'{shixun_save_path}mind_item_embedding_index_dict.pkl'
shixun_mind_train_input_data = f'{shixun_save_path}mind_train_input.pkl'
# Shixun MIND-related files (continued), built from shixun_save_path.
shixun_mind_train_label_data = f'{shixun_save_path}mind_train_label.pkl'
shixun_mind_test_input_data = f'{shixun_save_path}mind_test_input.pkl'
shixun_mind_test_label_data = f'{shixun_save_path}mind_test_label.pkl'
# NOTE: the two *_set_data names below are also assigned earlier in this file
# with the same values; the repeated assignment is kept as in the original.
shixun_mind_train_set_data = f'{shixun_save_path}mind_train_set.pkl'
shixun_mind_test_set_data = f'{shixun_save_path}mind_test_set.pkl'
shixun_mind_user_embedding0_data = f'{shixun_save_path}mind_user_embedding0_data.pkl'
shixun_mind_user_embedding1_data = f'{shixun_save_path}mind_user_embedding1_data.pkl'
shixun_mind_item_embedding_data = f'{shixun_save_path}mind_item_embedding_data.pkl'

# Outputs produced while training the subject (practice course)
# recommendation models; every path is built from subject_save_path.
subject_id_to_name_dict_data = f'{subject_save_path}subject_id_to_name_dict0419.pkl'
subject_itemcf_i2i_sim_data_baseline = f'{subject_save_path}itemcf_i2i_sim_baseline.pkl'
subject_itemcf_recall_baseline_dict = f'{subject_save_path}itemcf_recall_baseline_dict.pkl'
subject_itemcf_i2i_sim_data = f'{subject_save_path}itemcf_i2i_sim.pkl'
subject_user_item_time_dict_data = f'{subject_save_path}user_item_time_dict.pkl'
subject_item_user_time_dict = f'{subject_save_path}item_user_time_dict.pkl'
subject_usercf_u2u_sim_data = f'{subject_save_path}usercf_u2u_sim.pkl'
subject_emb_i2i_sim_data = f'{subject_save_path}emb_i2i_sim.pkl'

# Recall dictionaries.
subject_itemcf_recall_dict = f'{subject_save_path}itemcf_recall_dict.pkl'
subject_usercf_recall_dict = f'{subject_save_path}usercf_recall_dict.pkl'
subject_item_embedding_recall_dict = f'{subject_save_path}item_embedding_recall_dict.pkl'
subject_youtubednn_recall_dict = f'{subject_save_path}youtubednn_recall_dict.pkl'
subject_mind_recall_dict = f'{subject_save_path}mind_recall_dict.pkl'
subject_youtubednn_usercf_recall_dict = f'{subject_save_path}youtubednn_usercf_recall_dict.pkl'
subject_dssm_usercf_recall_dict = f'{subject_save_path}dssm_usercf_recall_dict.pkl'
subject_cold_start_recall_dict = f'{subject_save_path}cold_start_recall_dict.pkl'
subject_final_recall_items_dict = f'{subject_save_path}final_recall_items_dict.pkl'
# Subject result files (continued); every path is built from subject_save_path
# unless noted otherwise.

# Rank dictionaries.
subject_xdeepfm_rank_dict = f'{subject_save_path}xdeepfm_rank_dict.pkl'
subject_difm_rank_dict = f'{subject_save_path}difm_rank_dict.pkl'
subject_bst_rank_dict = f'{subject_save_path}bst_rank_dict.pkl'
subject_din_rank_dict = f'{subject_save_path}din_rank_dict.pkl'

# Word2Vec embedding dictionaries.
subject_item_w2v_emb_dict = f'{subject_save_path}item_w2v_emb_dict.pkl'
subject_user_w2v_emb_dict = f'{subject_save_path}user_w2v_emb_dict.pkl'

# User-item feature tables (stored under subject_features_save_path).
subject_train_user_item_feats = f'{subject_features_save_path}train_user_item_feats_df.csv'
subject_val_user_item_feats = f'{subject_features_save_path}val_user_item_feats_df.csv'
subject_test_user_item_feats = f'{subject_features_save_path}test_user_item_feats_df.csv'
subject_all_user_item_feats = f'{subject_features_save_path}all_user_item_feats_df.csv'

# Item embedding dictionaries.
subjects_emb_dict = f'{subject_save_path}subjects_emb_dict.pkl'
subject_bert_emb_dict = f'{subject_save_path}subjects_bert_emb_dict.pkl'

# YoutubeDNN- and MIND-related files (interleaved as in the original).
subject_youtube_item_emb_dict = f'{subject_save_path}youtube_item_emb_dict.pkl'
subject_mind_item_emb_dict = f'{subject_save_path}mind_item_emb_dict.pkl'
subject_youtube_user_emb_dict = f'{subject_save_path}youtube_user_emb_dict.pkl'
subject_mind_user_emb_dict = f'{subject_save_path}mind_user_emb_dict.pkl'
subject_youtube_item_emb_data = f'{subject_save_path}youtube_item_emb.pkl'
subject_youtube_user_emb_data = f'{subject_save_path}youtube_user_emb.pkl'
subject_user_embedding_index_dict = f'{subject_save_path}user_embedding_index_dict.pkl'
subject_youtube_user_embedding_index_dict = f'{subject_save_path}youtube_user_embedding_index_dict.pkl'
subject_mind_user_embedding_index_dict = f'{subject_save_path}mind_user_embedding_index_dict.pkl'
subject_youtube_item_embedding_index_dict = f'{subject_save_path}youtube_item_embedding_index_dict.pkl'
subject_mind_item_embedding_index_dict = f'{subject_save_path}mind_item_embedding_index_dict.pkl'
subject_youtubednn_train_input_data = f'{subject_save_path}youtubednn_train_input.pkl'
subject_mind_train_input_data = f'{subject_save_path}mind_train_input.pkl'
subject_youtubednn_train_label_data = f'{subject_save_path}youtubednn_train_label.pkl'
subject_mind_train_label_data = f'{subject_save_path}mind_train_label.pkl'
subject_youtubednn_test_input_data = f'{subject_save_path}youtubednn_test_input.pkl'
subject_mind_test_input_data = f'{subject_save_path}mind_test_input.pkl'
subject_youtubednn_test_label_data = f'{subject_save_path}youtubednn_test_label.pkl'
subject_mind_test_label_data = f'{subject_save_path}mind_test_label.pkl'
subject_youtubednn_train_set_data = f'{subject_save_path}youtubednn_train_set.pkl'
subject_mind_train_set_data = f'{subject_save_path}mind_train_set.pkl'
subject_youtubednn_test_set_data = f'{subject_save_path}youtubednn_test_set.pkl'
subject_mind_test_set_data = f'{subject_save_path}mind_test_set.pkl'
subject_youtube_user_embedding_data = f'{subject_save_path}youtube_user_embedding_data.pkl'
subject_mind_user_embedding_data = f'{subject_save_path}mind_user_embedding_data.pkl'
subject_youtube_item_embedding_data = f'{subject_save_path}youtube_item_embedding_data.pkl'
subject_mind_item_embedding_data = f'{subject_save_path}mind_item_embedding_data.pkl'

subject_cold_start_user_subject_dict = f'{subject_save_path}cold_start_user_subject_dict.pkl'
# NOTE: subject_dssm_usercf_recall_dict is also assigned earlier in this file
# with the same value; the repeated assignment is kept as in the original.
subject_dssm_usercf_recall_dict = f'{subject_save_path}dssm_usercf_recall_dict.pkl'
subject_pinsage_recall_dict = f'{subject_save_path}pinsage_recall_dict.pkl'

# MIND-related files with 0/1 suffixes (part of the original names).
subject_mind_recall0_dict = f'{subject_save_path}mind_recall0_dict.pkl'
subject_mind_recall1_dict = f'{subject_save_path}mind_recall1_dict.pkl'
subject_mind_user_emb0_dict = f'{subject_save_path}mind_user_emb0_dict.pkl'
subject_mind_user_emb1_dict = f'{subject_save_path}mind_user_emb1_dict.pkl'
# NOTE: second assignment of subject_mind_item_emb_dict (same value as above).
subject_mind_item_emb_dict = f'{subject_save_path}mind_item_emb_dict.pkl'
subject_mind_user_embedding0_index_dict = f'{subject_save_path}mind_user_embedding0_index_dict.pkl'
subject_mind_user_embedding1_index_dict = f'{subject_save_path}mind_user_embedding1_index_dict.pkl'
# Subject MIND-related files, built from subject_save_path.
# NOTE: several of these names are also assigned earlier in this file with the
# same values; the repeated assignments are kept as in the original.
subject_mind_item_embedding_index_dict = f'{subject_save_path}mind_item_embedding_index_dict.pkl'
subject_mind_train_input_data = f'{subject_save_path}mind_train_input.pkl'
subject_mind_train_label_data = f'{subject_save_path}mind_train_label.pkl'
subject_mind_test_input_data = f'{subject_save_path}mind_test_input.pkl'
subject_mind_test_label_data = f'{subject_save_path}mind_test_label.pkl'
subject_mind_train_set_data = f'{subject_save_path}mind_train_set.pkl'
subject_mind_test_set_data = f'{subject_save_path}mind_test_set.pkl'
subject_mind_user_embedding0_data = f'{subject_save_path}mind_user_embedding0_data.pkl'
subject_mind_user_embedding1_data = f'{subject_save_path}mind_user_embedding1_data.pkl'
subject_mind_item_embedding_data = f'{subject_save_path}mind_item_embedding_data.pkl'

# Subject DSSM-related files.
subject_dssm_train_set_data = f'{subject_save_path}dssm_train_set.pkl'
subject_dssm_test_set_data = f'{subject_save_path}dssm_test_set.pkl'
subject_dssm_train_input_data = f'{subject_save_path}dssm_train_input.pkl'
subject_dssm_train_label_data = f'{subject_save_path}dssm_train_label.pkl'
subject_dssm_test_input_data = f'{subject_save_path}dssm_test_input.pkl'
subject_dssm_test_label_data = f'{subject_save_path}dssm_test_label.pkl'
subject_dssm_user_embedding_data = f'{subject_save_path}dssm_user_embedding_data.pkl'
subject_dssm_item_embedding_data = f'{subject_save_path}dssm_item_embedding_data.pkl'
subject_dssm_item_embedding_index_dict = f'{subject_save_path}dssm_item_embedding_index_dict.pkl'
subject_dssm_user_embedding_index_dict = f'{subject_save_path}dssm_user_embedding_index_dict.pkl'
subject_dssm_user_emb_dict = f'{subject_save_path}dssm_user_emb_dict.pkl'
subject_dssm_item_emb_dict = f'{subject_save_path}dssm_item_emb_dict.pkl'
subject_dssm_recall_dict = f'{subject_save_path}dssm_recall_dict.pkl'

# Subject FM-related files.
subject_fm_train_set_data = f'{subject_save_path}fm_train_set.pkl'
subject_fm_test_set_data = f'{subject_save_path}fm_test_set.pkl'
subject_fm_train_input_data = f'{subject_save_path}fm_train_input.pkl'
subject_fm_train_label_data = f'{subject_save_path}fm_train_label.pkl'
subject_fm_test_input_data = f'{subject_save_path}fm_test_input.pkl'
subject_fm_test_label_data = f'{subject_save_path}fm_test_label.pkl'
subject_fm_user_embedding_data = f'{subject_save_path}fm_user_embedding_data.pkl'
subject_fm_item_embedding_data = f'{subject_save_path}fm_item_embedding_data.pkl'
subject_fm_item_embedding_index_dict = f'{subject_save_path}fm_item_embedding_index_dict.pkl'
subject_fm_user_embedding_index_dict = f'{subject_save_path}fm_user_embedding_index_dict.pkl'
subject_fm_user_emb_dict = f'{subject_save_path}fm_user_emb_dict.pkl'
subject_fm_item_emb_dict = f'{subject_save_path}fm_item_emb_dict.pkl'
subject_fm_recall_dict = f'{subject_save_path}fm_recall_dict.pkl'

# Word2Vec word vectors used by faiss recall.
shixun_faiss_w2v_path = f'{shixun_model_save_path}faiss_word2vec.model'
subject_faiss_w2v_path = f'{subject_model_save_path}faiss_word2vec.model'

# Save paths of the faiss models trained on item names.
shixuns_fassi_model_path = f'{shixun_model_save_path}item_content_fassi.model'
subjects_fassi_model_path = f'{subject_model_save_path}item_content_fassi.model'

# Save paths of the models trained by PinSage (the model directories themselves).
shixun_pinsage_model_path = shixun_model_save_path
subject_pinsage_model_path = subject_model_save_path

# Save paths of the faiss models trained from YoutubeDNN.
shixun_youtube_item_faiss_model_path = f'{shixun_model_save_path}youtube_item_faiss.model'
shixun_youtube_user_faiss_model_path = f'{shixun_model_save_path}youtube_user_faiss.model'
subject_youtube_item_faiss_model_path = f'{subject_model_save_path}youtube_item_faiss.model'
subject_youtube_user_faiss_model_path = f'{subject_model_save_path}youtube_user_faiss.model'

# Save paths of the faiss models trained from the DSSM model.
shixun_dssm_item_faiss_model_path = f'{shixun_model_save_path}dssm_item_faiss.model'
shixun_dssm_user_faiss_model_path = f'{shixun_model_save_path}dssm_user_faiss.model'
subject_dssm_item_faiss_model_path = f'{subject_model_save_path}dssm_item_faiss.model'
subject_dssm_user_faiss_model_path = f'{subject_model_save_path}dssm_user_faiss.model'

# Save paths of the faiss models trained from the FM model.
shixun_fm_item_faiss_model_path = f'{shixun_model_save_path}fm_item_faiss.model'
shixun_fm_user_faiss_model_path = f'{shixun_model_save_path}fm_user_faiss.model'
subject_fm_item_faiss_model_path = f'{subject_model_save_path}fm_item_faiss.model'
subject_fm_user_faiss_model_path = f'{subject_model_save_path}fm_user_faiss.model'

# Save paths of the faiss models trained from the MIND model.
shixun_mind_item_faiss_model_path = f'{shixun_model_save_path}mind_item_faiss.model'
shixun_mind_user0_faiss_model_path = f'{shixun_model_save_path}mind_user0_faiss.model'
shixun_mind_user1_faiss_model_path = f'{shixun_model_save_path}mind_user1_faiss.model'
subject_mind_item_faiss_model_path = f'{subject_model_save_path}mind_item_faiss.model'
subject_mind_user0_faiss_model_path = f'{subject_model_save_path}mind_user0_faiss.model'
subject_mind_user1_faiss_model_path = f'{subject_model_save_path}mind_user1_faiss.model'

# Word2Vec vectors generated from the item sequences selected by users.
shixuns_item_word2vec_model_path = f'{shixun_model_save_path}item_word2vec.model'
subjects_item_word2vec_model_path = f'{subject_model_save_path}item_word2vec.model'
shixuns_user_word2vec_faiss_model_path = f'{shixun_model_save_path}user_word2vec_faiss.model'
subjects_user_word2vec_faiss_model_path = f'{subject_model_save_path}user_word2vec_faiss.model'

# Wikipedia corpus file used to train word2vec.
word2vec_input_path = '/home/plm_models/word2vec/wiki.txt'

# Where the word vectors trained by word2vec are saved.
word2vec_model_path = '/home/plm_models/word2vec/wiki.model'
word2vec_dim = 100

# Embedding dimension.
embedding_dim = 100

# ELMo pre-trained language model path.
elmo_model_path = '/home/plm_models/elmo/zhs.model'

# BERT-family pre-trained language model paths.
bert_pretrain_path = "/home/plm_models/chinese-bert-wwm-ext"
roberta_pretrain_path = "/home/plm_models/chinese-roberta-wwm-ext"
ernie20_pretrain_path = "/home/plm_models/ernie-2.0"
bert_base_chinese = '/home/plm_models/bert-base-chinese'

# sentence-transformers pre-trained language model path.
sentence_transformers_model_path = "/home/plm_models/paraphrase-multilingual-MiniLM-L12-v2"

# LTP model path.
ltp_model_path = "/home/plm_models/ltp/v4/base2"

# Whether to use multiple GPUs.
use_multi_gpu = False

# The two supported tokenisation back-ends.
JIEBA_TOKEN = 'jieba'
LTP_TOKEN = 'ltp'

# Stop-word file path.
stop_words_path = f'{root_path}/stopwords/cn_stopwords.txt'

# HNSW parameters (descriptions translated from the original comments).
# NOTE(review): the name ef_construction suggests a construction-time
# parameter, while the original note describes search time - confirm.
ef_construction = 3000  # size of the dynamic nearest-neighbour list kept during search
M = 64  # maximum number of neighbour nodes per node in the graph

# Random seed.
RANDOM_SEED = 2023

# Create the logger; the log file is named after today's date (YYYY-MM-DD).
logger = create_logger(root_path + '/logs/' + str(datetime.now().date()) + '.log')

# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Number of CPU cores to use: leave two cores free, but never drop below one
# (the plain subtraction gives 0 or a negative number on hosts with <= 2 cores).
cpu_count = max(1, multiprocessing.cpu_count() - 2)

# Feature columns of the LightGBM ranking model.
shixun_lgb_cols = [
    'sim0', 'time_diff0', 'visit_diff0', 'myshixuns_diff0', 'challenges_diff0',
    'sim_max', 'sim_min', 'sim_sum', 'sim_mean',
    'user_item_sim', 'score', 'select_size',
    'user_time_diff_mean', 'active_level', 'item_time_diff_mean',
    'user_num', 'hot_level', 'user_time_hob1', 'user_time_hob2',
    'visits_hbo', 'myshixuns_hbo', 'challenges_hbo', 'averge_star_hbo',
    'visits', 'trainee', 'myshixuns_count', 'challenges_count', 'averge_star',
    'created_at_ts', 'user_visits', 'grade', 'experience', 'is_trainee_hab',
]

subject_lgb_cols = [
    'sim0', 'time_diff0', 'visit_diff0',
    'study_count_diff0', 'course_study_count_diff0', 'passed_count_diff0',
    'course_used_count_diff0', 'school_used_count_diff0',
    'challenge_count_diff0', 'evaluate_count_diff0', 'video_study_time_diff0',
    'study_pdf_attachment_count_diff0', 'averge_star_diff0',
    'sim_max', 'sim_min', 'sim_sum', 'sim_mean',
    'user_item_sim', 'score', 'select_size',
    'user_time_diff_mean', 'active_level', 'item_time_diff_mean',
    'user_num', 'hot_level', 'user_time_hob1', 'user_time_hob2',
    'visits_hbo', 'study_count_hbo', 'course_study_count_hbo',
    'passed_count_hbo', 'course_used_count_hbo', 'school_used_count_hbo',
    'challenge_count_hbo', 'evaluate_count_hbo', 'video_study_time_hbo',
    'study_pdf_attachment_count_hbo', 'averge_star_hbo',
    'visits', 'study_count', 'course_study_count', 'passed_count',
    'course_used_count', 'school_used_count', 'challenge_count',
    'evaluate_count', 'video_study_time', 'study_pdf_attachment_count',
    'averge_star', 'created_at_ts', 'user_visits', 'grade', 'experience',
    'is_disciplines_hab',
]

# Dense features of the ranking models.
shixun_rank_dense_fea = [
    'sim0', 'time_diff0', 'visit_diff0', 'myshixuns_diff0', 'challenges_diff0',
    'trainee_diff0', 'averge_star_diff0', 'task_pass_diff0',
    'sim_max', 'sim_min', 'sim_sum', 'sim_mean',
    'score', 'rank', 'select_size',
    'user_time_diff_mean', 'active_level', 'item_time_diff_mean',
    'seq_length', 'user_num', 'hot_level', 'user_time_hob1', 'user_time_hob2',
    'task_pass_hbo', 'visits_hbo', 'myshixuns_hbo', 'challenges_hbo',
    'averge_star_hbo',
    'visits', 'myshixuns_count', 'challenges_count', 'averge_star',
    'task_pass', 'logins', 'grade', 'edu_background', 'experience',
]

subject_rank_dense_fea = [
    'sim0', 'time_diff0', 'visit_diff0',
    'study_count_diff0', 'course_study_count_diff0', 'passed_count_diff0',
    'course_used_count_diff0', 'school_used_count_diff0', 'stages_count_diff0',
    'challenge_count_diff0', 'evaluate_count_diff0', 'video_study_time_diff0',
    'stages_shixuns_count_diff0', 'study_pdf_attachment_count_diff0',
    'averge_star_diff0', 'stages_count', 'stage_shixuns_count',
    'sim_max', 'sim_min', 'sim_sum', 'sim_mean',
    'score', 'rank', 'select_size',
    'user_time_diff_mean', 'active_level', 'item_time_diff_mean',
    'seq_length', 'user_num', 'hot_level', 'user_time_hob1', 'user_time_hob2',
    'visits_hbo', 'study_count_hbo', 'course_study_count_hbo',
    'passed_count_hbo', 'course_used_count_hbo', 'school_used_count_hbo',
    'challenge_count_hbo', 'evaluate_count_hbo', 'video_study_time_hbo',
    'study_pdf_attachment_count_hbo', 'averge_star_hbo',
    'visits', 'study_count', 'course_study_count', 'passed_count',
    'course_used_count', 'school_used_count', 'challenge_count',
    'evaluate_count', 'video_study_time', 'study_pdf_attachment_count',
    'averge_star', 'logins', 'grade', 'experience',
]

# Sparse features of the ranking models.
shixun_rank_sparse_fea = [
    'user_id', 'shixun_id', 'gender', 'school_id', 'identity',
    'trainee', 'is_trainee_hab',
]
# Sparse features of the subject ranking model.
subject_rank_sparse_fea = [
    'user_id', 'subject_id', 'gender', 'school_id', 'identity',
    'edu_background', 'disciplines_id', 'is_disciplines_hab',
]

# Feature columns of the ranking models.
shixun_rank_feats_columns = ['visits', 'myshixuns_count', 'challenges_count', 'averge_star']
subject_rank_feats_columns = [
    'visits', 'study_count', 'course_study_count', 'passed_count',
    'course_used_count', 'school_used_count', 'challenge_count',
    'evaluate_count', 'video_study_time', 'study_pdf_attachment_count',
    'averge_star',
]

# Maximum length of a user's shixun selection sequence.
shixun_max_seq_len = 500

# Maximum length of a user's subject (course) selection sequence.
subject_max_seq_len = 300

# MySQL connection settings.
# SECURITY NOTE: passwords were hard-coded in this file. Each one can now be
# overridden through an environment variable; the original literal remains the
# default so existing deployments keep working. Rotate these credentials and
# drop the defaults once the environment variables are in place.
mysql_host = "rm-bp13v5020p7828r5rso.mysql.rds.aliyuncs.com"
mysql_user = "testeducoder"
mysql_passwd = os.environ.get("EDUCODER_MYSQL_PASSWD", "TEST@123")
mysql_port = 3306
mysql_database = "preeducoderweb"

# Second MySQL connection. NOTE(review): the original comment labels this
# "online" while the variables are named *_test_* - confirm which environment
# it points at.
mysql_test_host = "testeducoder-public.mysql.polardb.rds.aliyuncs.com"
mysql_test_user = "testeducoder"
mysql_test_passwd = os.environ.get("EDUCODER_MYSQL_TEST_PASSWD", "TEST@123")
mysql_test_port = 3306
mysql_test_database = "educoder_kg_search"

# Fixed ids and names used for testing.
test_user_id = 73547
test_shixun_id = 8222
test_shixun_name = 'Pytorch深度学习 - 多尺度目标检测'
test_subject_id = 4
test_subject_name = '数据结构与算法(C语言)'

# Parameters for learning-path recommendation.
# NOTE(review): these directories hard-code 'sample' instead of using
# samples_mode_flag - confirm that this is intentional.
study_path = root_path + "/study_path_rs/"
study_path_data = study_path + "data/sample/"
kg_data = study_path + 'knowledge_forest_data/sample/'
initial_learning = study_path + 'knowledge_forest_data/initial_learning/'

# Neo4j connection settings (password overridable, see the security note above).
neo4j_url = 'http://47.93.43.185:57474/'
neo4j_username = 'neo4j'
neo4j_password = os.environ.get('EDUCODER_NEO4J_PASSWORD', 'yongge123123')