"""Join the subjects table with its embedding table and save the result.

The script reads two tab-separated UTF-8 files whose locations come from
``config``, merges them on ``subject_id``, drops the columns listed in
``_DROPPED_COLUMNS`` and writes the remaining columns to
``subjects_merge_emb_path`` as a tab-separated file.
"""
import os
import sys

import pandas as pd

# The working directory is put on the import path before ``config`` is
# imported; presumably ``config.py`` lives in the directory the script is
# launched from -- verify against the project layout.
sys.path.append(os.getcwd())

from config import subjects_data_path, subjects_embed_path  # noqa: E402
from config import subjects_merge_emb_path  # noqa: E402

# Columns removed from the merged frame before it is written out.
# Kept as a list (not a tuple): ``DataFrame.drop`` treats a tuple as a
# single label rather than as a collection of labels.
_DROPPED_COLUMNS = [
    'disciplines_id', 'disciplines_name',
    'sub_discipline_id', 'sub_discipline_name',
    'subject_name',
    'status',
    'updated_at',
    'stage_shixuns_count',
    'publish_time',
    'homepage_show',
    'repertoire_id',
    'score_count', 'shixuns_count', 'course_study_count',
    'initiative_study', 'course_used_count', 'school_used_count',
    'initiative_school_used_count',
    'initiative_passed_count', 'initiative_challenge_count',
    'initiative_evaluate_count',
    'video_study_time', 'initiative_video_study_time',
    'initiative_study_pdf_attachment_count',
    'tag_names', 'created_at_ts', 'stages_count', 'study_count',
    'passed_count', 'created_at',
]


def merge_item(data: pd.DataFrame, data_em: pd.DataFrame, id: str) -> pd.DataFrame:
    """Merge two frames on a single shared key column.

    Args:
        data: Left-hand frame.
        data_em: Right-hand frame.
        id: Name of the column present in both frames to join on. The
            name shadows the ``id`` builtin; it is kept unchanged so that
            existing keyword callers continue to work.

    Returns:
        The result of ``pd.merge(data, data_em, on=[id])``. No ``how`` is
        passed, so pandas' default join type (inner) applies.
    """
    return pd.merge(data, data_em, on=[id])


def main() -> None:
    """Read both input files, merge them, drop unused columns and save."""
    subject = pd.read_csv(subjects_data_path, sep='\t', encoding='utf-8')
    subject_em = pd.read_csv(subjects_embed_path, sep='\t', encoding='utf-8')

    merged = merge_item(subject, subject_em, 'subject_id')

    # ``drop`` raises KeyError if any listed column is missing from the
    # merged frame (pandas default ``errors='raise'``).
    subject_data = merged.drop(_DROPPED_COLUMNS, axis=1)
    print(subject_data.columns)

    subject_data.to_csv(subjects_merge_emb_path, sep='\t',
                        index=False, header=True)


if __name__ == '__main__':
    main()