You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

27 lines
1.4 KiB

5 months ago
import pandas as pd
import os
import sys
sys.path.append(os.getcwd())
from config import subjects_data_path, subjects_embed_path
from config import subjects_merge_emb_path
def merge_item(data,data_em,id):
data = pd.merge(data,data_em,on=[id])
return data
if __name__ == '__main__':
    # Load the two tab-separated tables whose locations come from config.
    subject = pd.read_csv(subjects_data_path, sep='\t', encoding='utf-8')
    subject_em = pd.read_csv(subjects_embed_path, sep='\t', encoding='utf-8')

    # Combine both tables row-wise on their shared 'subject_id' column.
    data = merge_item(subject, subject_em, 'subject_id')

    # Columns stripped from the merged table before it is written out.
    # Each name is listed exactly once (the original list repeated
    # 'subject_name'). With pandas' default errors='raise', a name missing
    # from the merged table raises KeyError.
    dropped_columns = [
        'disciplines_id', 'disciplines_name',
        'sub_discipline_id', 'sub_discipline_name',
        'subject_name', 'status', 'updated_at',
        'stage_shixuns_count', 'publish_time',
        'homepage_show', 'repertoire_id', 'score_count',
        'shixuns_count', 'course_study_count',
        'initiative_study', 'course_used_count', 'school_used_count',
        'initiative_school_used_count',
        'initiative_passed_count', 'initiative_challenge_count',
        'initiative_evaluate_count',
        'video_study_time', 'initiative_video_study_time',
        'initiative_study_pdf_attachment_count',
        'tag_names', 'created_at_ts', 'stages_count', 'study_count',
        'passed_count', 'created_at',
    ]
    subject_data = data.drop(columns=dropped_columns)

    # Show which columns survived, then save the result as a TSV with a
    # header row and without the DataFrame index.
    print(subject_data.columns)
    subject_data.to_csv(subjects_merge_emb_path, sep='\t', index=False, header=True)