You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
27 lines
1.4 KiB
27 lines
1.4 KiB
5 months ago
|
import pandas as pd
|
||
|
import os
|
||
|
import sys
|
||
|
sys.path.append(os.getcwd())
|
||
|
from config import subjects_data_path, subjects_embed_path
|
||
|
from config import subjects_merge_emb_path
|
||
|
|
||
|
def merge_item(data, data_em, id, how='inner'):
    """Join two DataFrames on one or more shared key columns.

    Args:
        data: Left-hand DataFrame.
        data_em: Right-hand DataFrame; must contain the same key column(s)
            as ``data``.
        id: Name of the key column, or a list of key column names, present
            in both frames. (The name shadows the ``id`` builtin; it is kept
            so existing keyword callers keep working.)
        how: Join type forwarded to ``pandas.merge``. Defaults to
            ``'inner'``, the ``pandas.merge`` default.

    Returns:
        The merged DataFrame produced by ``pandas.merge``.
    """
    # A single column name is wrapped in a list; a list of names is used as is.
    keys = id if isinstance(id, list) else [id]
    return pd.merge(data, data_em, on=keys, how=how)
|
||
|
|
||
|
if __name__ == '__main__':
    # Load the two tab-separated, UTF-8 input tables from the configured paths.
    subject = pd.read_csv(subjects_data_path, sep='\t', encoding='utf-8')
    subject_em = pd.read_csv(subjects_embed_path, sep='\t', encoding='utf-8')

    # Join the two tables on their shared 'subject_id' column.
    data = merge_item(subject, subject_em, 'subject_id')

    # Columns removed from the merged table before it is written out.
    # Each label is listed once; every label must exist in the merged frame,
    # otherwise DataFrame.drop raises KeyError.
    drop_columns = [
        'disciplines_id', 'disciplines_name',
        'sub_discipline_id', 'sub_discipline_name',
        'subject_name', 'status', 'updated_at', 'stage_shixuns_count',
        'publish_time', 'homepage_show', 'repertoire_id', 'score_count',
        'shixuns_count', 'course_study_count', 'initiative_study',
        'course_used_count', 'school_used_count',
        'initiative_school_used_count', 'initiative_passed_count',
        'initiative_challenge_count', 'initiative_evaluate_count',
        'video_study_time', 'initiative_video_study_time',
        'initiative_study_pdf_attachment_count', 'tag_names',
        'created_at_ts', 'stages_count', 'study_count', 'passed_count',
        'created_at',
    ]
    subject_data = data.drop(drop_columns, axis=1)

    # Show which columns remain, then write the result as a TSV with a header
    # row and without the DataFrame index.
    print(subject_data.columns)
    subject_data.to_csv(subjects_merge_emb_path, sep='\t', index=False, header=True)
|