# EduCoder_Study_RS/matching/subject/item_merge_emb.py

import pandas as pd
import os
import sys
sys.path.append(os.getcwd())
from config import subjects_data_path, subjects_embed_path
from config import subjects_merge_emb_path

def merge_item(data, data_em, id):
    """Join two tables on a single shared key column.

    Args:
        data: Left-hand table.
        data_em: Right-hand table to be joined onto ``data``.
        id: Name of the key column present in both tables.

    Returns:
        The inner join of ``data`` and ``data_em`` on ``id``; only rows
        whose key value appears in both tables are kept.
    """
    # ``how="inner"`` is the pandas default, spelled out here for clarity.
    return pd.merge(left=data, right=data_em, how="inner", on=[id])

if __name__ == '__main__':
    # Load both tab-separated inputs from the paths configured in config.py.
    subject = pd.read_csv(subjects_data_path, sep='\t', encoding='utf-8')
    subject_em = pd.read_csv(subjects_embed_path, sep='\t', encoding='utf-8')

    # Join the two tables on their shared 'subject_id' column.
    data = merge_item(subject, subject_em, 'subject_id')

    # Columns removed from the merged table before it is saved. Every name
    # listed here must exist in the merged frame, otherwise DataFrame.drop
    # raises KeyError.
    drop_columns = [
        'disciplines_id', 'disciplines_name',
        'sub_discipline_id', 'sub_discipline_name',
        'subject_name', 'status', 'updated_at', 'stage_shixuns_count',
        'publish_time', 'homepage_show', 'repertoire_id', 'score_count',
        'shixuns_count', 'course_study_count', 'initiative_study',
        'course_used_count', 'school_used_count',
        'initiative_school_used_count', 'initiative_passed_count',
        'initiative_challenge_count', 'initiative_evaluate_count',
        'video_study_time', 'initiative_video_study_time',
        'initiative_study_pdf_attachment_count', 'tag_names',
        'created_at_ts', 'stages_count', 'study_count', 'passed_count',
        'created_at',
    ]
    subject_data = data.drop(drop_columns, axis=1)

    # Show which columns remain, then write the result as a TSV with a
    # header row and no index column, using the same encoding as the inputs.
    print(subject_data.columns)
    subject_data.to_csv(subjects_merge_emb_path, sep='\t', index=False,
                        header=True, encoding='utf-8')