You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
22 lines
856 B
22 lines
856 B
5 months ago
|
import pandas as pd
|
||
|
import os
|
||
|
import sys
|
||
|
sys.path.append(os.getcwd())
|
||
|
from config import shixuns_data_path, shixuns_embed_path
|
||
|
from config import shixun_merge_emb_path
|
||
|
|
||
|
def merge_item(data, data_em, id):
    """Join two DataFrames on a single shared key column.

    Args:
        data: Left-hand DataFrame.
        data_em: Right-hand DataFrame; it must also contain the key column.
        id: Name of the key column present in both frames. The name shadows
            the ``id`` builtin but is kept so existing keyword callers work.

    Returns:
        A new DataFrame holding the rows whose key value occurs in both
        inputs (inner join), with the columns of both frames.
    """
    join_keys = [id]
    # "inner" is the pandas default; it is spelled out to make the row
    # filtering visible to the reader.
    return pd.merge(left=data, right=data_em, how="inner", on=join_keys)
|
||
|
|
||
|
if __name__ == '__main__':
    # Script entry point: read the two tab-separated inputs, join them on
    # 'shixun_id', discard the columns listed below and write the result
    # back out as a tab-separated file with a header row and no index.

    # Columns removed from the merged table before it is saved. A missing
    # column makes DataFrame.drop raise KeyError, exactly as before.
    unused_columns = [
        'shixun_name', 'created_at', 'updated_at', 'status', 'publish_time',
        'language', 'modify_time', 'reset_time', 'disciplines_id', 'disciplines_name',
        'subject_id', 'subject_name', 'created_at_ts',
    ]

    # Both inputs are read as UTF-8, tab-separated text.
    items = pd.read_csv(shixuns_data_path, sep='\t', encoding='utf-8')
    # presumably one embedding row per shixun_id -- TODO confirm against the producer
    item_vectors = pd.read_csv(shixuns_embed_path, sep='\t', encoding='utf-8')

    merged = merge_item(items, item_vectors, 'shixun_id')
    trimmed = merged.drop(columns=unused_columns)

    trimmed.to_csv(shixun_merge_emb_path, sep='\t', index=False, header=True)
|