Delete 'Controller/Data2ES.py'

main
ptf2oq859 9 months ago
parent 9595e8258d
commit ad1dc0a529

@ -1,100 +0,0 @@
'''
Elasticsearch helpers.

Contains ``get_target``, which downloads a page and extracts the text of the
elements matched by a CSS selector, and ``esController``, a small wrapper
around an Elasticsearch client for creating, deleting, searching and
populating a single index.
'''
from elasticsearch import Elasticsearch
import requests
from bs4 import BeautifulSoup
import datetime
# Default HTTP request headers; get_target() passes them to requests.get().
# Only a browser-style User-Agent string is set.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
}
def get_target(url, selector, timeout=10):
    """Fetch *url* and return the text of every element matching *selector*.

    The response body is decoded as UTF-8 and parsed with BeautifulSoup's
    ``html.parser``.

    This is a best-effort helper: any failure (network error, timeout,
    parsing problem, ...) is logged as a warning and ``None`` is returned
    instead of raising, so callers must be prepared for a ``None`` result.

    :param url: address of the page to download
    :param selector: CSS selector handed to ``BeautifulSoup.select``
    :param timeout: seconds to wait for the server before giving up; pass
        ``None`` to wait indefinitely (the behaviour before this parameter
        existed)
    :return: list of the matched elements' text, or ``None`` on failure
    """
    # Imported locally so the helper does not rely on a module-level import.
    import logging

    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url=url, headers=headers, timeout=timeout)
        response.encoding = 'UTF-8'
        bs = BeautifulSoup(response.text, 'html.parser')
        return [item.text for item in bs.select(selector=selector)]
    except Exception as exc:
        # Keep the original "never raise" contract, but leave a trace of
        # what went wrong instead of discarding the error silently.
        logging.getLogger(__name__).warning(
            "get_target failed for %s: %s", url, exc
        )
        return None
class esController():
    """Small convenience wrapper around an Elasticsearch client.

    Every instance is bound to one index name and one document type; all
    methods operate on that index.
    """

    def __init__(self, index_name, index_type):
        """Create the client and remember which index and type to use.

        :param index_name: name of the index (roughly "which database")
        :param index_type: document type (roughly "which table")
        """
        self.es = Elasticsearch(hosts="localhost:9200")  # client connection
        self.index_name = index_name
        self.index_type = index_type

    def create_index(self):
        """Create the index, dropping and recreating it if it already exists.

        :return: None
        """
        if self.es.indices.exists(index=self.index_name):
            self.es.indices.delete(index=self.index_name)
        self.es.indices.create(index=self.index_name)

    def delete_index(self):
        """Delete the index on a best-effort basis.

        Errors raised by the client (for example because the index does not
        exist) are logged as a warning and otherwise ignored. Interpreter
        level signals such as ``KeyboardInterrupt`` are not intercepted.

        :return: None
        """
        # Imported locally so the class does not rely on a module-level import.
        import logging

        try:
            self.es.indices.delete(index=self.index_name)
        except Exception as exc:
            logging.getLogger(__name__).warning(
                "Could not delete index %r: %s", self.index_name, exc
            )

    def search_index(self, keyword, fields, count):
        """Run a ``multi_match`` query against the index.

        :param keyword: text to search for; note that it is analyzed
            (tokenized) by Elasticsearch
        :param fields: field name, or list of field names, to search in
        :param count: maximum number of hits to return
        :return: the search response returned by the client
        """
        # A bare field name is wrapped in a list so the request always uses
        # the list form of "fields".
        if isinstance(fields, str):
            fields = [fields]
        body = {
            "query": {
                "multi_match": {
                    "query": keyword,  # search text (will be analyzed)
                    "fields": fields,  # fields to search in
                }
            }
        }
        return self.es.search(index=self.index_name, body=body, size=count)

    def get_doc(self, id):
        """Fetch a single document from the index.

        :param id: identifier of the document (the name shadows the builtin
            but is kept because callers may pass it by keyword)
        :return: the response returned by the client's ``get`` call
        """
        return self.es.get(index=self.index_name, id=id)

    def insert_one(self, doc: dict):
        """Index one document.

        :param doc: document content as key/value pairs
        :return: None
        """
        self.es.index(index=self.index_name, doc_type=self.index_type, body=doc)

    def insert_array(self, docs: list):
        """Index several documents, one request per document.

        :param docs: iterable of documents (see ``insert_one``)
        :return: None
        """
        for doc in docs:
            self.insert_one(doc)
if __name__ == '__main__':
    # Demo: search the "tust" index for news whose title matches the
    # keyword, then print the total hit count followed by each hit's score
    # and stored source document.
    controller = esController("tust", "news")
    response = controller.search_index("实验", 'title', 10)
    hits = response['hits']
    print("共有{}条结果".format(hits['total']['value']))
    for hit in hits['hits']:
        print(hit['_score'], hit['_source'])
Loading…
Cancel
Save