'''
Elasticsearch (es) helpers: scrape text from web pages and create, fill and
search an Elasticsearch index.
'''

import datetime
import logging

import requests
from bs4 import BeautifulSoup
from elasticsearch import Elasticsearch


# Request headers sent with every page download; the User-Agent mimics a
# desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'
    ),
}


def get_target(url, selector, timeout=10):
    """Download a page and return the text of every element matching a selector.

    :param url: address of the page to fetch
    :param selector: CSS selector handed to ``BeautifulSoup.select``
    :param timeout: seconds to wait for the server before giving up; without
        a timeout ``requests.get`` can block indefinitely
    :return: list holding the ``.text`` of each matched element, or ``None``
        when the download or parsing fails (the failure is logged)
    """
    try:
        response = requests.get(url=url, headers=headers, timeout=timeout)
        # Decode the body as UTF-8 regardless of what requests detected.
        response.encoding = 'UTF-8'
        soup = BeautifulSoup(response.text, 'html.parser')
        return [node.text for node in soup.select(selector=selector)]
    except Exception as exc:
        # Best-effort by design: never raise into the caller, but leave a
        # trace instead of swallowing the error silently.
        logging.getLogger(__name__).warning(
            "get_target failed for %s (selector %r): %s", url, selector, exc
        )
        return None


class esController:
    """Small convenience wrapper around a single Elasticsearch index.

    The instance stores an index name and a document type and forwards the
    operations below to the ``Elasticsearch`` client kept in ``self.es``.
    """

    def __init__(self, index_name, index_type, hosts="localhost:9200"):
        """Create the client connection and remember which index/type to use.

        :param index_name: index to work on (roughly "which database")
        :param index_type: document type used when indexing (roughly "which table")
        :param hosts: node address(es) passed to ``Elasticsearch``; defaults to
            the previously hard-coded ``"localhost:9200"``
        """
        self.es = Elasticsearch(hosts=hosts)  # client connection object
        self.index_name = index_name
        self.index_type = index_type

    def create_index(self):
        """Create the index, deleting it first if it already exists.

        :return: None
        """
        if self.es.indices.exists(index=self.index_name):
            self.es.indices.delete(index=self.index_name)
        self.es.indices.create(index=self.index_name)

    def delete_index(self):
        """Delete the index, best-effort.

        Any error raised by the client is logged and suppressed, so calling
        this never raises an ``Exception`` into the caller.

        :return: None
        """
        try:
            self.es.indices.delete(index=self.index_name)
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            logging.getLogger(__name__).warning(
                "could not delete index %r: %s", self.index_name, exc
            )

    def search_index(self, keyword, fields, count):
        """Run a ``multi_match`` query against the index.

        :param keyword: text to search for (note: it gets tokenised)
        :param fields: field name, or list of field names, to search in
        :param count: maximum number of hits to return
        :return: whatever ``Elasticsearch.search`` returns
        """
        # Accept a single field name as well as a list of names.
        if isinstance(fields, str):
            fields = [fields]
        body = {
            "query": {
                "multi_match": {
                    "query": keyword,  # search text; it gets tokenised
                    "fields": fields,  # fields to search in
                }
            }
        }
        return self.es.search(index=self.index_name, body=body, size=count)

    def get_doc(self, id):
        """Fetch one document of the index by its id.

        :param id: identifier of the document
        :return: whatever ``Elasticsearch.get`` returns
        """
        return self.es.get(index=self.index_name, id=id)

    def insert_one(self, doc: dict):
        """Index a single document.

        :param doc: document content as key/value pairs
        :return: None
        """
        # NOTE(review): ``doc_type`` looks deprecated in Elasticsearch 7 and
        # unsupported by 8.x clients - confirm against the client version used.
        self.es.index(index=self.index_name, doc_type=self.index_type, body=doc)

    def insert_array(self, docs: list):
        """Index several documents, one request per document.

        :param docs: documents to index, each as key/value pairs
        :return: None
        """
        for doc in docs:
            self.insert_one(doc)


if __name__ == '__main__':
    # Search the "tust" index for news whose title matches the keyword,
    # then print the total hit count followed by each hit's score and source.
    controller = esController("tust", "news")
    result = controller.search_index("实验", 'title', 10)
    hits = result['hits']
    print("共有{}条结果".format(hits['total']['value']))
    for hit in hits['hits']:
        print(hit['_score'], hit['_source'])