You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

101 lines
2.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

'''
es: helpers for scraping page text and for working with an Elasticsearch index.
'''
import datetime
import logging

import requests
from bs4 import BeautifulSoup
from elasticsearch import Elasticsearch
# Default HTTP headers for outgoing requests: a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/62.0.3202.62 Safari/537.36'
    ),
}
def get_target(url, selector):
    """Download a page and return the text of every element matching a CSS selector.

    :param url: address of the page to fetch
    :param selector: CSS selector passed to BeautifulSoup's ``select``
    :return: list with the ``.text`` of each matching element, or ``None``
        when fetching or parsing fails (the failure is logged, not raised)
    """
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(url=url, headers=headers, timeout=10)
        # The body is always decoded as UTF-8, whatever encoding was detected.
        response.encoding = 'UTF-8'
        soup = BeautifulSoup(response.text, 'html.parser')
        return [element.text for element in soup.select(selector=selector)]
    except Exception:
        # Best-effort by design: keep the "None on failure" contract that
        # callers already see, but leave a trace instead of failing silently.
        logging.getLogger(__name__).exception("get_target failed for %s", url)
        return None
class esController:
    """Small wrapper around an Elasticsearch client bound to one index and one type."""

    def __init__(self, index_name, index_type, hosts="localhost:9200"):
        '''
        Create the client connection and remember which index/type to work on.
        :param index_name: index to operate on (roughly "which database")
        :param index_type: document type used when indexing (roughly "which table")
        :param hosts: host(s) handed to ``Elasticsearch``; defaults to the
            local node the class originally hard-coded
        '''
        self.es = Elasticsearch(hosts=hosts)  # create the connection object
        self.index_name = index_name
        self.index_type = index_type

    def create_index(self):
        '''
        Create the index; if it already exists it is deleted first and then
        recreated, so any existing documents in it are lost.
        :return: None
        '''
        if self.es.indices.exists(index=self.index_name):
            self.es.indices.delete(index=self.index_name)
        self.es.indices.create(index=self.index_name)

    def delete_index(self):
        '''
        Delete the index, best-effort: a failure is logged and never raised.
        :return: None
        '''
        try:
            self.es.indices.delete(index=self.index_name)
        except Exception as exc:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt/SystemExit still propagate.
            logging.getLogger(__name__).warning(
                "could not delete index %r: %s", self.index_name, exc
            )

    def search_index(self, keyword, fields, count):
        '''
        Run a ``multi_match`` query against the index.
        :param keyword: text to search for (note: it will be analyzed/tokenized)
        :param fields: field name or list of field names to search in
        :param count: maximum number of hits to return
        :return: the response returned by the client's ``search`` call
        '''
        # Accept a single field name as well as a list of names.
        if isinstance(fields, str):
            fields = [fields]
        body = {
            "query": {
                "multi_match": {
                    "query": keyword,  # the text to look for; it gets tokenized
                    "fields": fields,  # restrict the search to these fields
                }
            }
        }
        return self.es.search(index=self.index_name, body=body, size=count)

    def get_doc(self, id):
        '''
        Fetch a single document from the index.
        :param id: identifier of the document (name kept for keyword callers,
            even though it shadows the builtin)
        :return: the response returned by the client's ``get`` call
        '''
        return self.es.get(index=self.index_name, id=id)

    def insert_one(self, doc: dict):
        '''
        Index a single document.
        :param doc: the document content as key/value pairs
        :return: None
        '''
        self.es.index(index=self.index_name, doc_type=self.index_type, body=doc)

    def insert_array(self, docs: list):
        '''
        Index several documents, one request per document, in order.
        :param docs: iterable of documents (each a dict of key/value pairs)
        :return: None
        '''
        for doc in docs:
            # Reuse insert_one so both paths always index the same way.
            self.insert_one(doc)
if __name__ == '__main__':
    # Demo: search the "tust" index for news whose title matches the keyword,
    # then print the reported total and each hit's score and source document.
    controller = esController("tust", "news")
    response = controller.search_index("实验", 'title', 10)
    hits = response['hits']
    print("共有{}条结果".format(hits['total']['value']))
    for hit in hits['hits']:
        print(hit['_score'], hit['_source'])