"""Helpers for scraping page text and storing/searching it in Elasticsearch."""
import datetime  # not used by the code visible here; kept so nothing relying on it breaks
import logging

import requests
from bs4 import BeautifulSoup
from elasticsearch import Elasticsearch

logger = logging.getLogger(__name__)

# Browser-like User-Agent sent with every scraping request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
}


def get_target(url, selector, timeout=10):
    """Download *url* and return the text of every element matching *selector*.

    The response is decoded as UTF-8 and parsed with BeautifulSoup's
    ``html.parser`` backend.

    :param url: address of the page to download
    :param selector: CSS selector handed to ``BeautifulSoup.select``
    :param timeout: seconds to wait for the server before giving up
    :return: list with the text of each matched element, or ``None`` when
        the download or the parsing failed
    """
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url=url, headers=headers, timeout=timeout)
        response.encoding = 'UTF-8'
        soup = BeautifulSoup(response.text, 'html.parser')
        return [item.text for item in soup.select(selector=selector)]
    except Exception:
        # Scraping stays best-effort (callers still get None), but the
        # failure is now recorded instead of vanishing silently.
        logger.exception("Failed to fetch %r with selector %r", url, selector)
        return None


class esController():
    """Small wrapper around an Elasticsearch client bound to one index."""

    def __init__(self, index_name, index_type):
        """Create the client and remember which index and type to work on.

        :param index_name: name of the index (comparable to a database)
        :param index_type: name of the document type (comparable to a table)
        """
        self.es = Elasticsearch(hosts="localhost:9200")  # client connection
        self.index_name = index_name
        self.index_type = index_type

    def create_index(self):
        """Create the index, deleting any existing index of the same name first.

        :return: None
        """
        if self.es.indices.exists(index=self.index_name):
            self.es.indices.delete(index=self.index_name)
        self.es.indices.create(index=self.index_name)

    def delete_index(self):
        """Delete the index; failures are logged and otherwise ignored.

        :return: None
        """
        try:
            self.es.indices.delete(index=self.index_name)
        except Exception:
            # Deliberately best-effort (e.g. the index may not exist), but
            # no longer traps KeyboardInterrupt/SystemExit and leaves a trace.
            logger.warning(
                "Could not delete index %r", self.index_name, exc_info=True
            )

    def search_index(self, keyword, fields, count):
        """Run a ``multi_match`` query against the index.

        :param keyword: text to search for
        :param fields: field name, or list of field names, to search in
        :param count: maximum number of hits to return
        :return: the response returned by ``Elasticsearch.search``
        """
        # Accept a single field name as well as a list of names.
        if isinstance(fields, str):
            fields = [fields]
        body = {
            "query": {
                "multi_match": {
                    "query": keyword,  # search text; note that it gets tokenized
                    "fields": fields,  # fields the query is matched against
                }
            }
        }
        return self.es.search(index=self.index_name, body=body, size=count)

    def get_doc(self, id):
        """Fetch one document from the index by its id.

        :param id: identifier of the document
        :return: the response returned by ``Elasticsearch.get``
        """
        return self.es.get(index=self.index_name, id=id)

    def insert_one(self, doc: dict):
        """Index a single document.

        :param doc: document content as key/value pairs
        :return: None
        """
        # NOTE(review): ``doc_type`` looks tied to older Elasticsearch
        # clients -- confirm against the client version actually deployed.
        self.es.index(index=self.index_name, doc_type=self.index_type, body=doc)

    def insert_array(self, docs: list):
        """Index several documents, one request per document.

        :param docs: list of documents, each as key/value pairs
        :return: None
        """
        for doc in docs:
            self.insert_one(doc)


if __name__ == '__main__':
    # Search the stored news and print every hit with its score.
    es = esController("tust", "news")
    res = es.search_index("实验", 'title', 10)
    print("共有{}条结果".format(res['hits']['total']['value']))
    for item in res['hits']['hits']:
        print(item['_score'], item['_source'])