diff --git a/ArticleSpider/ArticleSpider/__pycache__/settings.cpython-39.pyc b/ArticleSpider/ArticleSpider/__pycache__/settings.cpython-39.pyc
index 22bc2d0..b08b487 100644
Binary files a/ArticleSpider/ArticleSpider/__pycache__/settings.cpython-39.pyc and b/ArticleSpider/ArticleSpider/__pycache__/settings.cpython-39.pyc differ
diff --git a/ArticleSpider/ArticleSpider/settings.py b/ArticleSpider/ArticleSpider/settings.py
index 48bdb61..abdc2b1 100644
--- a/ArticleSpider/ArticleSpider/settings.py
+++ b/ArticleSpider/ArticleSpider/settings.py
@@ -1,115 +1,115 @@
-# Scrapy settings for ArticleSpider project
-#
-# For simplicity, this file contains only settings considered important or
-# commonly used. You can find more settings consulting the documentation:
-#
-# https://docs.scrapy.org/en/latest/topics/settings.html
-# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
-# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
-import os
-import sys
-
-import scrapy.downloadermiddlewares.useragent
-
-import ArticleSpider.pipelines
-
-BOT_NAME = "ArticleSpider"
-
-SPIDER_MODULES = ["ArticleSpider.spiders"]
-NEWSPIDER_MODULE = "ArticleSpider.spiders"
-
-# Crawl responsibly by identifying yourself (and your website) on the user-agent
-USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
-
-# Obey robots.txt rules
-ROBOTSTXT_OBEY = False
-
-# Configure maximum concurrent requests performed by Scrapy (default: 16)
-# CONCURRENT_REQUESTS = 32
-
-# Configure a delay for requests for the same website (default: 0)
-# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
-# See also autothrottle settings and docs
-# DOWNLOAD_DELAY = 3
-# The download delay setting will honor only one of:
-# CONCURRENT_REQUESTS_PER_DOMAIN = 16
-# CONCURRENT_REQUESTS_PER_IP = 16
-
-# Disable cookies (enabled by default)
-COOKIES_ENABLED = True
-COOKIES_DEBUG = True
-
-# Disable Telnet Console (enabled by default)
-# TELNETCONSOLE_ENABLED = False
-
-# Override the default request headers:
-# DEFAULT_REQUEST_HEADERS = {
-# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-# "Accept-Language": "en",
-# }
-
-# Enable or disable spider middlewares
-# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
-# SPIDER_MIDDLEWARES = {
-# "ArticleSpider.middlewares.ArticlespiderSpiderMiddleware": 543,
-# }
-
-# Enable or disable downloader middlewares
-# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
-DOWNLOADER_MIDDLEWARES = {
- # "ArticleSpider.middlewares.ArticlespiderDownloaderMiddleware": 543,
- 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': 2,
-}
-
-# Enable or disable extensions
-# See https://docs.scrapy.org/en/latest/topics/extensions.html
-# EXTENSIONS = {
-# "scrapy.extensions.telnet.TelnetConsole": None,
-# }
-
-# Configure item pipelines
-# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
-ITEM_PIPELINES = {
- # 'scrapy.pipelines.images.ImagesPipeline': 1,
- # 'ArticleSpider.pipelines.JsonWithEncodingPipeline': 2,
- # 'ArticleSpider.pipelines.JsonExporterPipeline': 3,
- # 'ArticleSpider.pipelines.MysqlPipeline': 4,
- # 'ArticleSpider.pipelines.MysqlTwistedPipline': 5,
- 'ArticleSpider.pipelines.ElasticsearchPipeline': 6,
- 'ArticleSpider.pipelines.ArticlespiderPipeline': 300,
-}
-
-# Enable and configure the AutoThrottle extension (disabled by default)
-# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
-# AUTOTHROTTLE_ENABLED = True
-# The initial download delay
-# AUTOTHROTTLE_START_DELAY = 5
-# The maximum download delay to be set in case of high latencies
-# AUTOTHROTTLE_MAX_DELAY = 60
-# The average number of requests Scrapy should be sending in parallel to
-# each remote server
-# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
-# Enable showing throttling stats for every response received:
-# AUTOTHROTTLE_DEBUG = False
-
-# Enable and configure HTTP caching (disabled by default)
-# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
-# HTTPCACHE_ENABLED = True
-# HTTPCACHE_EXPIRATION_SECS = 0
-# HTTPCACHE_DIR = "httpcache"
-# HTTPCACHE_IGNORE_HTTP_CODES = []
-# HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
-
-# Set settings whose default value is deprecated to a future-proof value
-REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
-TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
-FEED_EXPORT_ENCODING = "utf-8"
-
-# IMAGES_URLS_FIELD = 'front_image_url'
-project_dir = os.path.abspath(os.path.dirname(__file__))
-IMAGES_STORE = os.path.join(project_dir, 'images')
-
-MYSQL_HOST = '127.0.0.1'
-MYSQL_DBNAME = 'article_spider'
-MYSQL_USER = 'root'
+# Scrapy settings for ArticleSpider project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+# https://docs.scrapy.org/en/latest/topics/settings.html
+# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+import os
+import sys
+
+import scrapy.downloadermiddlewares.useragent
+
+import ArticleSpider.pipelines
+
+BOT_NAME = "ArticleSpider"
+
+SPIDER_MODULES = ["ArticleSpider.spiders"]
+NEWSPIDER_MODULE = "ArticleSpider.spiders"
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
+
+# Obey robots.txt rules
+ROBOTSTXT_OBEY = False
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+# CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+# DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+# CONCURRENT_REQUESTS_PER_DOMAIN = 16
+# CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+COOKIES_ENABLED = True
+COOKIES_DEBUG = True
+
+# Disable Telnet Console (enabled by default)
+# TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+# DEFAULT_REQUEST_HEADERS = {
+# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+# "Accept-Language": "en",
+# }
+
+# Enable or disable spider middlewares
+# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+# SPIDER_MIDDLEWARES = {
+# "ArticleSpider.middlewares.ArticlespiderSpiderMiddleware": 543,
+# }
+
+# Enable or disable downloader middlewares
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+DOWNLOADER_MIDDLEWARES = {
+ # "ArticleSpider.middlewares.ArticlespiderDownloaderMiddleware": 543,
+ 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': 2,
+}
+
+# Enable or disable extensions
+# See https://docs.scrapy.org/en/latest/topics/extensions.html
+# EXTENSIONS = {
+# "scrapy.extensions.telnet.TelnetConsole": None,
+# }
+
+# Configure item pipelines
+# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
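+# Lower numbers run first; only the Elasticsearch pipeline and the default project pipeline are enabled here.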
+ITEM_PIPELINES = {
+ # 'scrapy.pipelines.images.ImagesPipeline': 1,
+ # 'ArticleSpider.pipelines.JsonWithEncodingPipeline': 2,
+ # 'ArticleSpider.pipelines.JsonExporterPipeline': 3,
+ # 'ArticleSpider.pipelines.MysqlPipeline': 4,
+ # 'ArticleSpider.pipelines.MysqlTwistedPipline': 5,
+ 'ArticleSpider.pipelines.ElasticsearchPipeline': 6,
+ 'ArticleSpider.pipelines.ArticlespiderPipeline': 300,
+}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
+# AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+# AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+# AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+# AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+# HTTPCACHE_ENABLED = True
+# HTTPCACHE_EXPIRATION_SECS = 0
+# HTTPCACHE_DIR = "httpcache"
+# HTTPCACHE_IGNORE_HTTP_CODES = []
+# HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
+
+# Set settings whose default value is deprecated to a future-proof value
+REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
+TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
+FEED_EXPORT_ENCODING = "utf-8"
+
+# IMAGES_URLS_FIELD = 'front_image_url'
+project_dir = os.path.abspath(os.path.dirname(__file__))
+IMAGES_STORE = os.path.join(project_dir, 'images')
+
+MYSQL_HOST = '127.0.0.1'
+MYSQL_DBNAME = 'article_spider'
+MYSQL_USER = 'root'
MYSQL_PASSWORD = 'qweasdzxc227'
\ No newline at end of file
diff --git a/ArticleSpider/ArticleSpider/spiders/__pycache__/jobbole.cpython-39.pyc b/ArticleSpider/ArticleSpider/spiders/__pycache__/jobbole.cpython-39.pyc
index b0ada79..639c1c6 100644
Binary files a/ArticleSpider/ArticleSpider/spiders/__pycache__/jobbole.cpython-39.pyc and b/ArticleSpider/ArticleSpider/spiders/__pycache__/jobbole.cpython-39.pyc differ
diff --git a/ArticleSpider/ArticleSpider/spiders/jobbole.py b/ArticleSpider/ArticleSpider/spiders/jobbole.py
index 3bb86bc..fa0fcc5 100644
--- a/ArticleSpider/ArticleSpider/spiders/jobbole.py
+++ b/ArticleSpider/ArticleSpider/spiders/jobbole.py
@@ -1,128 +1,87 @@
-import json
-import re
-import os
-import requests
-import scrapy
-import pickle
-import datetime
-from scrapy.http import Request
-from urllib import parse
-from scrapy.loader import ItemLoader
-from ArticleSpider.items import ArticleItemLoader
-from ArticleSpider.items import JobBoleArticleItem
-from ArticleSpider.utils import common
-from ArticleSpider.utils.common import get_md5
-from scrapy import signals
-import time
-from selenium import webdriver
-from scrapy.loader import ItemLoader
-
-
-class JobboleSpider(scrapy.Spider):
- name = "jobbole"
- allowed_domains = ["news.cnblogs.com"]
- start_urls = ["http://news.cnblogs.com/"]
-
- def start_requests(self):
- cookies = []
- if os.path.exists(r'C:\Users\10955\ArticleSpider\cookies\jobbole.cookie'):
- cookies = pickle.load(open(r'C:\Users\10955\ArticleSpider\cookies\jobbole.cookie', 'rb'))
- if not cookies:
- driver = webdriver.Chrome()
- driver.implicitly_wait(10)
- # Open the sign-in page
- driver.get('https://account.cnblogs.com/signin')
- # Hide navigator.webdriver so the site's bot detection does not invalidate the captcha click
- driver.execute_script("Object.defineProperties(navigator,{webdriver:{get:()=>undefined}})")
- # Enter the username
- driver.find_element_by_id('mat-input-0').send_keys('包包1')
- # Enter the password
- driver.find_element_by_id('mat-input-1').send_keys('qweasdzxc227')
- # Click the sign-in button
- driver.find_element_by_css_selector('.mat-button-wrapper').click()
- # Click the captcha
- driver.find_element_by_xpath('//*[@id="Shape3"]').click()
- time.sleep(5)
- cookies = driver.get_cookies()
- pickle.dump(cookies, open(r'C:\Users\10955\ArticleSpider\cookies\jobbole.cookie', 'wb'))
- cookie_dict = {}
- for cookie in cookies:
- cookie_dict[cookie['name']] = cookie['value']
- for url in self.start_urls:
- yield scrapy.Request(url, dont_filter=True, cookies=cookie_dict)
- # cookie_dict = {cookie['name']: cookie['value'] for cookie in cookies}
- # print(cookies)
- # print(cookie_dict)
- # yield scrapy.Request(url='https://account.cnblogs.com/signin', callback=self.parse, cookies=cookie_dict)
-
- def parse(self, response):
- # 1. Extract the article URLs from the news list page, let Scrapy download them, then call the matching parse callback
- # Pull out the article links; extract_first() returns the first match
- post_nodes = response.css('#news_list .news_block')[:1]
- for post_node in post_nodes:
- image_url = "https:" + post_node.css('.entry_summary a img::attr(src)').extract_first("")
- post_url = post_node.css('h2 a::attr(href)').extract_first("")
- yield Request(url=parse.urljoin(response.url, post_url), meta={'front_image_url': image_url}, callback=self.parse_detail, dont_filter=True)
- # 2. Extract the next-page URL, let Scrapy download it, and feed it back to parse() to keep following
- # next_url = response.css('div.pager a:last-child::attr(href)').extract_first("")
- # yield Request(url=parse.urljoin(response.url, next_url), callback=self.parse)
-
- def parse_detail(self, response):
- match_re = re.match(".*?(\d+)", response.url)
- if match_re:
- post_id = match_re.group(1)
- # article_item = JobBoleArticleItem()
- # title = response.css('#news_title a::text').extract_first("")
- # create_date = response.css('#news_info .time::text').extract_first("")
- # match_re = re.match('.*?(\d+.*)', create_date)
- # if match_re:
- # create_date = match_re.group(1)
- # # create_date = response.xpath('//*[@id="news_info"]//*[@class="time"]/text()').extract_first("")
- #
- # content = response.css('#news_content').extract()[0]
- # tag_list = response.css('.news_tags a::text').extract()
- # tags = ','.join(tag_list)
- # article_item['title'] = title
- # article_item['create_date'] = create_date
- # article_item['content'] = content
- # article_item['tags'] = tags
- # article_item['url'] = response.url
- # if response.meta.get('front_image_url', ""):
- # article_item['front_image_url'] = [response.meta.get('front_image_url', "")]
- # else:
- # article_item['front_image_url'] = []
- item_loader = ArticleItemLoader(item=JobBoleArticleItem(), response=response)
- item_loader.add_css('title', '#news_title a::text')
- item_loader.add_css('content', '#news_content')
- item_loader.add_css('tags', '.news_tags a::text')
- item_loader.add_css('create_date', '#news_info .time::text')
- item_loader.add_value('url', response.url)
- item_loader.add_value('front_image_url', response.meta.get('front_image_url', ''))
- # article_item = item_loader.load_item()
- # if response.meta.get('front_image_url', ""):
- # article_item['front_image_url'] = [response.meta.get('front_image_url', "")]
- # else:
- # article_item['front_image_url'] = []
- yield Request(url=parse.urljoin(response.url, "/NewsAjax/GetAjaxNewsInfo?contentId={}".format(post_id)),
- meta={'article_item': item_loader, 'url':response.url}, callback=self.parse_nums)
- # praise_nums = j_data['DiggCount']
- # fav_nums = j_data['TotalView']
- # comment_nums = j_data['CommentCount']
- # pass
-
- def parse_nums(self, response):
- j_data = json.loads(response.text)
- item_loader = response.meta.get('article_item', "")
- # praise_nums = j_data['DiggCount']
- # fav_nums = j_data['TotalView']
- # comment_nums = j_data['CommentCount']
- item_loader.add_value('praise_nums', j_data['DiggCount'])
- item_loader.add_value('fav_nums', j_data['TotalView'])
- item_loader.add_value('comment_nums', j_data['CommentCount'])
- item_loader.add_value('url_object_id', common.get_md5(response.meta.get('url', '')))
- # article_item['praise_nums'] = praise_nums
- # article_item['fav_nums'] = fav_nums
- # article_item['comment_nums'] = comment_nums
- # article_item['url_object_id'] = common.get_md5(article_item['url'])
- article_item = item_loader.load_item()
- yield article_item
+import json
+import re
+import os
+import requests
+import scrapy
+import pickle
+import datetime
+from scrapy.http import Request
+from urllib import parse
+from scrapy.loader import ItemLoader
+from ArticleSpider.items import ArticleItemLoader
+from ArticleSpider.items import JobBoleArticleItem
+from ArticleSpider.utils import common
+from ArticleSpider.utils.common import get_md5
+from scrapy import signals
+import time
+from selenium import webdriver
+
+
+class JobboleSpider(scrapy.Spider):
+ name = "jobbole"
+ allowed_domains = ["news.cnblogs.com"]
+ start_urls = ["http://news.cnblogs.com/"]
+
+ def start_requests(self):
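+ # Reuse the pickled cookies from an earlier Selenium login when the cookie file exists;
+ # otherwise log in through a real browser and persist the cookies for later runs.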
+ cookies = []
+ if os.path.exists(r'C:\Users\10955\ArticleSpider\cookies\jobbole.cookie'):
+ cookies = pickle.load(open(r'C:\Users\10955\ArticleSpider\cookies\jobbole.cookie', 'rb'))
+ if not cookies:
+ driver = webdriver.Chrome()
+ driver.implicitly_wait(10)
+ # Open the sign-in page
+ driver.get('https://account.cnblogs.com/signin')
+ # Hide navigator.webdriver so the site's bot detection does not invalidate the captcha click
+ driver.execute_script("Object.defineProperties(navigator,{webdriver:{get:()=>undefined}})")
+ # Enter the username
+ driver.find_element_by_id('mat-input-0').send_keys('包包1')
+ # Enter the password
+ driver.find_element_by_id('mat-input-1').send_keys('qweasdzxc227')
+ # Click the sign-in button
+ driver.find_element_by_css_selector('.mat-button-wrapper').click()
+ # Click the captcha
+ driver.find_element_by_xpath('//*[@id="Shape3"]').click()
+ time.sleep(5)
+ cookies = driver.get_cookies()
+ pickle.dump(cookies, open(r'C:\Users\10955\ArticleSpider\cookies\jobbole.cookie', 'wb'))
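+ # Selenium returns a list of cookie dicts; Scrapy expects a plain name -> value mapping.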
+ cookie_dict = {}
+ for cookie in cookies:
+ cookie_dict[cookie['name']] = cookie['value']
+ for url in self.start_urls:
+ yield scrapy.Request(url, dont_filter=True, cookies=cookie_dict)
+
+ def parse(self, response):
+ # 1. Extract the article URLs from the news list page, let Scrapy download them, then call the matching parse callback
+ # Pull out the article links; extract_first() returns the first match
+ post_nodes = response.css('#news_list .news_block')[:100]
+ for post_node in post_nodes:
+ image_url = "https:" + post_node.css('.entry_summary a img::attr(src)').extract_first("")
+ post_url = post_node.css('h2 a::attr(href)').extract_first("")
+ yield Request(url=parse.urljoin(response.url, post_url), meta={'front_image_url': image_url}, callback=self.parse_detail, dont_filter=True)
+ # 2. Extract the next-page URL, let Scrapy download it, and feed it back to parse() to keep following
+ next_url = response.css('div.pager a:last-child::attr(href)').extract_first("")
+ yield Request(url=parse.urljoin(response.url, next_url), callback=self.parse)
+
+ def parse_detail(self, response):
+ match_re = re.match(r".*?(\d+)", response.url)
+ if match_re:
+ post_id = match_re.group(1)
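+ # The numeric post id feeds the AJAX endpoint that serves the digg/view/comment counters.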
+ item_loader = ArticleItemLoader(item=JobBoleArticleItem(), response=response)
+ item_loader.add_css('title', '#news_title a::text')
+ item_loader.add_css('content', '#news_content')
+ item_loader.add_css('tags', '.news_tags a::text')
+ item_loader.add_css('create_date', '#news_info .time::text')
+ item_loader.add_value('url', response.url)
+ item_loader.add_value('front_image_url', response.meta.get('front_image_url', ''))
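+ # Hand the partially filled loader to parse_nums via meta; load_item() only runs once the counters arrive.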
+ yield Request(url=parse.urljoin(response.url, "/NewsAjax/GetAjaxNewsInfo?contentId={}".format(post_id)),
+ meta={'article_item': item_loader, 'url': response.url}, callback=self.parse_nums)
+
+ def parse_nums(self, response):
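+ # Merge the JSON counters into the loader carried over in meta, then emit the finished item.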
+ j_data = json.loads(response.text)
+ item_loader = response.meta.get('article_item', "")
+ item_loader.add_value('praise_nums', j_data['DiggCount'])
+ item_loader.add_value('fav_nums', j_data['TotalView'])
+ item_loader.add_value('comment_nums', j_data['CommentCount'])
+ item_loader.add_value('url_object_id', common.get_md5(response.meta.get('url', '')))
+ article_item = item_loader.load_item()
+ yield article_item
diff --git a/LcvSearch/LcvSearch/__pycache__/urls.cpython-39.pyc b/LcvSearch/LcvSearch/__pycache__/urls.cpython-39.pyc
index fd4412f..8fa5b59 100644
Binary files a/LcvSearch/LcvSearch/__pycache__/urls.cpython-39.pyc and b/LcvSearch/LcvSearch/__pycache__/urls.cpython-39.pyc differ
diff --git a/LcvSearch/LcvSearch/urls.py b/LcvSearch/LcvSearch/urls.py
index 30e2b8a..3a20af3 100644
--- a/LcvSearch/LcvSearch/urls.py
+++ b/LcvSearch/LcvSearch/urls.py
@@ -17,8 +17,11 @@ Including another URLconf
from django.contrib import admin
from django.urls import path
from django.views.generic import TemplateView
+from search.views import SearchSuggest, SearchView
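+# suggest/ serves the JSON autocomplete for the search box; search/ renders the result page.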
urlpatterns = [
path('admin/', admin.site.urls),
path('', TemplateView.as_view(template_name='index.html'), name='index'),
+ path('suggest/', SearchSuggest.as_view(), name='suggest'),
+ path('search/', SearchView.as_view(), name='search'),
]
diff --git a/LcvSearch/search/__pycache__/models.cpython-39.pyc b/LcvSearch/search/__pycache__/models.cpython-39.pyc
index 4a1703a..e631c0b 100644
Binary files a/LcvSearch/search/__pycache__/models.cpython-39.pyc and b/LcvSearch/search/__pycache__/models.cpython-39.pyc differ
diff --git a/LcvSearch/search/__pycache__/views.cpython-39.pyc b/LcvSearch/search/__pycache__/views.cpython-39.pyc
new file mode 100644
index 0000000..4bfade8
Binary files /dev/null and b/LcvSearch/search/__pycache__/views.cpython-39.pyc differ
diff --git a/LcvSearch/search/models.py b/LcvSearch/search/models.py
index fd18c6e..df9c9c7 100644
--- a/LcvSearch/search/models.py
+++ b/LcvSearch/search/models.py
@@ -1,3 +1,47 @@
from django.db import models
# Create your models here.
+# -*- coding: utf-8 -*-
+__author__ = 'bobby'
+
+from datetime import datetime
+from elasticsearch_dsl import DocType, Date, Nested, Boolean, \
+ analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer
+
+from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer
+
+from elasticsearch_dsl.connections import connections
+
+connections.create_connection(hosts=["localhost"])
+
+
+class CustomAnalyzer(_CustomAnalyzer):
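+ # ik_max_word is provided by the Elasticsearch ik plugin on the server, so return an empty
+ # analysis definition to keep elasticsearch-dsl from trying to redefine it when the mapping is created.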
+ def get_analysis_definition(self):
+ return {}
+
+
+ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])
+
+
+class ArticleType(DocType):
+ # Jobbole article document type
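+ # The fields mirror JobBoleArticleItem; "suggest" backs the completion suggester used by the suggest/ view.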
+ suggest = Completion(analyzer=ik_analyzer)
+ title = Text(analyzer="ik_max_word")
+ create_date = Date()
+ url = Keyword()
+ url_object_id = Keyword()
+ front_image_url = Keyword()
+ front_image_path = Keyword()
+ praise_nums = Integer()
+ comment_nums = Integer()
+ fav_nums = Integer()
+ tags = Text(analyzer="ik_max_word")
+ content = Text(analyzer="ik_max_word")
+
+ class Meta:
+ index = "jobbole"
+ doc_type = "article"
+
+
+if __name__ == "__main__":
+ ArticleType.init()
diff --git a/LcvSearch/search/views.py b/LcvSearch/search/views.py
index c60c790..8c31c11 100644
--- a/LcvSearch/search/views.py
+++ b/LcvSearch/search/views.py
@@ -1,3 +1,88 @@
from django.shortcuts import render
+from django.views.generic.base import View
+from search.models import ArticleType
+from django.http import HttpResponse
+import json
+from elasticsearch import Elasticsearch
+from datetime import datetime
+
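+# Low-level client used for the raw search body in SearchView; SearchSuggest goes through the elasticsearch_dsl document class instead.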
+client = Elasticsearch(hosts=['127.0.0.1'])
+
# Create your views here.
+class SearchSuggest(View):
+ # Search-suggestion (autocomplete) endpoint
+ def get(self, request):
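+ # Run a completion suggester (fuzziness 2, up to 10 options) against the "suggest" field and return the matching titles as JSON.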
+ key_words = request.GET.get('s', '')
+ re_datas = []
+ if key_words:
+ s = ArticleType.search()
+ s = s.suggest('my_suggest', key_words, completion={
+ "field": "suggest", "fuzzy": {
+ "fuzziness": 2
+ },
+ "size": 10
+ })
+ suggestions = s.execute_suggest()
+ for match in suggestions.my_suggest[0].options:
+ source = match._source
+ re_datas.append(source["title"])
+ return HttpResponse(json.dumps(re_datas), content_type="application/json")
+
+
+class SearchView(View):
+ def get(self, request):
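+ # Full-text search over tags/title/content, 10 hits per page, with highlighted title/content snippets and timing stats for the template.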
+ key_words = request.GET.get("q", '')
+ page = request.GET.get('p', '1')
+ try:
+ page = int(page)
+ except ValueError:
+ page = 1
+ start_time = datetime.now()
+ response = client.search(
+ index="jobbole",
+ body={
+ "query": {
+ "multi_match": {
+ "query": key_words,
+ "fields": ["tags", "title", "content"]
+ }
+ },
+ "from": (page - 1) * 10,
+ "size": 10,
+ "highlight": {
+ "pre_tags": [''],
+ "post_tags": [''],
+ "fields": {
+ "title": {},
+ "content": {},
+ }
+ }
+ }
+ )
+ end_time = datetime.now()
+ last_seconds = (end_time - start_time).total_seconds()
+ total_nums = response['hits']['total']
+ if (total_nums % 10) > 0:
+ page_nums = int(total_nums / 10) + 1
+ else:
+ page_nums = int(total_nums / 10)
+ # Build the result list, pulling each display field out of the hit
+ hit_list = []
+ for hit in response['hits']['hits']:
+ hit_dict = {}
+ if 'title' in hit.get('highlight', {}):
+ hit_dict['title'] = "".join(hit['highlight']['title'])
+ else:
+ hit_dict['title'] = hit['_source']['title']
+ if 'content' in hit.get('highlight', {}):
+ hit_dict['content'] = "".join(hit['highlight']['content'])[:500]
+ else:
+ hit_dict['content'] = hit['_source']['content'][:500]
+ hit_dict["create_date"] = hit['_source']['create_date']
+ hit_dict["url"] = hit['_source']['url']
+ hit_dict["score"] = hit['_score']
+ hit_list.append(hit_dict)
+ return render(request, 'result.html',
+ {'page': page, 'total_nums': total_nums, 'all_hits': hit_list, 'key_words': key_words,
+ 'page_nums': page_nums, 'last_seconds': last_seconds})
diff --git a/LcvSearch/templates/index.html b/LcvSearch/templates/index.html
index 42d3a1c..8b08c57 100644
--- a/LcvSearch/templates/index.html
+++ b/LcvSearch/templates/index.html
@@ -66,8 +66,8 @@