|
|
|
@ -0,0 +1,70 @@
|
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 1,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"ename": "ModuleNotFoundError",
|
|
|
|
|
"evalue": "No module named 'parsel'",
|
|
|
|
|
"output_type": "error",
|
|
|
|
|
"traceback": [
|
|
|
|
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
|
|
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
|
|
|
|
"\u001b[1;32m<ipython-input-1-0a98fa3dc863>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mrequests\u001b[0m \u001b[1;31m#数据请求模块\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mparsel\u001b[0m \u001b[1;31m#数据解析模块\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0murl\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'https://www.biqugee.com/book/12564/4856870.html'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m#返回相应数据\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'parsel'"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
"import requests  # HTTP client used to download the chapter page\n",
"import parsel  # HTML parsing and CSS-selector extraction\n",
"\n",
"url = 'https://www.biqugee.com/book/12564/4856870.html'\n",
"\n",
"# Fetch the page. The timeout keeps the cell from hanging forever on a stalled\n",
"# connection, and raise_for_status() stops on 4xx/5xx instead of parsing an error page.\n",
"response = requests.get(url, timeout=10)\n",
"response.raise_for_status()\n",
"# NOTE(review): response.text uses the charset requests infers for this response;\n",
"# if the printed text looks garbled, confirm the page's real encoding.\n",
"\n",
"# Parse the HTML and extract the chapter title and body text with CSS selectors.\n",
"selector = parsel.Selector(response.text)\n",
"title = selector.css('#wrapper > div.content_read > div > div.bookname > h1::text').get()\n",
"if title is None:\n",
"    # .get() returns None when nothing matches; fail here with a clear message\n",
"    # instead of a TypeError when the file name is built below.\n",
"    raise ValueError('chapter title not found at ' + url)\n",
"content_list = selector.css('#content::text').getall()\n",
"content = '\\n'.join(content_list)\n",
"print(title)\n",
"print(content)\n",
"\n",
"# mode='w' so re-running the cell rewrites the file instead of appending a duplicate copy.\n",
"# NOTE(review): the payload is plain text, not CSV; the '.csv' suffix is kept so the\n",
"# output file name does not change.\n",
"with open(title + '.csv', mode='w', encoding='utf-8') as f:\n",
"    f.write(title + '\\n')  # newline keeps the title from fusing with the first body line\n",
"    f.write(content)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"collapsed": true
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.6.1"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|