{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'parsel'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mrequests\u001b[0m \u001b[1;31m#数据请求模块\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mparsel\u001b[0m \u001b[1;31m#数据解析模块\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0murl\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'https://www.biqugee.com/book/12564/4856870.html'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m#返回相应数据\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'parsel'" ] } ], "source": [ "import requests #数据请求模块\n", "import parsel #数据解析模块\n", "\n", "url = 'https://www.biqugee.com/book/12564/4856870.html' \n", "response = requests.get(url) #返回相应数据\n", " #print(response.text)#发送请求,获取数据\n", " #解析数据\n", "selector = parsel.Selector(response.text) #调用selector里的css选择器的方法,进行解析数提取\n", "title = selector.css('#wrapper > div.content_read > div > div.bookname > h1::text').get()\n", "content_list = selector.css('#content::text').getall()\n", "content = '\\n'.join(content_list)\n", "print(title)\n", "print(content)\n", "with open(title + '.csv',mode='a',encoding='utf-8')as f:\n", " f.write(title)\n", " f.write(content)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }