From 643acec60200e2d9d843f72faace12a6dafc2043 Mon Sep 17 00:00:00 2001
From: p7mq6zalo <2715209264@qq.com>
Date: Fri, 15 Apr 2022 23:54:34 +0800
Subject: [PATCH] ADD file via upload

---
 tgj.ipynb | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 tgj.ipynb

diff --git a/tgj.ipynb b/tgj.ipynb
new file mode 100644
index 0000000..6922020
--- /dev/null
+++ b/tgj.ipynb
@@ -0,0 +1,82 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'parsel'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+      "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mrequests\u001b[0m \u001b[1;31m#数据请求模块\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mparsel\u001b[0m \u001b[1;31m#数据解析模块\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0murl\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'https://www.biqugee.com/book/12564/4856870.html'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m#返回相应数据\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'parsel'"
+     ]
+    }
+   ],
+   "source": [
+    "import requests  # HTTP client used to download the chapter page\n",
+    "import parsel  # HTML parser providing CSS selectors\n",
+    "\n",
+    "url = 'https://www.biqugee.com/book/12564/4856870.html'\n",
+    "\n",
+    "# Download the chapter page. The timeout keeps the cell from hanging on an\n",
+    "# unresponsive server; raise_for_status() stops on HTTP error responses.\n",
+    "response = requests.get(url, timeout=10)\n",
+    "response.raise_for_status()\n",
+    "# print(response.text)  # uncomment to inspect the raw HTML\n",
+    "\n",
+    "# Parse the HTML and extract the chapter title and body text.\n",
+    "selector = parsel.Selector(response.text)\n",
+    "title = selector.css('#wrapper > div.content_read > div > div.bookname > h1::text').get()\n",
+    "content_list = selector.css('#content::text').getall()\n",
+    "if title is None:\n",
+    "    # .get() returns None when the selector matches nothing; fail with a clear\n",
+    "    # message instead of a TypeError when the file name is built below.\n",
+    "    raise ValueError('chapter title not found at ' + url)\n",
+    "content = '\\n'.join(content_list)\n",
+    "print(title)\n",
+    "print(content)\n",
+    "\n",
+    "# Append the chapter to a file named after its title. The added newlines keep\n",
+    "# the title off the first body line and separate repeated appends.\n",
+    "with open(title + '.csv', mode='a', encoding='utf-8') as f:\n",
+    "    f.write(title + '\\n')\n",
+    "    f.write(content + '\\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}