Compare commits
5 Commits
Author | SHA1 | Date |
---|---|---|
2648226350 | 51e096fd48 | 2 years ago |
2648226350 | 3b4effc19a | 2 years ago |
2648226350 | 7a1d6161f6 | 2 years ago |
2648226350 | 316d47f22b | 2 years ago |
pitohmfkr | 43a1d5152d | 2 years ago |
@ -0,0 +1,82 @@
|
||||
import json
import math
import os

import requests
from bs4 import BeautifulSoup
|
||||
|
||||
"""
|
||||
Bilibili简易评论抓取
|
||||
"""
|
||||
|
||||
# Default HTTP headers sent with the Bilibili API requests in this script.
#
# The session cookie is intentionally NOT hard-coded: it carries login
# credentials (SESSDATA / bili_jct) and must never be committed. Export it
# through the BILIBILI_COOKIE environment variable instead; when the variable
# is unset or blank, requests are sent without a cookie.
headers = {
    'accept': 'application/json, text/plain, */*',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.42',
    'Connection': 'keep-alive',
    'TE': 'Trailers',
}

_cookie = os.environ.get('BILIBILI_COOKIE', '').strip()
if _cookie:
    headers['cookie'] = _cookie

# Video ids collected by getVideo(); iterated by the __main__ driver.
videoList = []
|
||||
|
||||
def getVideo():
    """Prompt for a search keyword and append the matching video ids to ``videoList``.

    Reads the keyword from stdin, queries the Bilibili "search all" endpoint
    for the first result page and appends every ``id`` found in the video
    section to the module-level ``videoList``.

    The keyword is passed through ``params`` so that spaces, ``&`` and
    non-ASCII characters are URL-encoded instead of being spliced into the
    query string verbatim.

    A malformed or unexpected response is reported on stdout and leaves
    ``videoList`` with whatever ids were collected so far. Network errors
    raised by ``requests`` still propagate to the caller.
    """
    search = input("输入搜索关键字:")
    print('正在获取视频列表.......')

    # Position of the video section inside data.result, as the original code
    # assumed. NOTE(review): presumably stable, but verify against the API.
    video_section = 10
    succeeded = True

    for i in range(1):  # only the first result page is requested
        query = {
            'keyword': search,
            'page': i + 1,
            'duration': '',
            'tids_1': '',
            'tids_2': '',
            '__refresh__': 'true',
            '_extra': '',
            'highlight': 1,
            'single_column': 0,
            'jsonp': 'jsonp',
        }
        resp = requests.get(
            'https://api.bilibili.com/x/web-interface/search/all/v2',
            params=query,
            headers=headers,
            timeout=10,  # never hang forever on a stalled connection
        )
        try:
            data = json.loads(resp.text)
            video_data = data['data']['result'][video_section]['data']
            for d in video_data:
                videoList.append(d['id'])
        except (KeyError, IndexError, TypeError, ValueError) as exc:
            # KeyError/IndexError/TypeError: payload shape differs from what
            # is expected (e.g. "data" is null or has fewer sections).
            # ValueError: body was not valid JSON (e.g. an HTML error page).
            succeeded = False
            print(type(exc).__name__)

    # Only claim success when every page was parsed without an error.
    if succeeded:
        print('视频列表获取成功!')
|
||||
|
||||
|
||||
|
||||
# bs = BeautifulSoup(resp.text,"html.parser")
|
||||
# video_aid = bs.find_all()
|
||||
|
||||
|
||||
|
||||
def getReplyPageNum(oid):
    """Return the number of reply pages reported for the video ``oid``.

    Requests the first reply page and computes
    ``ceil(page.count / page.size)`` from the ``data.page`` object of the
    response.

    Args:
        oid: Video id; it is stringified into the query string.

    Returns:
        The computed page count, or ``1`` when the response is not valid
        JSON, lacks the expected fields, or reports a non-positive page size.
    """
    url = f"https://api.bilibili.com/x/v2/reply?&jsonp=jsonp&pn=1&type=1&oid={oid}&sort=2"
    respond = requests.get(url, timeout=10)  # never hang forever on a stalled connection

    replyPageNum = 1
    try:
        page_info = json.loads(respond.text)['data']['page']
        # Only "count" and "size" are needed. The previous unused lookup of
        # "acount" forced the 1-page fallback whenever that key was absent.
        replyPageCount = int(page_info['count'])
        replyPageSize = int(page_info['size'])
        if replyPageSize > 0:  # guard against division by zero
            replyPageNum = math.ceil(replyPageCount / replyPageSize)
    except (KeyError, TypeError, ValueError) as exc:
        # KeyError/TypeError: unexpected payload shape (e.g. "data" is null).
        # ValueError: invalid JSON body or a non-numeric count/size.
        print(type(exc).__name__)
    return replyPageNum
|
||||
|
||||
# def getAid(bvid):
|
||||
# url = "http://api.bilibili.com/x/web-interface/view?bvid="+str(bvid)
|
||||
# response = requests.get(url)
|
||||
# dirt=json.loads(response.text)
|
||||
# aid=dirt['data']['aid']
|
||||
# return aid
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: collect video ids for a keyword, then download the
    # top-level comment text of each video and append it to comments.json.
    getVideo()
    for video in videoList:
        messageList = []
        for page in range(1, getReplyPageNum(video) + 1):
            url = 'https://api.bilibili.com/x/v2/reply/main?mode=3&next={}&oid={}&plat=1&type=1'.format(page, video)
            resp = requests.get(url, headers=headers, timeout=10)
            try:
                # "replies" may be null when a page has no comments; treat
                # that as an empty page instead of failing on iteration.
                replies = resp.json()['data']['replies'] or []
                for reply in replies:
                    messageList.append(reply['content']['message'])
            except (KeyError, TypeError, ValueError) as exc:
                # Report the actual problem (missing field, null payload,
                # invalid JSON) rather than printing the exception class.
                print(repr(exc))
            # Cap the number of comment pages fetched per video.
            if page > 30:
                break
        # One JSON array per video, one per line, so the appended file can
        # be read back record by record. The with-block closes the file.
        with open('comments.json', 'a', encoding='utf-8') as fp:
            json.dump(messageList, fp, ensure_ascii=False)
            fp.write('\n')
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,16 @@
|
||||
"""
|
||||
ASGI config for demo project.
|
||||
|
||||
It exposes the ASGI callable as a module-level variable named ``application``.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/3.2/howto/deployment/asgi/
|
||||
"""
|
||||
|
||||
import os

from django.core.asgi import get_asgi_application

# Use the demo project's settings unless DJANGO_SETTINGS_MODULE is already
# present in the environment (setdefault never overwrites an existing value).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'demo.settings')

# Module-level ASGI callable that the ASGI server imports.
application = get_asgi_application()
|
@ -0,0 +1,125 @@
|
||||
"""
|
||||
Django settings for demo project.
|
||||
|
||||
Generated by 'django-admin startproject' using Django 3.2.5.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/3.2/topics/settings/
|
||||
|
||||
For the full list of settings and their values, see
|
||||
https://docs.djangoproject.com/en/3.2/ref/settings/
|
||||
"""
|
||||
|
||||
import os
from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# The literal below is only the development fallback generated by
# startproject; set DJANGO_SECRET_KEY in any real deployment. An empty
# variable is treated as unset.
SECRET_KEY = (
    os.environ.get('DJANGO_SECRET_KEY')
    or 'django-insecure--b389dekoa6=lh3p+c0^q3p%!32rv$$3@y6qnl3@9_bd=+pj)8'
)

# SECURITY WARNING: don't run with debug turned on in production!
# Defaults to True (development); set DJANGO_DEBUG to 0/false/no/off to
# disable it.
DEBUG = os.environ.get('DJANGO_DEBUG', 'True').strip().lower() in ('1', 'true', 'yes', 'on')

# Comma-separated host names from DJANGO_ALLOWED_HOSTS; empty by default.
ALLOWED_HOSTS = [
    host.strip()
    for host in os.environ.get('DJANGO_ALLOWED_HOSTS', '').split(',')
    if host.strip()
]


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'demo.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'demo.wsgi.application'


# Database
# https://docs.djangoproject.com/en/3.2/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    }
}


# Password validation
# https://docs.djangoproject.com/en/3.2/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/3.2/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_L10N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/3.2/howto/static-files/

STATIC_URL = '/static/'

# Default primary key field type
# https://docs.djangoproject.com/en/3.2/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
|
@ -0,0 +1,21 @@
|
||||
"""demo URL Configuration
|
||||
|
||||
The `urlpatterns` list routes URLs to views. For more information please see:
|
||||
https://docs.djangoproject.com/en/3.2/topics/http/urls/
|
||||
Examples:
|
||||
Function views
|
||||
1. Add an import: from my_app import views
|
||||
2. Add a URL to urlpatterns: path('', views.home, name='home')
|
||||
Class-based views
|
||||
1. Add an import: from other_app.views import Home
|
||||
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
|
||||
Including another URLconf
|
||||
1. Import the include() function: from django.urls import include, path
|
||||
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
|
||||
"""
|
||||
from django.contrib import admin
from django.urls import path

# Route table for the project: only the Django admin site is mounted.
urlpatterns = [path('admin/', admin.site.urls)]
|
@ -0,0 +1,16 @@
|
||||
"""
|
||||
WSGI config for demo project.
|
||||
|
||||
It exposes the WSGI callable as a module-level variable named ``application``.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/3.2/howto/deployment/wsgi/
|
||||
"""
|
||||
|
||||
import os

from django.core.wsgi import get_wsgi_application

# Use the demo project's settings unless DJANGO_SETTINGS_MODULE is already
# present in the environment (setdefault never overwrites an existing value).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'demo.settings')

# Module-level WSGI callable that the WSGI server imports.
application = get_wsgi_application()
|
@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python
|
||||
"""Django's command-line utility for administrative tasks."""
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def main():
    """Select the demo settings module and hand ``sys.argv`` to Django's CLI.

    Raises:
        ImportError: if Django cannot be imported; the original error is
            chained as the cause.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'demo.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    execute_from_command_line(sys.argv)


# Run the command-line utility only when executed as a script.
if __name__ == '__main__':
    main()
|
Loading…
Reference in new issue