import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Shared request headers: Bilibili's API expects a browser-like User-Agent.
_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
}


def get_danmu(urls):
    """Scrape danmu (bullet comments) for a list of Bilibili video URLs.

    For each URL the BV id is extracted from the path, resolved to the
    first page's cid via the pagelist API, and the danmu XML is fetched
    and parsed. All comment texts are written to ``all_danmu_data.xlsx``
    and returned as a flat list.

    Parameters
    ----------
    urls : iterable of str
        Bilibili video URLs containing a ``BV...`` path segment.

    Returns
    -------
    list of str
        Every danmu text found across all videos.
    """
    # Extract the BV id from each URL's path segments.
    bv_ids = [part for url in urls
              for part in url.split('/')
              if part.startswith('BV')]

    # Resolve each BV id to the cid of the video's first page.
    cids = []
    for bvid in bv_ids:
        api = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}&jsonp=jsonp"
        response = requests.get(api, headers=_HEADERS)
        time.sleep(0.5)  # throttle to avoid triggering rate limiting
        if response.status_code == 200:
            data = response.json()
            if data['code'] == 0 and data['data']:
                cids.append(data['data'][0]['cid'])
    print("cid", len(cids))

    # Fetch the raw danmu XML for each cid.
    danmu_data = []
    fail_count = 0
    for cid in cids:
        api = f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}"
        response = requests.get(api, headers=_HEADERS)
        time.sleep(0.5)
        if response.status_code != 200:
            # BUG FIX: the original counted the failure but still appended
            # the error body to danmu_data; skip failed responses entirely.
            fail_count += 1
            continue
        response.encoding = 'utf-8'
        danmu_data.append(response.text)
    print("danmu_data_html", len(danmu_data))
    print("fail_count", fail_count)

    # Each <d> element in the danmu XML holds one comment's text.
    all_danmu = []
    for html in danmu_data:
        soup = BeautifulSoup(html, 'html.parser')
        for d in soup.find_all('d'):
            all_danmu.append(d.get_text())
    print("all_danmu", len(all_danmu))

    df = pd.DataFrame({'danmu': all_danmu})
    df.to_excel("all_danmu_data.xlsx", index=False, engine='openpyxl')
    return all_danmu