import requests
from bs4 import BeautifulSoup


def get_news_content(url):
    # Request headers that mimic a regular browser. Supply your own session
    # cookies here if the target site requires login; real session tokens
    # (SESSDATA, bili_jct, etc.) should never be hardcoded in source control.
    headers = {
        "cookie": "<your session cookies>",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
    }
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()  # fail early on HTTP errors
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract the news title and body text; guard against a missing <h1>
    title_tag = soup.find('h1')
    title = title_tag.get_text(strip=True) if title_tag else ""
    paragraphs = soup.find_all('p')
    content = " ".join(p.get_text(strip=True) for p in paragraphs)
    return title, content


# Example: scrape a news article from a media site
url = "https://example.com/news/world-event"
title, content = get_news_content(url)
print(f"Title: {title}")
print(f"Content: {content}")
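
# Note: find_all('p') above collects every <p> on the page, including
# navigation and footer boilerplate. Below is a minimal sketch of scoping
# the extraction to the article body instead. It assumes the target page
# wraps its content in an <article> tag, which is a hypothetical structure;
# adjust the selector to the site you are actually scraping.
def get_article_paragraphs(soup):
    # Prefer paragraphs inside an <article> container; fall back to the whole page
    container = soup.find('article') or soup
    return [p.get_text(strip=True) for p in container.find_all('p')]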