parent
ea2fc35a1f
commit
d42cc26d04
@ -0,0 +1,130 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "9d67d3ba-beb6-426d-8482-9adc32e5b1bb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[('加州', '5959487', '76983'), ('德克萨斯', '4946880', '76364'), ('佛罗里达', '4486276', '62622'), ('纽约', '3929750', '60276'), ('伊利诺伊', '2382423', '30794'), ('宾夕法尼亚', '2206899', '37522'), ('俄亥俄', '2151829', '30072'), ('乔治亚', '1928942', '31521'), ('密西根', '1855658', '29955'), ('北卡', '1816380', '19619'), ('新泽西', '1782463', '29374'), ('田纳西', '1462996', '20941'), ('亚利桑那', '1430241', '24586'), ('印第安纳', '1314688', '19448'), ('马萨诸塞', '1250765', '20510'), ('弗吉尼亚', '1220869', '15651'), ('威斯康星', '1186066', '11402'), ('密苏里', '1075994', '16476'), ('明尼苏达', '1064065', '10906'), ('南卡', '1048252', '14746'), ('科罗拉多', '979437', '10402'), ('阿拉巴马', '957848', '16585'), ('华盛顿州', '913980', '9974'), ('路易斯安那', '908428', '15054'), ('肯塔基', '904916', '12319'), ('马里兰', '800743', '12107'), ('俄克拉荷马', '737593', '12592'), ('犹他', '682036', '3887'), ('阿肯色', '604843', '9298'), ('爱荷华', '596296', '8019'), ('密西西比', '586547', '10527'), ('堪萨斯', '565288', '7101'), ('康涅狄格', '563635', '9281'), ('内华达', '509472', '8509'), ('俄勒冈', '449267', '5728'), ('新墨西哥', '364861', '5969'), ('内布拉斯加', '355928', '3416'), ('西弗吉尼亚', '351688', '5421'), ('波多黎各', '340152', '3357'), ('爱达荷', '325730', '4223'), ('罗德岛', '266066', '3121'), ('新罕布什尔', '211952', '2017'), ('蒙大拿', '202863', '2917'), ('特拉华', '195600', '2286'), ('南达科塔', '189411', '2522'), ('北达科塔', '180646', '2073'), ('阿拉斯加', '161678', '979'), ('缅因', '152173', '1599'), ('夏威夷', '134442', '1101'), ('怀俄明', '119093', '1572'), ('华盛顿特区', '108112', '1225'), ('佛蒙特', '73802', '482'), ('关岛', '20594', '274'), ('维京群岛', '11833', '89'), ('北马里亚纳群岛', '3518', '18'), ('至尊公主号', '103', '3'), ('钻石公主号', '49', '0'), ('美属萨摩亚', '10', '0')]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import csv\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"from lxml import etree\n",
|
||||
"\n",
|
||||
"url = \"https://www.bitpush.news/covid19/\"\n",
|
||||
"# Request headers: send a desktop-browser User-Agent with the request.\n",
|
||||
"headers = {\n",
|
||||
|
||||
"    \"user-agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
|
||||
"# Issue the GET request with the requests module. The timeout keeps the cell\n",
|
||||
|
||||
"# from hanging indefinitely, and raise_for_status() stops on an HTTP error\n",
|
||||
|
||||
"# instead of silently parsing an error page.\n",
|
||||
|
||||
"response = requests.get(url, headers=headers, timeout=30)\n",
|
||||
"response.raise_for_status()\n",
|
||||
|
||||
"# response.text is already a decoded str, so it is parsed directly.\n",
|
||||
"html = response.text\n",
|
||||
"doc = etree.HTML(html)\n",
|
||||
"\n",
|
||||
|
||||
"# Extract the per-state US data: the rows of the table that follows the\n",
|
||||
|
||||
"# heading containing \"美国境内统计数据\".\n",
|
||||
|
||||
"base_path = '//h1[contains(text(), \"美国境内统计数据\")]/following-sibling::div[@class=\"table_container\"]//tbody/tr'\n",
|
||||
"county = doc.xpath(f'{base_path}/td[1]/span/text()')\n",
|
||||
"person = doc.xpath(f'{base_path}/td[2]/text()')\n",
|
||||
"death = doc.xpath(f'{base_path}/td[3]/text()')\n",
|
||||
"\n",
|
||||
|
||||
"# Strip the thousands separators so the counts are plain digit strings.\n",
|
||||
"person = [x.replace(\",\", \"\") for x in person]\n",
|
||||
"death = [x.replace(\",\", \"\") for x in death]\n",
|
||||
|
||||
"# One tuple per table row, combining the three extracted columns.\n",
|
||||
|
||||
"# NOTE(review): zip() truncates silently if the three lists differ in length.\n",
|
||||
"message = list(zip(county, person, death))\n",
|
||||
"\n",
|
||||
|
||||
"# newline=\"\" is required by the csv module to avoid blank rows on Windows;\n",
|
||||
|
||||
"# the explicit encoding makes writing the Chinese names locale-independent.\n",
|
||||
"with open(\"contents1.csv\", \"w\", newline=\"\", encoding=\"utf-8\") as f:\n",
|
||||
"    csv.writer(f).writerows(message)\n",
|
||||
"print(message)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "16882b4e-02b7-4ee7-a5be-8f1555723c4a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "2 columns passed, passed data had 3 columns",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[1;32mD:\\AI\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:939\u001b[0m, in \u001b[0;36m_finalize_columns_and_data\u001b[1;34m(content, columns, dtype)\u001b[0m\n\u001b[0;32m 938\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 939\u001b[0m columns \u001b[38;5;241m=\u001b[39m _validate_or_indexify_columns(contents, columns)\n\u001b[0;32m 940\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m 941\u001b[0m \u001b[38;5;66;03m# GH#26429 do not raise user-facing AssertionError\u001b[39;00m\n",
|
||||
"File \u001b[1;32mD:\\AI\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:986\u001b[0m, in \u001b[0;36m_validate_or_indexify_columns\u001b[1;34m(content, columns)\u001b[0m\n\u001b[0;32m 984\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_mi_list \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(columns) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(content): \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[0;32m 985\u001b[0m \u001b[38;5;66;03m# caller's responsibility to check for this...\u001b[39;00m\n\u001b[1;32m--> 986\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m(\n\u001b[0;32m 987\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(columns)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m columns passed, passed data had \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 988\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(content)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m columns\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 989\u001b[0m )\n\u001b[0;32m 990\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_mi_list:\n\u001b[0;32m 991\u001b[0m \u001b[38;5;66;03m# check if nested list column, length of each sub-list should be equal\u001b[39;00m\n",
|
||||
"\u001b[1;31mAssertionError\u001b[0m: 2 columns passed, passed data had 3 columns",
|
||||
"\nThe above exception was the direct cause of the following exception:\n",
|
||||
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[3], line 6\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[0;32m 5\u001b[0m data \u001b[38;5;241m=\u001b[39m message\n\u001b[1;32m----> 6\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(data, columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m地区\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m确诊人数\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[0;32m 8\u001b[0m df \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39msort_values(by\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m确诊人数\u001b[39m\u001b[38;5;124m\"\u001b[39m, ascending\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m 10\u001b[0m top_15 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m15\u001b[39m)\n",
|
||||
"File \u001b[1;32mD:\\AI\\Lib\\site-packages\\pandas\\core\\frame.py:851\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[1;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[0;32m 849\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m columns \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 850\u001b[0m columns \u001b[38;5;241m=\u001b[39m ensure_index(columns)\n\u001b[1;32m--> 851\u001b[0m arrays, columns, index \u001b[38;5;241m=\u001b[39m nested_data_to_arrays(\n\u001b[0;32m 852\u001b[0m \u001b[38;5;66;03m# error: Argument 3 to \"nested_data_to_arrays\" has incompatible\u001b[39;00m\n\u001b[0;32m 853\u001b[0m \u001b[38;5;66;03m# type \"Optional[Collection[Any]]\"; expected \"Optional[Index]\"\u001b[39;00m\n\u001b[0;32m 854\u001b[0m data,\n\u001b[0;32m 855\u001b[0m columns,\n\u001b[0;32m 856\u001b[0m index, \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[0;32m 857\u001b[0m dtype,\n\u001b[0;32m 858\u001b[0m )\n\u001b[0;32m 859\u001b[0m mgr \u001b[38;5;241m=\u001b[39m arrays_to_mgr(\n\u001b[0;32m 860\u001b[0m arrays,\n\u001b[0;32m 861\u001b[0m columns,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 864\u001b[0m typ\u001b[38;5;241m=\u001b[39mmanager,\n\u001b[0;32m 865\u001b[0m )\n\u001b[0;32m 866\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
||||
"File \u001b[1;32mD:\\AI\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:520\u001b[0m, in \u001b[0;36mnested_data_to_arrays\u001b[1;34m(data, columns, index, dtype)\u001b[0m\n\u001b[0;32m 517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_named_tuple(data[\u001b[38;5;241m0\u001b[39m]) \u001b[38;5;129;01mand\u001b[39;00m columns \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 518\u001b[0m columns \u001b[38;5;241m=\u001b[39m ensure_index(data[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39m_fields)\n\u001b[1;32m--> 520\u001b[0m arrays, columns \u001b[38;5;241m=\u001b[39m to_arrays(data, columns, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m 521\u001b[0m columns \u001b[38;5;241m=\u001b[39m ensure_index(columns)\n\u001b[0;32m 523\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m index \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
||||
"File \u001b[1;32mD:\\AI\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:845\u001b[0m, in \u001b[0;36mto_arrays\u001b[1;34m(data, columns, dtype)\u001b[0m\n\u001b[0;32m 842\u001b[0m data \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mtuple\u001b[39m(x) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m data]\n\u001b[0;32m 843\u001b[0m arr \u001b[38;5;241m=\u001b[39m _list_to_arrays(data)\n\u001b[1;32m--> 845\u001b[0m content, columns \u001b[38;5;241m=\u001b[39m _finalize_columns_and_data(arr, columns, dtype)\n\u001b[0;32m 846\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m content, columns\n",
|
||||
"File \u001b[1;32mD:\\AI\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:942\u001b[0m, in \u001b[0;36m_finalize_columns_and_data\u001b[1;34m(content, columns, dtype)\u001b[0m\n\u001b[0;32m 939\u001b[0m columns \u001b[38;5;241m=\u001b[39m _validate_or_indexify_columns(contents, columns)\n\u001b[0;32m 940\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m 941\u001b[0m \u001b[38;5;66;03m# GH#26429 do not raise user-facing AssertionError\u001b[39;00m\n\u001b[1;32m--> 942\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(err) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[0;32m 944\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(contents) \u001b[38;5;129;01mand\u001b[39;00m contents[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m np\u001b[38;5;241m.\u001b[39mobject_:\n\u001b[0;32m 945\u001b[0m contents \u001b[38;5;241m=\u001b[39m convert_object_array(contents, dtype\u001b[38;5;241m=\u001b[39mdtype)\n",
|
||||
"\u001b[1;31mValueError\u001b[0m: 2 columns passed, passed data had 3 columns"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
|
||||
"# Each entry of `message` is a 3-tuple, so three column names are required;\n",
|
||||
|
||||
"# passing only two raised \"2 columns passed, passed data had 3 columns\".\n",
|
||||
"df = pd.DataFrame(message, columns=[\"地区\", \"确诊人数\", \"死亡人数\"])\n",
|
||||
|
||||
"# The counts arrive as strings; convert them so the sort below is numeric\n",
|
||||
|
||||
"# rather than lexicographic.\n",
|
||||
"df[\"确诊人数\"] = pd.to_numeric(df[\"确诊人数\"])\n",
|
||||
"df[\"死亡人数\"] = pd.to_numeric(df[\"死亡人数\"])\n",
|
||||
"\n",
|
||||
"df = df.sort_values(by=\"确诊人数\", ascending=False)\n",
|
||||
"\n",
|
||||
"top_15 = df.head(15)\n",
|
||||
|
||||
"# NOTE(review): the Chinese labels need a CJK-capable matplotlib font; if they\n",
|
||||
|
||||
"# render as empty boxes, set plt.rcParams[\"font.sans-serif\"] to an installed one.\n",
|
||||
"fig, ax = plt.subplots()\n",
|
||||
"ax.bar(top_15[\"地区\"], top_15[\"确诊人数\"])\n",
|
||||
"ax.set_xlabel(\"地区\")\n",
|
||||
"ax.set_ylabel(\"确诊人数\")\n",
|
||||
"ax.set_title(\"美国各地区疫情确诊人数排名前15情况\")\n",
|
||||
"ax.tick_params(axis=\"x\", labelrotation=45)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ec776e08-7c62-408d-aa3c-189b74b32b82",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in new issue