You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
633 lines
62 KiB
633 lines
62 KiB
|
|
<!DOCTYPE HTML>
|
|
<html lang="" >
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
|
<title>群众的力量是伟大的-随机森林 · GitBook</title>
|
|
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
|
<meta name="description" content="">
|
|
<meta name="generator" content="GitBook 3.2.3">
|
|
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="gitbook/style.css">
|
|
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-katex/katex.min.css">
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-highlight/website.css">
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-search/search.css">
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-fontsettings/website.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<meta name="HandheldFriendly" content="true"/>
|
|
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
|
|
<meta name="apple-mobile-web-app-capable" content="yes">
|
|
<meta name="apple-mobile-web-app-status-bar-style" content="black">
|
|
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="gitbook/images/apple-touch-icon-precomposed-152.png">
|
|
<link rel="shortcut icon" href="gitbook/images/favicon.ico" type="image/x-icon">
|
|
|
|
|
|
<link rel="next" href="kMeans.html" />
|
|
|
|
|
|
<link rel="prev" href="decision_tree.html" />
|
|
|
|
|
|
</head>
|
|
<body>
|
|
|
|
<div class="book">
|
|
<div class="book-summary">
|
|
|
|
|
|
<div id="book-search-input" role="search">
|
|
<input type="text" placeholder="Type to search" />
|
|
</div>
|
|
|
|
|
|
<nav role="navigation">
|
|
|
|
|
|
|
|
<ul class="summary">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="chapter " data-level="1.1" data-path="./">
|
|
|
|
<a href="./">
|
|
|
|
|
|
简介
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2" data-path="machine_learning.html">
|
|
|
|
<a href="machine_learning.html">
|
|
|
|
|
|
机器学习概述
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3" data-path="algorithm.html">
|
|
|
|
<a href="algorithm.html">
|
|
|
|
|
|
常见机器学习算法
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.3.1" data-path="kNN.html">
|
|
|
|
<a href="kNN.html">
|
|
|
|
|
|
近朱者赤近墨者黑-kNN
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2" data-path="linear_regression.html">
|
|
|
|
<a href="linear_regression.html">
|
|
|
|
|
|
最简单的回归算法-线性回归
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.3" data-path="logistic_regression.html">
|
|
|
|
<a href="logistic_regression.html">
|
|
|
|
|
|
使用回归的思想进行分类-逻辑回归
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.4" data-path="decision_tree.html">
|
|
|
|
<a href="decision_tree.html">
|
|
|
|
|
|
最接近人类思维的算法-决策树
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter active" data-level="1.3.5" data-path="random_forest.html">
|
|
|
|
<a href="random_forest.html">
|
|
|
|
|
|
群众的力量是伟大的-随机森林
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.6" data-path="kMeans.html">
|
|
|
|
<a href="kMeans.html">
|
|
|
|
|
|
物以类聚人以群分-kMeans
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.7" data-path="AGNES.html">
|
|
|
|
<a href="AGNES.html">
|
|
|
|
|
|
以距离为尺-AGNES
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.4" data-path="metrics.html">
|
|
|
|
<a href="metrics.html">
|
|
|
|
|
|
模型评估指标
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.4.1" data-path="classification_metrics.html">
|
|
|
|
<a href="classification_metrics.html">
|
|
|
|
|
|
分类性能评估指标
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.4.2" data-path="regression_metrics.html">
|
|
|
|
<a href="regression_metrics.html">
|
|
|
|
|
|
回归性能评估指标
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.4.3" data-path="cluster_metrics.html">
|
|
|
|
<a href="cluster_metrics.html">
|
|
|
|
|
|
聚类性能评估指标
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.5" data-path="sklearn.html">
|
|
|
|
<a href="sklearn.html">
|
|
|
|
|
|
使用sklearn进行机器学习
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.6" >
|
|
|
|
<span>
|
|
|
|
|
|
综合实战案例
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.6.1" >
|
|
|
|
<span>
|
|
|
|
|
|
泰坦尼克生还预测
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.6.1.1" data-path="titanic/introduction.html">
|
|
|
|
<a href="titanic/introduction.html">
|
|
|
|
|
|
简介
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.6.1.2" data-path="titanic/EDA.html">
|
|
|
|
<a href="titanic/EDA.html">
|
|
|
|
|
|
探索性数据分析(EDA)
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.6.1.3" data-path="titanic/feature engerning.html">
|
|
|
|
<a href="titanic/feature engerning.html">
|
|
|
|
|
|
特征工程
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.6.1.4" data-path="titanic/fit and predict.html">
|
|
|
|
<a href="titanic/fit and predict.html">
|
|
|
|
|
|
构建模型进行预测
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.6.1.5" data-path="titanic/tuning.html">
|
|
|
|
<a href="titanic/tuning.html">
|
|
|
|
|
|
调参
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.6.2" >
|
|
|
|
<span>
|
|
|
|
|
|
使用强化学习玩乒乓球游戏
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.6.2.1" data-path="pingpong/what is reinforce learning.html">
|
|
|
|
<a href="pingpong/what is reinforce learning.html">
|
|
|
|
|
|
什么是强化学习
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.6.2.2" data-path="pingpong/Policy Gradient.html">
|
|
|
|
<a href="pingpong/Policy Gradient.html">
|
|
|
|
|
|
Policy Gradient原理
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.6.2.3" data-path="pingpong/coding.html">
|
|
|
|
<a href="pingpong/coding.html">
|
|
|
|
|
|
使用Policy Gradient玩乒乓球游戏
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.7" data-path="recommand.html">
|
|
|
|
<a href="recommand.html">
|
|
|
|
|
|
实训推荐
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
<li class="divider"></li>
|
|
|
|
<li>
|
|
<a href="https://www.gitbook.com" target="blank" class="gitbook-link">
|
|
Published with GitBook
|
|
</a>
|
|
</li>
|
|
</ul>
|
|
|
|
|
|
</nav>
|
|
|
|
|
|
</div>
|
|
|
|
<div class="book-body">
|
|
|
|
<div class="body-inner">
|
|
|
|
|
|
|
|
<div class="book-header" role="navigation">
|
|
|
|
|
|
<!-- Title -->
|
|
<h1>
|
|
<i class="fa fa-circle-o-notch fa-spin"></i>
|
|
<a href="." >群众的力量是伟大的-随机森林</a>
|
|
</h1>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<div class="page-wrapper" tabindex="-1" role="main">
|
|
<div class="page-inner">
|
|
|
|
<div id="book-search-results">
|
|
<div class="search-noresults">
|
|
|
|
<section class="normal markdown-section">
|
|
|
|
<h1 id="群众的力量是伟大的-随机森林">群众的力量是伟大的-随机森林</h1>
|
|
<p>既然有决策树,那有没有用多棵决策树组成森林的算法呢?有!那就是<strong>随机森林</strong>。随机森林是一种叫<strong>Bagging</strong>的算法框架的变体。所以想要理解<strong>随机森林</strong>首先要理解<strong>Bagging</strong>。</p>
|
|
<h2 id="bagging">Bagging</h2>
|
|
<h3 id="什么是bagging">什么是Bagging</h3>
|
|
<p><strong>Bagging</strong> 是 Bootstrap Aggregating 的英文缩写,刚接触的您不要误认为 <strong>Bagging</strong> 是一种算法,<strong>Bagging</strong> 是集成学习中的学习框架, <strong>Bagging</strong> 是并行式集成学习方法。大名鼎鼎的随机森林算法就是在 $Bagging$ 的基础上修改的算法。</p>
|
|
<p><strong> Bagging 方法的核心思想就是三个臭皮匠顶个诸葛亮</strong>。如果使用 <strong>Bagging</strong> 解决分类问题,就是将多个分类器的结果整合起来进行投票,选取票数最高的结果作为最终结果。如果使用 <strong>Bagging</strong> 解决回归问题,就将多个回归器的结果加起来然后求平均,将平均值作为最终结果。</p>
|
|
<p>那么 <strong>Bagging</strong> 方法如此有效呢,举个例子。狼人杀我相信您应该玩过,在天黑之前,村民们都要根据当天所发生的事和别人的发现来投票决定谁可能是狼人。</p>
|
|
<p>如果我们将每个村民看成是一个分类器,那么每个村民的任务就是二分类,假设 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><msub><mi>h</mi><mi>i</mi></msub><mo>(</mo><mi>x</mi><mo>)</mo></mrow><annotation encoding="application/x-tex">h_i(x)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord"><span class="mord mathit">h</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">i</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mopen">(</span><span class="mord mathit">x</span><span class="mclose">)</span></span></span></span> 表示第 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>i</mi></mrow><annotation encoding="application/x-tex">i</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.65952em;"></span><span class="strut bottom" style="height:0.65952em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">i</span></span></span></span> 个村民认为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>x</mi></mrow><annotation encoding="application/x-tex">x</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">x</span></span></span></span> 是不是狼人( <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mo>−</mo><mn>1</mn></mrow><annotation encoding="application/x-tex">-1</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.72777em;vertical-align:-0.08333em;"></span><span class="base textstyle uncramped"><span class="mord">−</span><span class="mord mathrm">1</span></span></span></span> 代表不是狼人, <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>1</mn></mrow><annotation encoding="application/x-tex">1</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">1</span></span></span></span> 代表是狼人),<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>f</mi><mo>(</mo><mi>x</mi><mo>)</mo></mrow><annotation encoding="application/x-tex">f(x)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.10764em;">f</span><span class="mopen">(</span><span class="mord mathit">x</span><span class="mclose">)</span></span></span></span> 表示 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>x</mi></mrow><annotation encoding="application/x-tex">x</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">x</span></span></span></span> 真正的身份(是不是狼人),<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>ϵ</mi></mrow><annotation encoding="application/x-tex">\epsilon</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">ϵ</span></span></span></span> 表示为村民判断错误的错误率。则有 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>P</mi><mo>(</mo><msub><mi>h</mi><mi>i</mi></msub><mo>(</mo><mi>x</mi><mo>)</mo><mo>≠</mo><mi>f</mi><mo>(</mo><mi>x</mi><mo>)</mo><mo>)</mo><mo>=</mo><mi>ϵ</mi></mrow><annotation encoding="application/x-tex">P(h_i(x)\neq f(x))=\epsilon</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit">h</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">i</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mopen">(</span><span class="mord mathit">x</span><span class="mclose">)</span><span class="mrel">≠</span><span class="mord mathit" style="margin-right:0.10764em;">f</span><span class="mopen">(</span><span class="mord mathit">x</span><span class="mclose">)</span><span class="mclose">)</span><span class="mrel">=</span><span class="mord mathit">ϵ</span></span></span></span>。</p>
|
|
<p>根据狼人杀的规则,村民们需要投票决定天黑前谁是狼人,也就是说如果有超过半数的村民投票时猜对了,那么这一轮就猜对了。那么假设现在有 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>T</mi></mrow><annotation encoding="application/x-tex">T</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.68333em;"></span><span class="strut bottom" style="height:0.68333em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">T</span></span></span></span> 个村民,<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>H</mi><mo>(</mo><mi>x</mi><mo>)</mo></mrow><annotation encoding="application/x-tex">H(x)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.08125em;">H</span><span class="mopen">(</span><span class="mord mathit">x</span><span class="mclose">)</span></span></span></span> 表示投票后最终的结果,则有 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>H</mi><mo>(</mo><mi>x</mi><mo>)</mo><mo>=</mo><mi>s</mi><mi>i</mi><mi>g</mi><mi>n</mi><mo>(</mo><msubsup><mo>∑</mo><mrow><mi>i</mi><mo>=</mo><mn>1</mn></mrow><mi>T</mi></msubsup><msub><mi>h</mi><mi>i</mi></msub><mo>(</mo><mi>x</mi><mo>)</mo><mo>)</mo></mrow><annotation encoding="application/x-tex">H(x)=sign(\sum_{i=1}^Th_i(x))</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8423309999999999em;"></span><span class="strut bottom" style="height:1.142341em;vertical-align:-0.30001em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.08125em;">H</span><span class="mopen">(</span><span class="mord mathit">x</span><span class="mclose">)</span><span class="mrel">=</span><span class="mord mathit">s</span><span class="mord mathit">i</span><span class="mord mathit" style="margin-right:0.03588em;">g</span><span class="mord mathit">n</span><span class="mopen">(</span><span class="mop"><span class="mop op-symbol small-op" style="top:-0.0000050000000000050004em;">∑</span><span class="msupsub"><span class="vlist"><span style="top:0.30001em;margin-left:0em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight">i</span><span class="mrel mtight">=</span><span class="mord mathrm mtight">1</span></span></span></span><span style="top:-0.364em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mord"><span class="mord mathit">h</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">i</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mopen">(</span><span class="mord mathit">x</span><span class="mclose">)</span><span class="mclose">)</span></span></span></span>。</p>
|
|
<p>现在假设每个村民都是有主见的人,对于谁是狼人都有自己的想法,那么他们的错误率也是相互独立的。那么根据<strong>Hoeffding不等式</strong>可知,<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>H</mi><mo>(</mo><mi>x</mi><mo>)</mo></mrow><annotation encoding="application/x-tex">H(x)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.08125em;">H</span><span class="mopen">(</span><span class="mord mathit">x</span><span class="mclose">)</span></span></span></span> 的错误率为:</p>
|
|
<p><span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>P</mi><mo>(</mo><mi>H</mi><mo>(</mo><mi>x</mi><mo>)</mo><mo>≠</mo><mi>f</mi><mo>(</mo><mi>x</mi><mo>)</mo><mo>)</mo><mo>=</mo><msubsup><mo>∑</mo><mrow><mi>k</mi><mo>=</mo><mn>0</mn></mrow><mrow><mi>T</mi><mi mathvariant="normal">/</mi><mn>2</mn></mrow></msubsup><msubsup><mi>C</mi><mi>T</mi><mi>k</mi></msubsup><mo>(</mo><mn>1</mn><mo>−</mo><mi>ϵ</mi><msup><mo>)</mo><mi>k</mi></msup><msup><mi>ϵ</mi><mrow><mi>T</mi><mo>−</mo><mi>k</mi></mrow></msup><mo>≤</mo><mi>e</mi><mi>x</mi><mi>p</mi><mo>(</mo><mo>−</mo><mfrac><mrow><mn>1</mn></mrow><mrow><mn>2</mn></mrow></mfrac><mi>T</mi><mo>(</mo><mn>1</mn><mo>−</mo><mn>2</mn><mi>ϵ</mi><msup><mo>)</mo><mn>2</mn></msup><mo>)</mo></mrow><annotation encoding="application/x-tex">
|
|
P(H(x)\neq f(x))=\sum_{k=0}^{T/2}C_T^k(1-\epsilon)^k\epsilon ^{T-k} \leq exp(-\frac{1}{2}T(1-2\epsilon)^2)
|
|
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:1.0448em;"></span><span class="strut bottom" style="height:1.3898em;vertical-align:-0.345em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord mathit" style="margin-right:0.08125em;">H</span><span class="mopen">(</span><span class="mord mathit">x</span><span class="mclose">)</span><span class="mrel">≠</span><span class="mord mathit" style="margin-right:0.10764em;">f</span><span class="mopen">(</span><span class="mord mathit">x</span><span class="mclose">)</span><span class="mclose">)</span><span class="mrel">=</span><span class="mop"><span class="mop op-symbol small-op" style="top:-0.0000050000000000050004em;">∑</span><span class="msupsub"><span class="vlist"><span style="top:0.30130799999999996em;margin-left:0em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.03148em;">k</span><span class="mrel mtight">=</span><span class="mord mathrm mtight">0</span></span></span></span><span style="top:-0.5197999999999999em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span><span class="mord mathrm mtight">/</span><span class="mord mathrm mtight">2</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mord"><span class="mord mathit" style="margin-right:0.07153em;">C</span><span class="msupsub"><span class="vlist"><span style="top:0.275331em;margin-left:-0.07153em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span></span></span><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.03148em;">k</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mopen">(</span><span class="mord mathrm">1</span><span class="mbin">−</span><span class="mord mathit">ϵ</span><span class="mclose"><span class="mclose">)</span><span class="msupsub"><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.03148em;">k</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mord"><span class="mord mathit">ϵ</span><span class="msupsub"><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span><span class="mbin mtight">−</span><span class="mord mathit mtight" style="margin-right:0.03148em;">k</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mrel">≤</span><span class="mord mathit">e</span><span class="mord mathit">x</span><span class="mord mathit">p</span><span class="mopen">(</span><span class="mord">−</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.394em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathrm mtight">1</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span><span class="mclose sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span></span><span class="mord mathit" style="margin-right:0.13889em;">T</span><span class="mopen">(</span><span class="mord mathrm">1</span><span class="mbin">−</span><span class="mord mathrm">2</span><span class="mord mathit">ϵ</span><span class="mclose"><span class="mclose">)</span><span class="msupsub"><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mclose">)</span></span></span></span></p>
|
|
<p>根据上式可知,如果 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>5</mn></mrow><annotation encoding="application/x-tex">5</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">5</span></span></span></span> 个村民,每个村民的错误率为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>0</mn><mi mathvariant="normal">.</mi><mn>3</mn><mn>3</mn></mrow><annotation encoding="application/x-tex">0.33</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">0</span><span class="mord mathrm">.</span><span class="mord mathrm">3</span><span class="mord mathrm">3</span></span></span></span> ,那么投票的错误率为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>0</mn><mi mathvariant="normal">.</mi><mn>7</mn><mn>4</mn><mn>9</mn></mrow><annotation encoding="application/x-tex">0.749</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">0</span><span class="mord mathrm">.</span><span class="mord mathrm">7</span><span class="mord mathrm">4</span><span class="mord mathrm">9</span></span></span></span> ;如果 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>2</mn><mn>0</mn></mrow><annotation encoding="application/x-tex">20</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">2</span><span class="mord mathrm">0</span></span></span></span> 个村民,每个村民的错误率为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>0</mn><mi mathvariant="normal">.</mi><mn>3</mn><mn>3</mn></mrow><annotation encoding="application/x-tex">0.33</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">0</span><span class="mord mathrm">.</span><span class="mord mathrm">3</span><span class="mord mathrm">3</span></span></span></span> ,那么投票的错误率为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>0</mn><mi mathvariant="normal">.</mi><mn>3</mn><mn>1</mn><mn>5</mn></mrow><annotation encoding="application/x-tex">0.315</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">0</span><span class="mord mathrm">.</span><span class="mord mathrm">3</span><span class="mord mathrm">1</span><span class="mord mathrm">5</span></span></span></span> ;如果 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>5</mn><mn>0</mn></mrow><annotation encoding="application/x-tex">50</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">5</span><span class="mord mathrm">0</span></span></span></span> 个村民,每个村民的错误率为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>0</mn><mi mathvariant="normal">.</mi><mn>3</mn><mn>3</mn></mrow><annotation encoding="application/x-tex">0.33</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">0</span><span class="mord mathrm">.</span><span class="mord mathrm">3</span><span class="mord mathrm">3</span></span></span></span> ,那么投票的错误率为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>0</mn><mi mathvariant="normal">.</mi><mn>0</mn><mn>5</mn><mn>6</mn></mrow><annotation encoding="application/x-tex">0.056</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">0</span><span class="mord mathrm">.</span><span class="mord mathrm">0</span><span class="mord mathrm">5</span><span class="mord mathrm">6</span></span></span></span> ;如果 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>1</mn><mn>0</mn><mn>0</mn></mrow><annotation encoding="application/x-tex">100</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">1</span><span class="mord mathrm">0</span><span class="mord mathrm">0</span></span></span></span> 个村民,每个村民的错误率为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>0</mn><mi mathvariant="normal">.</mi><mn>3</mn><mn>3</mn></mrow><annotation encoding="application/x-tex">0.33</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">0</span><span class="mord mathrm">.</span><span class="mord mathrm">3</span><span class="mord mathrm">3</span></span></span></span> ,那么投票的错误率为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>0</mn><mi mathvariant="normal">.</mi><mn>0</mn><mn>0</mn><mn>3</mn></mrow><annotation encoding="application/x-tex">0.003</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">0</span><span class="mord mathrm">.</span><span class="mord mathrm">0</span><span class="mord mathrm">0</span><span class="mord mathrm">3</span></span></span></span> 。<strong>从结果可以看出,村民的数量越大,那么投票后犯错的错误率就越小。</strong>这也是<strong>Bagging</strong>性能强的原因之一。</p>
|
|
<h3 id="bagging方法如何训练">Bagging方法如何训练</h3>
|
|
<p><strong>Bagging</strong> 在训练时的特点就是<strong>随机有放回采样</strong>和<strong>并行</strong>。</p>
|
|
<p><strong>随机有放回采样:</strong> 假设训练数据集有 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>m</mi></mrow><annotation encoding="application/x-tex">m</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">m</span></span></span></span> 条样本数据,每次从这 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>m</mi></mrow><annotation encoding="application/x-tex">m</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">m</span></span></span></span> 条数据中随机取一条数据放入采样集,然后将其返回,让下一次采样有机会仍然能被采样。然后重复 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>m</mi></mrow><annotation encoding="application/x-tex">m</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">m</span></span></span></span> 次,就能得到拥有 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>m</mi></mrow><annotation encoding="application/x-tex">m</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">m</span></span></span></span> 条数据的采样集,该采样集作为 <strong>Bagging</strong> 的众多分类器中的一个作为训练数据集。假设有 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>T</mi></mrow><annotation encoding="application/x-tex">T</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.68333em;"></span><span class="strut bottom" style="height:0.68333em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">T</span></span></span></span> 个分类器(随便什么分类器),那么就重复 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>T</mi></mrow><annotation encoding="application/x-tex">T</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.68333em;"></span><span class="strut bottom" style="height:0.68333em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">T</span></span></span></span> 此随机有放回采样,构建出 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>T</mi></mrow><annotation encoding="application/x-tex">T</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.68333em;"></span><span class="strut bottom" style="height:0.68333em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">T</span></span></span></span> 个采样集分别作为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>T</mi></mrow><annotation encoding="application/x-tex">T</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.68333em;"></span><span class="strut bottom" style="height:0.68333em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">T</span></span></span></span> 个分类器的训练数据集。</p>
|
|
<p><strong>并行:</strong> 假设有 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>1</mn><mn>0</mn></mrow><annotation encoding="application/x-tex">10</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">1</span><span class="mord mathrm">0</span></span></span></span> 个分类器,在<strong>Boosting</strong>中,<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>1</mn></mrow><annotation encoding="application/x-tex">1</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">1</span></span></span></span> 号分类器训练完成之后才能开始<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>2</mn></mrow><annotation encoding="application/x-tex">2</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">2</span></span></span></span> 号分类器的训练,而在<strong>Bagging</strong>中,分类器可以同时进行训练,当所有分类器训练完成之后,整个<strong>Bagging</strong>的训练过程就结束了。</p>
|
|
<p><strong>Bagging</strong>训练过程如下图所示:</p>
|
|
<p><img src="img/27.jpg" alt=""></p>
|
|
<h3 id="bagging方法如何预测">Bagging方法如何预测</h3>
|
|
<p><strong>Bagging</strong>在预测时非常简单,就是<strong>投票</strong>!比如现在有 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>5</mn></mrow><annotation encoding="application/x-tex">5</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">5</span></span></span></span> 个分类器,有 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>3</mn></mrow><annotation encoding="application/x-tex">3</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">3</span></span></span></span> 个分类器认为当前样本属于 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>A</mi></mrow><annotation encoding="application/x-tex">A</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.68333em;"></span><span class="strut bottom" style="height:0.68333em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">A</span></span></span></span> 类,<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>1</mn></mrow><annotation encoding="application/x-tex">1</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">1</span></span></span></span> 个分类器认为属于 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>B</mi></mrow><annotation encoding="application/x-tex">B</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.68333em;"></span><span class="strut bottom" style="height:0.68333em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.05017em;">B</span></span></span></span> 类,<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>1</mn></mrow><annotation encoding="application/x-tex">1</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">1</span></span></span></span> 个分类器认为属于 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>C</mi></mrow><annotation encoding="application/x-tex">C</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.68333em;"></span><span class="strut bottom" style="height:0.68333em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.07153em;">C</span></span></span></span> 类,那么<strong>Bagging</strong>的结果会是 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>A</mi></mrow><annotation encoding="application/x-tex">A</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.68333em;"></span><span class="strut bottom" style="height:0.68333em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">A</span></span></span></span> 类(因为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>A</mi></mrow><annotation encoding="application/x-tex">A</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.68333em;"></span><span class="strut bottom" style="height:0.68333em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit">A</span></span></span></span> 类的票数最高)。</p>
|
|
<p><strong>Bagging</strong>预测过程如下图所示:</p>
|
|
<p><img src="img/28.jpg" alt=""></p>
|
|
<h2 id="随机森林">随机森林</h2>
|
|
<p><strong>随机森林</strong>是<strong>Bagging</strong>的一种扩展变体,<strong>随机森林</strong>的训练过程相对与<strong>Bagging</strong>的训练过程的改变有:</p>
|
|
<ul>
|
|
<li>基学习器:<strong>Bagging</strong>的基学习器可以是任意学习器,而随机森林则是以决策树作为基学习器。</li>
|
|
<li>随机属性选择:假设原始训练数据集有 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>1</mn><mn>0</mn></mrow><annotation encoding="application/x-tex">10</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">1</span><span class="mord mathrm">0</span></span></span></span> 个特征,从这 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mn>1</mn><mn>0</mn></mrow><annotation encoding="application/x-tex">10</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.64444em;"></span><span class="strut bottom" style="height:0.64444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">1</span><span class="mord mathrm">0</span></span></span></span> 个特征中随机选取 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>k</mi></mrow><annotation encoding="application/x-tex">k</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.69444em;"></span><span class="strut bottom" style="height:0.69444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.03148em;">k</span></span></span></span> 个特征构成训练数据子集,然后将这个子集作为训练集扔给决策树去训练。其中 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>k</mi></mrow><annotation encoding="application/x-tex">k</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.69444em;"></span><span class="strut bottom" style="height:0.69444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.03148em;">k</span></span></span></span> 的取值一般为 <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>l</mi><mi>o</mi><mi>g</mi><mn>2</mn></mrow><annotation encoding="application/x-tex">log2</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.69444em;"></span><span class="strut bottom" style="height:0.8888799999999999em;vertical-align:-0.19444em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.01968em;">l</span><span class="mord mathit">o</span><span class="mord mathit" style="margin-right:0.03588em;">g</span><span class="mord mathrm">2</span></span></span></span> (特征数量)。</li>
|
|
</ul>
|
|
<p>这样的改动通常会使得随机森林具有更加强的泛化性,因为每一棵决策树的训练数据集是随机的,而且训练数据集中的特征也是随机抽取的。如果每一棵决策树模型的差异比较大,那么就很容易能够解决决策树容易过拟合的问题。</p>
|
|
|
|
|
|
</section>
|
|
|
|
</div>
|
|
<div class="search-results">
|
|
<div class="has-results">
|
|
|
|
<h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
|
|
<ul class="search-results-list"></ul>
|
|
|
|
</div>
|
|
<div class="no-results">
|
|
|
|
<h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
<a href="decision_tree.html" class="navigation navigation-prev " aria-label="Previous page: 最接近人类思维的算法-决策树">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
|
|
<a href="kMeans.html" class="navigation navigation-next " aria-label="Next page: 物以类聚人以群分-kMeans">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
|
|
|
|
|
|
</div>
|
|
|
|
<script>
|
|
var gitbook = gitbook || [];
|
|
gitbook.push(function() {
|
|
gitbook.page.hasChanged({"page":{"title":"群众的力量是伟大的-随机森林","level":"1.3.5","depth":2,"next":{"title":"物以类聚人以群分-kMeans","level":"1.3.6","depth":2,"path":"kMeans.md","ref":"kMeans.md","articles":[]},"previous":{"title":"最接近人类思维的算法-决策树","level":"1.3.4","depth":2,"path":"decision_tree.md","ref":"decision_tree.md","articles":[]},"dir":"ltr"},"config":{"gitbook":"*","theme":"default","variables":{},"plugins":["katex"],"pluginsConfig":{"katex":{},"highlight":{},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"fontsettings":{"theme":"white","family":"sans","size":2},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"}},"file":{"path":"random_forest.md","mtime":"2019-07-04T06:50:16.353Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2019-07-06T07:31:21.537Z"},"basePath":".","book":{"language":""}});
|
|
});
|
|
</script>
|
|
</div>
|
|
|
|
|
|
<script src="gitbook/gitbook.js"></script>
|
|
<script src="gitbook/theme.js"></script>
|
|
|
|
|
|
<script src="gitbook/gitbook-plugin-search/search-engine.js"></script>
|
|
|
|
|
|
|
|
<script src="gitbook/gitbook-plugin-search/search.js"></script>
|
|
|
|
|
|
|
|
<script src="gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
|
|
|
|
|
|
|
|
<script src="gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
|
|
|
|
|
|
|
|
<script src="gitbook/gitbook-plugin-sharing/buttons.js"></script>
|
|
|
|
|
|
|
|
<script src="gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
|
|
|
|
|
|
|
|
</body>
|
|
</html>
|
|
|