You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
684 lines
46 KiB
684 lines
46 KiB
5 years ago
|
|
||
|
<!DOCTYPE HTML>
|
||
|
<html lang="zh-Hans">
|
||
|
<head>
|
||
|
<meta charset="UTF-8">
|
||
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||
|
<title>以距离为尺-AGNES · GitBook</title>
|
||
|
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||
|
<meta name="description" content="">
|
||
|
<meta name="generator" content="GitBook 3.2.3">
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<link rel="stylesheet" href="gitbook/style.css">
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-katex/katex.min.css">
|
||
|
|
||
|
|
||
|
|
||
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-highlight/website.css">
|
||
|
|
||
|
|
||
|
|
||
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-search/search.css">
|
||
|
|
||
|
|
||
|
|
||
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-fontsettings/website.css">
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<meta name="HandheldFriendly" content="true"/>
|
||
|
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
|
||
|
<meta name="apple-mobile-web-app-capable" content="yes">
|
||
|
<meta name="apple-mobile-web-app-status-bar-style" content="black">
|
||
|
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="gitbook/images/apple-touch-icon-precomposed-152.png">
|
||
|
<link rel="shortcut icon" href="gitbook/images/favicon.ico" type="image/x-icon">
|
||
|
|
||
|
|
||
|
<link rel="next" href="metrics.html" />
|
||
|
|
||
|
|
||
|
<link rel="prev" href="kMeans.html" />
|
||
|
|
||
|
|
||
|
</head>
|
||
|
<body>
|
||
|
|
||
|
<div class="book">
|
||
|
<div class="book-summary">
|
||
|
|
||
|
|
||
|
<div id="book-search-input" role="search">
|
||
|
<input type="text" placeholder="Type to search" />
|
||
|
</div>
|
||
|
|
||
|
|
||
|
<nav role="navigation">
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="summary">
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.1" data-path="./">
|
||
|
|
||
|
<a href="./">
|
||
|
|
||
|
|
||
|
简介
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.2" data-path="machine_learning.html">
|
||
|
|
||
|
<a href="machine_learning.html">
|
||
|
|
||
|
|
||
|
机器学习概述
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3" data-path="algorithm.html">
|
||
|
|
||
|
<a href="algorithm.html">
|
||
|
|
||
|
|
||
|
常见机器学习算法
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="articles">
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.3.1" data-path="kNN.html">
|
||
|
|
||
|
<a href="kNN.html">
|
||
|
|
||
|
|
||
|
近朱者赤近墨者黑-kNN
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.2" data-path="linear_regression.html">
|
||
|
|
||
|
<a href="linear_regression.html">
|
||
|
|
||
|
|
||
|
最简单的回归算法-线性回归
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.3" data-path="logistic_regression.html">
|
||
|
|
||
|
<a href="logistic_regression.html">
|
||
|
|
||
|
|
||
|
使用回归的思想进行分类-逻辑回归
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.4" data-path="decision_tree.html">
|
||
|
|
||
|
<a href="decision_tree.html">
|
||
|
|
||
|
|
||
|
最接近人类思维的算法-决策树
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.5" data-path="random_forest.html">
|
||
|
|
||
|
<a href="random_forest.html">
|
||
|
|
||
|
|
||
|
群众的力量是伟大的-随机森林
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.6" data-path="kMeans.html">
|
||
|
|
||
|
<a href="kMeans.html">
|
||
|
|
||
|
|
||
|
物以类聚人以群分-kMeans
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter active" data-level="1.3.7" data-path="AGNES.html">
|
||
|
|
||
|
<a href="AGNES.html">
|
||
|
|
||
|
|
||
|
以距离为尺-AGNES
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
</ul>
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.4" data-path="metrics.html">
|
||
|
|
||
|
<a href="metrics.html">
|
||
|
|
||
|
|
||
|
模型评估指标
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="articles">
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.4.1" data-path="classification_metrics.html">
|
||
|
|
||
|
<a href="classification_metrics.html">
|
||
|
|
||
|
|
||
|
分类性能评估指标
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.4.2" data-path="regression_metrics.html">
|
||
|
|
||
|
<a href="regression_metrics.html">
|
||
|
|
||
|
|
||
|
回归性能评估指标
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.4.3" data-path="cluster_metrics.html">
|
||
|
|
||
|
<a href="cluster_metrics.html">
|
||
|
|
||
|
|
||
|
聚类性能评估指标
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
</ul>
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.5" data-path="sklearn.html">
|
||
|
|
||
|
<a href="sklearn.html">
|
||
|
|
||
|
|
||
|
使用sklearn进行机器学习
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6" >
|
||
|
|
||
|
<span>
|
||
|
|
||
|
|
||
|
综合实战案例
|
||
|
|
||
|
</span>
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="articles">
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1" >
|
||
|
|
||
|
<span>
|
||
|
|
||
|
|
||
|
泰坦尼克生还预测
|
||
|
|
||
|
</span>
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="articles">
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1.1" data-path="titanic/introduction.html">
|
||
|
|
||
|
<a href="titanic/introduction.html">
|
||
|
|
||
|
|
||
|
简介
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1.2" data-path="titanic/EDA.html">
|
||
|
|
||
|
<a href="titanic/EDA.html">
|
||
|
|
||
|
|
||
|
探索性数据分析(EDA)
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1.3" data-path="titanic/feature engerning.html">
|
||
|
|
||
|
<a href="titanic/feature engerning.html">
|
||
|
|
||
|
|
||
|
特征工程
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1.4" data-path="titanic/fit and predict.html">
|
||
|
|
||
|
<a href="titanic/fit and predict.html">
|
||
|
|
||
|
|
||
|
构建模型进行预测
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1.5" data-path="titanic/tuning.html">
|
||
|
|
||
|
<a href="titanic/tuning.html">
|
||
|
|
||
|
|
||
|
调参
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
</ul>
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.2" >
|
||
|
|
||
|
<span>
|
||
|
|
||
|
|
||
|
使用强化学习玩乒乓球游戏
|
||
|
|
||
|
</span>
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="articles">
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.6.2.1" data-path="pingpong/what is reinforce learning.html">
|
||
|
|
||
|
<a href="pingpong/what is reinforce learning.html">
|
||
|
|
||
|
|
||
|
什么是强化学习
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.2.2" data-path="pingpong/Policy Gradient.html">
|
||
|
|
||
|
<a href="pingpong/Policy Gradient.html">
|
||
|
|
||
|
|
||
|
Policy Gradient原理
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.2.3" data-path="pingpong/coding.html">
|
||
|
|
||
|
<a href="pingpong/coding.html">
|
||
|
|
||
|
|
||
|
使用Policy Gradient玩乒乓球游戏
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
</ul>
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
</ul>
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.7" data-path="recommand.html">
|
||
|
|
||
|
<a href="recommand.html">
|
||
|
|
||
|
|
||
|
实训推荐
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<li class="divider"></li>
|
||
|
|
||
|
<li>
|
||
|
<a href="https://www.gitbook.com" target="_blank" rel="noopener noreferrer" class="gitbook-link">
|
||
|
Published with GitBook
|
||
|
</a>
|
||
|
</li>
|
||
|
</ul>
|
||
|
|
||
|
|
||
|
</nav>
|
||
|
|
||
|
|
||
|
</div>
|
||
|
|
||
|
<div class="book-body">
|
||
|
|
||
|
<div class="body-inner">
|
||
|
|
||
|
|
||
|
|
||
|
<div class="book-header" role="navigation">
|
||
|
|
||
|
|
||
|
<!-- Title -->
|
||
|
<h1>
|
||
|
<i class="fa fa-circle-o-notch fa-spin"></i>
|
||
|
<a href="." >以距离为尺-AGNES</a>
|
||
|
</h1>
|
||
|
</div>
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<div class="page-wrapper" tabindex="-1" role="main">
|
||
|
<div class="page-inner">
|
||
|
|
||
|
<div id="book-search-results">
|
||
|
<div class="search-noresults">
|
||
|
|
||
|
<section class="normal markdown-section">
|
||
|
|
||
|
<h1 id="以距离为尺-agnes算法">以距离为尺-AGNES算法</h1>
|
||
|
<p>AGNES 算法是一种聚类算法,最初将每个对象作为一个簇,然后这些簇根据某些距离准则被一步步地合并。两个簇间的相似度有多种不同的计算方法。合并过程反复进行,直到簇的数量达到预设的簇数目为止。所以理解 AGNES 算法前需要先理解一些距离准则。</p>
|
||
|
<h1 id="距离准则">距离准则</h1>
|
||
|
<h2 id="为什么需要距离">为什么需要距离</h2>
|
||
|
<p>AGNES 算法是一种自底向上聚合的层次聚类算法,它先会将数据集中的每个样本看作一个初始簇,然后在算法运行的每一步中找出距离最近的两个簇进行合并,直至达到预设的簇的数量。所以 AGNES 算法需要不断地计算簇之间的距离,这也符合聚类的核心思想(物以类聚,人以群分),因此怎样度量两个簇之间的距离成为了关键。</p>
|
||
|
<h2 id="距离的计算">距离的计算</h2>
|
||
|
<p>衡量两个簇之间的距离通常分为最小距离、最大距离和平均距离。在 AGNES 算法中可根据具体业务选择其中一种距离作为度量标准。</p>
|
||
|
<h3 id="最小距离">最小距离</h3>
|
||
|
<p>最小距离描述的是两个簇之间距离最近的两个样本所对应的距离。例如下图中圆圈和菱形分别代表两个簇,两个簇之间离得最近的样本的<strong>欧氏距离</strong>为 3.3,则最小距离为 3.3。</p>
|
||
|
<p><img src="img/59.jpg" alt=""></p>
|
||
|
<p>假设给定簇<math><semantics><mrow><msub><mi>C</mi><mi>i</mi></msub></mrow><annotation encoding="application/x-tex">C_i</annotation></semantics></math>与<math><semantics><mrow><msub><mi>C</mi><mi>j</mi></msub></mrow><annotation encoding="application/x-tex">C_j</annotation></semantics></math>,则最小距离为:<math><semantics><mrow><msub><mi>d</mi><mi>min</mi></msub><mo>=</mo><msub><mi>min</mi><mrow><mi>x</mi><mo>∈</mo><msub><mi>C</mi><mi>i</mi></msub><mo separator="true">,</mo><mi>z</mi><mo>∈</mo><msub><mi>C</mi><mi>j</mi></msub></mrow></msub><mi>dist</mi><mo stretchy="false">(</mo><mi>x</mi><mo separator="true">,</mo><mi>z</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">d_{min}=\min_{x\in C_i,z\in C_j}dist(x,z)</annotation></semantics></math></p>
|
||
|
<h3 id="最大距离">最大距离</h3>
|
||
|
<p>最大距离描述的是两个簇之间距离最远的两个样本所对应的距离。例如下图中圆圈和菱形分别代表两个簇,两个簇之间离得最远的样本的<strong>欧氏距离</strong>为 23.3,则最大距离为 23.3。</p>
|
||
|
<p><img src="img/60.jpg" alt=""></p>
|
||
|
<p>假设给定簇<math><semantics><mrow><msub><mi>C</mi><mi>i</mi></msub></mrow><annotation encoding="application/x-tex">C_i</annotation></semantics></math>与<math><semantics><mrow><msub><mi>C</mi><mi>j</mi></msub></mrow><annotation encoding="application/x-tex">C_j</annotation></semantics></math>,则最大距离为:<math><semantics><mrow><msub><mi>d</mi><mi>max</mi></msub><mo>=</mo><msub><mi>max</mi><mrow><mi>x</mi><mo>∈</mo><msub><mi>C</mi><mi>i</mi></msub><mo separator="true">,</mo><mi>z</mi><mo>∈</mo><msub><mi>C</mi><mi>j</mi></msub></mrow></msub><mi>dist</mi><mo stretchy="false">(</mo><mi>x</mi><mo separator="true">,</mo><mi>z</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">d_{max}=\max_{x\in C_i,z\in C_j}dist(x,z)</annotation></semantics></math></p>
|
||
|
<h3 id="平均距离">平均距离</h3>
|
||
|
<p>平均距离描述的是两个簇之间样本的平均距离。例如下图中圆圈和菱形分别代表两个簇,计算分属两个簇的所有样本对之间的欧氏距离并求其平均值,即为平均距离。</p>
|
||
|
<p><img src="img/61.jpg" alt=""></p>
|
||
|
<p>假设给定簇<math><semantics><mrow><msub><mi>C</mi><mi>i</mi></msub></mrow><annotation encoding="application/x-tex">C_i</annotation></semantics></math>与<math><semantics><mrow><msub><mi>C</mi><mi>j</mi></msub></mrow><annotation encoding="application/x-tex">C_j</annotation></semantics></math>,<math><semantics><mrow><mi mathvariant="normal">∣</mi><msub><mi>C</mi><mi>i</mi></msub><mi mathvariant="normal">∣</mi><mo separator="true">,</mo><mi mathvariant="normal">∣</mi><msub><mi>C</mi><mi>j</mi></msub><mi mathvariant="normal">∣</mi></mrow><annotation encoding="application/x-tex">|C_i|,|C_j|</annotation></semantics></math>分别表示两个簇中样本的数量,则平均距离为:<math><semantics><mrow><msub><mi>d</mi><mi>avg</mi></msub><mo>=</mo><mfrac><mn>1</mn><mrow><mo stretchy="false">|</mo><msub><mi>C</mi><mi>i</mi></msub><mo stretchy="false">|</mo><mo stretchy="false">|</mo><msub><mi>C</mi><mi>j</mi></msub><mo stretchy="false">|</mo></mrow></mfrac><msub><mo>∑</mo><mrow><mi>x</mi><mo>∈</mo><msub><mi>C</mi><mi>i</mi></msub></mrow></msub><msub><mo>∑</mo><mrow><mi>z</mi><mo>∈</mo><msub><mi>C</mi><mi>j</mi></msub></mrow></msub><mi>dist</mi><mo stretchy="false">(</mo><mi>x</mi><mo separator="true">,</mo><mi>z</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">d_{avg}=\frac{1}{|C_i||C_j|}\sum_{x\in C_i}\sum_{z\in C_j}dist(x,z)</annotation></semantics></math></p>
|
||
|
<h1 id="agnes-算法流程">AGNES 算法流程</h1>
|
||
|
<p>AGNES 算法是一种自底向上聚合的层次聚类算法,它先会将数据集中的每个样本看作一个<strong>初始簇</strong>,然后在算法运行的每一步中找出距离最近的两个簇进行合并,直至达到预设的簇的数量。</p>
|
||
|
<p>举个例子,现在想要将西瓜数据聚成两类,数据如下表所示:</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>编号</th>
|
||
|
<th>体积</th>
|
||
|
<th>重量</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>1</td>
|
||
|
<td>1.2</td>
|
||
|
<td>2.3</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>2</td>
|
||
|
<td>3.6</td>
|
||
|
<td>7.1</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>3</td>
|
||
|
<td>1.1</td>
|
||
|
<td>2.2</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>4</td>
|
||
|
<td>3.5</td>
|
||
|
<td>6.9</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>5</td>
|
||
|
<td>1.5</td>
|
||
|
<td>2.5</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p>一开始,每个样本都看成是一个簇( 1 号样本看成是 1 号簇, 2 号样本看成是 2 号簇,..., 5 号样本看成是 5 号簇),假设簇的集合为 C=[[1], [2], [3], [4], [5]] 。</p>
|
||
|
<p>假设使用簇间最小距离来度量两个簇之间的远近,从表中可以看出 1 号簇与 3 号簇的簇间最小距离最小。因此需要将 1 号簇和 3 号簇合并,那么此时簇的集合 C=[[1, 3], [2], [4], [5]]。</p>
|
||
|
<p>然后继续看这 4 个簇中哪两个簇之间的最小距离最小,我们发现 2 号簇与 4 号簇的最小距离最小,因此我们要进行合并,合并之后 C=[[1, 3], [2, 4], [5]]。</p>
|
||
|
<p>然后继续看这 3 个簇中哪两个簇之间的最小距离最小,我们发现 5 号簇与 [1, 3] 簇的最小距离最小,因此我们要进行合并,合并之后 C=[[1, 3, 5], [2, 4]]。</p>
|
||
|
<p>这个时候 C 中只有两个簇了,达到了我们的预期目标(想要聚成两类),所以算法停止。算法停止后会发现,我们已经将 5 个西瓜,聚成了两类,一类是小西瓜,另一类是大西瓜。</p>
|
||
|
<p>如果将整个聚类过程中的各次合并及其先后次序可视化出来,就能看出为什么说 AGNES 是自底向上的层次聚类算法了。</p>
|
||
|
<p><img src="img/62.jpg" alt=""></p>
|
||
|
<p>所以 AGNES 伪代码如下:</p>
|
||
|
<pre><code class="lang-python"><span class="hljs-comment">#假设数据集为D,想要聚成的簇的数量为k</span>
|
||
|
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">AGNES</span><span class="hljs-params">(D, k)</span>:</span>
|
||
|
<span class="hljs-comment">#C为聚类结果</span>
|
||
|
C = []
|
||
|
<span class="hljs-comment">#将每个样本看成一个簇</span>
|
||
|
<span class="hljs-keyword">for</span> d <span class="hljs-keyword">in</span> D:
|
||
|
C.append(d)
|
||
|
|
||
|
<span class="hljs-comment">#C中簇的数量</span>
|
||
|
q=len(C)
|
||
|
<span class="hljs-keyword">while</span> q > k:
|
||
|
寻找距离最小的两个簇a和b
|
||
|
将a和b合并,并修改C
|
||
|
q = len(C)
|
||
|
<span class="hljs-keyword">return</span> C
|
||
|
</code></pre>
|
||
|
|
||
|
|
||
|
</section>
|
||
|
|
||
|
</div>
|
||
|
<div class="search-results">
|
||
|
<div class="has-results">
|
||
|
|
||
|
<h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
|
||
|
<ul class="search-results-list"></ul>
|
||
|
|
||
|
</div>
|
||
|
<div class="no-results">
|
||
|
|
||
|
<h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
|
||
|
|
||
|
</div>
|
||
|
</div>
|
||
|
</div>
|
||
|
|
||
|
</div>
|
||
|
</div>
|
||
|
|
||
|
</div>
|
||
|
|
||
|
|
||
|
|
||
|
<a href="kMeans.html" class="navigation navigation-prev " aria-label="Previous page: 物以类聚人以群分-kMeans">
|
||
|
<i class="fa fa-angle-left"></i>
|
||
|
</a>
|
||
|
|
||
|
|
||
|
<a href="metrics.html" class="navigation navigation-next " aria-label="Next page: 模型评估指标">
|
||
|
<i class="fa fa-angle-right"></i>
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</div>
|
||
|
|
||
|
<script>
|
||
|
var gitbook = gitbook || [];
|
||
|
gitbook.push(function() {
|
||
|
gitbook.page.hasChanged({"page":{"title":"以距离为尺-AGNES","level":"1.3.7","depth":2,"next":{"title":"模型评估指标","level":"1.4","depth":1,"path":"metrics.md","ref":"metrics.md","articles":[{"title":"分类性能评估指标","level":"1.4.1","depth":2,"path":"classification_metrics.md","ref":"classification_metrics.md","articles":[]},{"title":"回归性能评估指标","level":"1.4.2","depth":2,"path":"regression_metrics.md","ref":"regression_metrics.md","articles":[]},{"title":"聚类性能评估指标","level":"1.4.3","depth":2,"path":"cluster_metrics.md","ref":"cluster_metrics.md","articles":[]}]},"previous":{"title":"物以类聚人以群分-kMeans","level":"1.3.6","depth":2,"path":"kMeans.md","ref":"kMeans.md","articles":[]},"dir":"ltr"},"config":{"gitbook":"*","theme":"default","variables":{},"plugins":["katex"],"pluginsConfig":{"katex":{},"highlight":{},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"fontsettings":{"theme":"white","family":"sans","size":2},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"}},"file":{"path":"AGNES.md","mtime":"2019-07-04T06:39:28.790Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2019-07-06T07:31:21.537Z"},"basePath":".","book":{"language":""}});
|
||
|
});
|
||
|
</script>
|
||
|
</div>
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook.js"></script>
|
||
|
<script src="gitbook/theme.js"></script>
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-search/search-engine.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-search/search.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-sharing/buttons.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
</body>
|
||
|
</html>
|
||
|
|