You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
965 lines
65 KiB
965 lines
65 KiB
5 years ago
|
|
||
|
<!DOCTYPE HTML>
|
||
|
<html lang="zh-Hans">
|
||
|
<head>
|
||
|
<meta charset="UTF-8">
|
||
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||
|
<title>分类性能评估指标 · GitBook</title>
|
||
|
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||
|
<meta name="description" content="">
|
||
|
<meta name="generator" content="GitBook 3.2.3">
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<link rel="stylesheet" href="gitbook/style.css">
|
||
|
|
||
|
|
||
|
|
||
|
|
||
5 years ago
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-katex/katex.min.css">
|
||
|
|
||
|
|
||
|
|
||
5 years ago
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-highlight/website.css">
|
||
|
|
||
|
|
||
|
|
||
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-search/search.css">
|
||
|
|
||
|
|
||
|
|
||
|
<link rel="stylesheet" href="gitbook/gitbook-plugin-fontsettings/website.css">
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<meta name="HandheldFriendly" content="true"/>
|
||
|
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
|
||
|
<meta name="apple-mobile-web-app-capable" content="yes">
|
||
|
<meta name="apple-mobile-web-app-status-bar-style" content="black">
|
||
|
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="gitbook/images/apple-touch-icon-precomposed-152.png">
|
||
|
<link rel="shortcut icon" href="gitbook/images/favicon.ico" type="image/x-icon">
|
||
|
|
||
|
|
||
|
<link rel="next" href="regression_metrics.html" />
|
||
|
|
||
|
|
||
|
<link rel="prev" href="metrics.html" />
|
||
|
|
||
|
|
||
|
</head>
|
||
|
<body>
|
||
|
|
||
|
<div class="book">
|
||
|
<div class="book-summary">
|
||
|
|
||
|
|
||
|
<div id="book-search-input" role="search">
|
||
|
<input type="text" placeholder="Type to search" />
|
||
|
</div>
|
||
|
|
||
|
|
||
|
<nav role="navigation">
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="summary">
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.1" data-path="./">
|
||
|
|
||
|
<a href="./">
|
||
|
|
||
|
|
||
|
简介
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.2" data-path="machine_learning.html">
|
||
|
|
||
|
<a href="machine_learning.html">
|
||
|
|
||
|
|
||
|
机器学习概述
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3" data-path="algorithm.html">
|
||
|
|
||
|
<a href="algorithm.html">
|
||
|
|
||
|
|
||
|
常见机器学习算法
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="articles">
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.3.1" data-path="kNN.html">
|
||
|
|
||
|
<a href="kNN.html">
|
||
|
|
||
|
|
||
|
近朱者赤近墨者黑-kNN
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.2" data-path="linear_regression.html">
|
||
|
|
||
|
<a href="linear_regression.html">
|
||
|
|
||
|
|
||
|
最简单的回归算法-线性回归
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.3" data-path="logistic_regression.html">
|
||
|
|
||
|
<a href="logistic_regression.html">
|
||
|
|
||
|
|
||
|
使用回归的思想进行分类-逻辑回归
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.4" data-path="decision_tree.html">
|
||
|
|
||
|
<a href="decision_tree.html">
|
||
|
|
||
|
|
||
|
最接近人类思维的算法-决策树
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.5" data-path="random_forest.html">
|
||
|
|
||
|
<a href="random_forest.html">
|
||
|
|
||
|
|
||
|
群众的力量是伟大的-随机森林
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.6" data-path="kMeans.html">
|
||
|
|
||
|
<a href="kMeans.html">
|
||
|
|
||
|
|
||
|
物以类聚人以群分-kMeans
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.3.7" data-path="AGNES.html">
|
||
|
|
||
|
<a href="AGNES.html">
|
||
|
|
||
|
|
||
|
以距离为尺-AGNES
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
</ul>
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.4" data-path="metrics.html">
|
||
|
|
||
|
<a href="metrics.html">
|
||
|
|
||
|
|
||
|
模型评估指标
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="articles">
|
||
|
|
||
|
|
||
|
<li class="chapter active" data-level="1.4.1" data-path="classification_metrics.html">
|
||
|
|
||
|
<a href="classification_metrics.html">
|
||
|
|
||
|
|
||
|
分类性能评估指标
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.4.2" data-path="regression_metrics.html">
|
||
|
|
||
|
<a href="regression_metrics.html">
|
||
|
|
||
|
|
||
|
回归性能评估指标
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.4.3" data-path="cluster_metrics.html">
|
||
|
|
||
|
<a href="cluster_metrics.html">
|
||
|
|
||
|
|
||
|
聚类性能评估指标
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
</ul>
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.5" data-path="sklearn.html">
|
||
|
|
||
|
<a href="sklearn.html">
|
||
|
|
||
|
|
||
|
使用sklearn进行机器学习
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
5 years ago
|
<li class="chapter " data-level="1.6" >
|
||
|
|
||
|
<span>
|
||
|
|
||
|
|
||
|
综合实战案例
|
||
|
|
||
|
</span>
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="articles">
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1" >
|
||
|
|
||
|
<span>
|
||
|
|
||
|
|
||
|
泰坦尼克生还预测
|
||
|
|
||
|
</span>
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="articles">
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1.1" data-path="titanic/introduction.html">
|
||
|
|
||
|
<a href="titanic/introduction.html">
|
||
|
|
||
|
|
||
|
简介
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1.2" data-path="titanic/EDA.html">
|
||
|
|
||
|
<a href="titanic/EDA.html">
|
||
|
|
||
|
|
||
|
探索性数据分析(EDA)
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1.3" data-path="titanic/feature engerning.html">
|
||
|
|
||
|
<a href="titanic/feature engerning.html">
|
||
|
|
||
|
|
||
|
特征工程
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1.4" data-path="titanic/fit and predict.html">
|
||
|
|
||
|
<a href="titanic/fit and predict.html">
|
||
|
|
||
|
|
||
|
构建模型进行预测
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.1.5" data-path="titanic/tuning.html">
|
||
|
|
||
|
<a href="titanic/tuning.html">
|
||
|
|
||
|
|
||
|
调参
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
</ul>
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.2" >
|
||
|
|
||
|
<span>
|
||
|
|
||
|
|
||
|
使用强化学习玩乒乓球游戏
|
||
|
|
||
|
</span>
|
||
|
|
||
|
|
||
|
|
||
|
<ul class="articles">
|
||
|
|
||
|
|
||
|
<li class="chapter " data-level="1.6.2.1" data-path="pingpong/what is reinforce learning.html">
|
||
|
|
||
|
<a href="pingpong/what is reinforce learning.html">
|
||
|
|
||
|
|
||
|
什么是强化学习
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.2.2" data-path="pingpong/Policy Gradient.html">
|
||
|
|
||
|
<a href="pingpong/Policy Gradient.html">
|
||
|
|
||
|
|
||
|
Policy Gradient原理
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.6.2.3" data-path="pingpong/coding.html">
|
||
|
|
||
|
<a href="pingpong/coding.html">
|
||
|
|
||
|
|
||
|
使用Policy Gradient玩乒乓球游戏
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
</ul>
|
||
|
|
||
|
</li>
|
||
|
|
||
|
|
||
|
</ul>
|
||
|
|
||
|
</li>
|
||
|
|
||
|
<li class="chapter " data-level="1.7" data-path="recommand.html">
|
||
|
|
||
|
<a href="recommand.html">
|
||
|
|
||
|
|
||
|
实训推荐
|
||
|
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</li>
|
||
|
|
||
5 years ago
|
|
||
|
|
||
|
|
||
|
<li class="divider"></li>
|
||
|
|
||
|
<li>
|
||
|
<a href="https://www.gitbook.com" target="_blank" rel="noopener noreferrer" class="gitbook-link">
|
||
|
Published with GitBook
|
||
|
</a>
|
||
|
</li>
|
||
|
</ul>
|
||
|
|
||
|
|
||
|
</nav>
|
||
|
|
||
|
|
||
|
</div>
|
||
|
|
||
|
<div class="book-body">
|
||
|
|
||
|
<div class="body-inner">
|
||
|
|
||
|
|
||
|
|
||
|
<div class="book-header" role="navigation">
|
||
|
|
||
|
|
||
|
<!-- Title -->
|
||
|
<h1>
|
||
|
<i class="fa fa-circle-o-notch fa-spin"></i>
|
||
|
<a href="." >分类性能评估指标</a>
|
||
|
</h1>
|
||
|
</div>
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<div class="page-wrapper" tabindex="-1" role="main">
|
||
|
<div class="page-inner">
|
||
|
|
||
|
<div id="book-search-results">
|
||
|
<div class="search-noresults">
|
||
|
|
||
|
<section class="normal markdown-section">
|
||
|
|
||
|
<h1 id="分类模型性能评估指标">分类模型性能评估指标</h1>
|
||
|
<h2 id="准确度的缺陷">准确度的缺陷</h2>
|
||
|
<p>准确度这个概念相信对于大家来说肯定并不陌生,就是正确率。例如模型的预测结果与数据真实结果如下表所示:</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>编号</th>
|
||
|
<th>预测结果</th>
|
||
|
<th>真实结果</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>1</td>
|
||
|
<td>1</td>
|
||
|
<td>2</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>2</td>
|
||
|
<td>2</td>
|
||
|
<td>2</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>3</td>
|
||
|
<td>3</td>
|
||
|
<td>3</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>4</td>
|
||
|
<td>1</td>
|
||
|
<td>1</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>5</td>
|
||
|
<td>2</td>
|
||
|
<td>3</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p>很明显,连小朋友都能算出来该模型的准确度为 3/5 。</p>
|
||
|
<p>那么准确度越高就能说明模型的分类性能越好吗?非也!举个例子,现在我开发了一套癌症检测系统,只要输入您的一些基本健康信息,就能预测出您现在是否患有癌症,并且分类的准确度为 0.999 。您认为这样的系统的预测性能好不好呢?</p>
|
||
|
<p>您可能会觉得,哇,这么高的准确度!这个系统肯定很牛逼!但是我们知道,一般年轻人患癌症的概率非常低,假设患癌症的概率为 0.001 ,那么其实我这个癌症检测系统只要一直输出您没有患癌症,准确度也能够达到 0.999 。</p>
|
||
|
<p>假如现在有一个人本身已经患有癌症,但是他自己不知道自己患有癌症。这个时候用我的癌症检测系统检测发现他没有得癌症,那很显然我这个系统已经把他给坑了(耽误了治疗)。</p>
|
||
|
<p>看到这里您应该已经体会到了,一个分类模型如果光看准确度是不够的,尤其是对这种样本<strong>极度不平衡</strong>的情况( 10000 条健康信息数据中,只有 10 条的类别是患有癌症,其他的类别都是健康)。</p>
|
||
|
<h2 id="混淆矩阵">混淆矩阵</h2>
|
||
|
<p>想进一步地考量分类模型的性能如何,可以使用其他的一些性能指标,例如精准率和召回率。而这些指标计算的基础是<strong>混淆矩阵</strong>。</p>
|
||
|
<p>继续以癌症检测系统为例,癌症检测系统的输出不是有癌症就是健康,这里为了方便,就用 1 表示患有癌症, 0 表示健康。假设现在拿 10000 条数据来进行测试,其中有 9978 条数据的真实类别是 0 ,系统预测的类别也是 0 ,有 2 条数据的真实类别是 1 却预测成了 0 ,有 12 条数据的真实类别是 0 但预测成了 1 ,有 8 条数据的真实类别是 1 ,预测结果也是 1 。</p>
|
||
|
<p>如果我们把这些结果组成如下矩阵,则该矩阵就称为<strong>混淆矩阵</strong>。</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>真实\预测</th>
|
||
|
<th>0</th>
|
||
|
<th>1</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>0</td>
|
||
|
<td>9978</td>
|
||
|
<td>12</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>1</td>
|
||
|
<td>2</td>
|
||
|
<td>8</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p>混淆矩阵中每个格子所代表的意义也很明显,意义如下:</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>真实\预测</th>
|
||
|
<th>0</th>
|
||
|
<th>1</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>0</td>
|
||
|
<td>预测 0 正确的数量</td>
|
||
|
<td>预测 1 错误的数量</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>1</td>
|
||
|
<td>预测 0 错误的数量</td>
|
||
|
<td>预测 1 正确的数量</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p>如果将正确看成是 True ,错误看成是 False , 0 看成是 Negative , 1 看成是 Positive 。然后将上表中的文字替换掉,混淆矩阵如下:</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>真实\预测</th>
|
||
|
<th>0</th>
|
||
|
<th>1</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>0</td>
|
||
|
<td>TN</td>
|
||
|
<td>FP</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>1</td>
|
||
|
<td>FN</td>
|
||
|
<td>TP</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p>因此 TN 表示真实类别是 Negative ,预测结果也是 Negative 的数量; FP 表示真实类别是 Negative ,预测结果是 Positive 的数量; FN 表示真实类别是 Positive ,预测结果是 Negative 的数量; TP 表示真实类别是 Positive ,预测结果也是 Positive 的数量。</p>
|
||
|
<p>很明显,当 FN 和 FP 都等于 0 时,模型的性能应该是最好的,因为模型并没有在预测的时候犯错误。即如下混淆矩阵:</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>真实\预测</th>
|
||
|
<th>0</th>
|
||
|
<th>1</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>0</td>
|
||
|
<td>9978</td>
|
||
|
<td>0</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>1</td>
|
||
|
<td>0</td>
|
||
|
<td>22</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p><strong>所以模型分类性能越好,混淆矩阵中非对角线上的数值越小。</strong></p>
|
||
|
<h2 id="精准率">精准率</h2>
|
||
|
<p><strong>精准率(Precision)</strong>指的是模型预测为 Positive 时的预测准确度,其计算公式如下:</p>
|
||
5 years ago
|
<p><span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>P</mi><mi>r</mi><mi>e</mi><mi>c</mi><mi>i</mi><mi>s</mi><mi>i</mi><mi>o</mi><mi>i</mi><mi>n</mi><mo>=</mo><mfrac><mrow><mi>T</mi><mi>P</mi></mrow><mrow><mi>T</mi><mi>P</mi><mo>+</mo><mi>F</mi><mi>P</mi></mrow></mfrac></mrow><annotation encoding="application/x-tex">
|
||
5 years ago
|
Precision=\frac{TP}{TP+FP}
|
||
5 years ago
|
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.872331em;"></span><span class="strut bottom" style="height:1.275662em;vertical-align:-0.403331em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mord mathit" style="margin-right:0.02778em;">r</span><span class="mord mathit">e</span><span class="mord mathit">c</span><span class="mord mathit">i</span><span class="mord mathit">s</span><span class="mord mathit">i</span><span class="mord mathit">o</span><span class="mord mathit">i</span><span class="mord mathit">n</span><span class="mrel">=</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span><span class="mord mathit mtight" style="margin-right:0.13889em;">P</span><span class="mbin mtight">+</span><span class="mord mathit mtight" style="margin-right:0.13889em;">F</span><span class="mord mathit mtight" style="margin-right:0.13889em;">P</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.394em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span><span class="mord mathit 
mtight" style="margin-right:0.13889em;">P</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span><span class="mclose sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span></span></span></span></span></p>
|
||
5 years ago
|
<p>假如癌症检测系统的混淆矩阵如下:</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>真实\预测</th>
|
||
|
<th>0</th>
|
||
|
<th>1</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>0</td>
|
||
|
<td>9978</td>
|
||
|
<td>12</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>1</td>
|
||
|
<td>2</td>
|
||
|
<td>8</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p>则该系统的精准率=8/(8+12)=0.4 。</p>
|
||
|
<p>0.4 这个值表示癌症检测系统的预测结果中如果有 100 个人被预测成患有癌症,那么其中有 40 人是真的患有癌症。<strong>也就是说,精准率越高,那么癌症检测系统预测某人患有癌症的可信度就越高。</strong></p>
|
||
|
<h2 id="召回率">召回率</h2>
|
||
|
<p><strong>召回率(Recall)</strong>指的是我们关注的事件发生了,并且模型预测正确了的比值,其计算公式如下:</p>
|
||
5 years ago
|
<p><span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>R</mi><mi>e</mi><mi>c</mi><mi>a</mi><mi>l</mi><mi>l</mi><mo>=</mo><mfrac><mrow><mi>T</mi><mi>P</mi></mrow><mrow><mi>F</mi><mi>N</mi><mo>+</mo><mi>T</mi><mi>P</mi></mrow></mfrac></mrow><annotation encoding="application/x-tex">
|
||
5 years ago
|
Recall=\frac{TP}{FN+TP}
|
||
5 years ago
|
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.872331em;"></span><span class="strut bottom" style="height:1.275662em;vertical-align:-0.403331em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="mord mathit">e</span><span class="mord mathit">c</span><span class="mord mathit">a</span><span class="mord mathit" style="margin-right:0.01968em;">l</span><span class="mord mathit" style="margin-right:0.01968em;">l</span><span class="mrel">=</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">F</span><span class="mord mathit mtight" style="margin-right:0.10903em;">N</span><span class="mbin mtight">+</span><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span><span class="mord mathit mtight" style="margin-right:0.13889em;">P</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.394em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span><span class="mord mathit mtight" style="margin-right:0.13889em;">P</span></span></span></span><span class="baseline-fix"><span 
class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span><span class="mclose sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span></span></span></span></span></p>
|
||
5 years ago
|
<p>假如癌症检测系统的混淆矩阵如下:</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>真实\预测</th>
|
||
|
<th>0</th>
|
||
|
<th>1</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>0</td>
|
||
|
<td>9978</td>
|
||
|
<td>12</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>1</td>
|
||
|
<td>2</td>
|
||
|
<td>8</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p>则该系统的召回率=8/(8+2)=0.8。</p>
|
||
|
<p>从计算出的召回率可以看出,假设有 100 个患有癌症的病人使用这个系统进行癌症检测,系统能够检测出 80 人是患有癌症的。<strong>也就是说,召回率越高,那么我们感兴趣的对象成为漏网之鱼的可能性越低。</strong></p>
|
||
|
<h2 id="精准率与召回率之间的关系">精准率与召回率之间的关系</h2>
|
||
|
<p>假设有这么一组数据,菱形代表 Positive ,圆形代表 Negative 。</p>
|
||
|
<p><img src="img/66.jpg" alt=""></p>
|
||
|
<p>现在需要训练一个模型对数据进行分类,假如该模型非常简单,就是在数据上画一条线作为分类边界。模型认为边界的左边是 Negative ,右边是 Positive 。如果该模型的分类边界向左或者向右移动的话,模型所对应的精准率和召回率如下图所示:</p>
|
||
|
<p><img src="img/67.jpg" alt=""></p>
|
||
|
<p>从上图可知,<strong>模型的精准率变高,召回率会变低,精准率变低,召回率会变高。</strong></p>
|
||
|
<h2 id="f1-score">F1 Score</h2>
|
||
|
<p>上一节中提到了精准率变高,召回率会变低,精准率变低,召回率会变高。那如果想要同时兼顾精准率和召回率,这个时候就可以使用<strong>F1 Score</strong>来作为性能度量指标了。</p>
|
||
|
<p>F1 Score 是统计学中用来衡量二分类模型精确度的一种指标。它同时兼顾了分类模型的精准率和召回率。F1 Score 可以看作是模型精准率和召回率的调和平均,它的最大值是 1 ,最小值是 0 。其公式如下:</p>
|
||
5 years ago
|
<p><span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>F</mi><mn>1</mn><mo>=</mo><mfrac><mrow><mn>2</mn><mo>∗</mo><mi>p</mi><mi>r</mi><mi>e</mi><mi>c</mi><mi>i</mi><mi>s</mi><mi>i</mi><mi>o</mi><mi>n</mi><mo>∗</mo><mi>r</mi><mi>e</mi><mi>c</mi><mi>a</mi><mi>l</mi><mi>l</mi></mrow><mrow><mi>p</mi><mi>r</mi><mi>e</mi><mi>c</mi><mi>i</mi><mi>s</mi><mi>i</mi><mi>o</mi><mi>n</mi><mo>+</mo><mi>r</mi><mi>e</mi><mi>c</mi><mi>a</mi><mi>l</mi><mi>l</mi></mrow></mfrac></mrow><annotation encoding="application/x-tex">
|
||
5 years ago
|
F1=\frac{2*precision*recall}{precision+recall}
|
||
5 years ago
|
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.9322159999999999em;"></span><span class="strut bottom" style="height:1.4133239999999998em;vertical-align:-0.481108em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">F</span><span class="mord mathrm">1</span><span class="mrel">=</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight">p</span><span class="mord mathit mtight" style="margin-right:0.02778em;">r</span><span class="mord mathit mtight">e</span><span class="mord mathit mtight">c</span><span class="mord mathit mtight">i</span><span class="mord mathit mtight">s</span><span class="mord mathit mtight">i</span><span class="mord mathit mtight">o</span><span class="mord mathit mtight">n</span><span class="mbin mtight">+</span><span class="mord mathit mtight" style="margin-right:0.02778em;">r</span><span class="mord mathit mtight">e</span><span class="mord mathit mtight">c</span><span class="mord mathit mtight">a</span><span class="mord mathit mtight" style="margin-right:0.01968em;">l</span><span class="mord mathit mtight" style="margin-right:0.01968em;">l</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.44610799999999995em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span 
class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathrm mtight">2</span><span class="mbin mtight">∗</span><span class="mord mathit mtight">p</span><span class="mord mathit mtight" style="margin-right:0.02778em;">r</span><span class="mord mathit mtight">e</span><span class="mord mathit mtight">c</span><span class="mord mathit mtight">i</span><span class="mord mathit mtight">s</span><span class="mord mathit mtight">i</span><span class="mord mathit mtight">o</span><span class="mord mathit mtight">n</span><span class="mbin mtight">∗</span><span class="mord mathit mtight" style="margin-right:0.02778em;">r</span><span class="mord mathit mtight">e</span><span class="mord mathit mtight">c</span><span class="mord mathit mtight">a</span><span class="mord mathit mtight" style="margin-right:0.01968em;">l</span><span class="mord mathit mtight" style="margin-right:0.01968em;">l</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span><span class="mclose sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span></span></span></span></span></p>
|
||
5 years ago
|
<ul>
|
||
|
<li><p>假设模型 A 的精准率为 0.2 ,召回率为 0.7 ,那么模型 A 的 F1 Score 为 0.31111 。</p>
|
||
|
</li>
|
||
|
<li><p>假设模型 B 的精准率为 0.7 ,召回率为 0.2 ,那么模型 B 的 F1 Score 为 0.31111 。</p>
|
||
|
</li>
|
||
|
<li><p>假设模型 C 的精准率为 0.8 ,召回率为 0.7 ,那么模型 C 的 F1 Score 为 0.74667 。</p>
|
||
|
</li>
|
||
|
<li><p>假设模型 D 的精准率为 0.2 ,召回率为 0.3 ,那么模型 D 的 F1 Score 为 0.24 。</p>
|
||
|
</li>
|
||
|
</ul>
|
||
|
<p>从上述 4 个模型的各种性能可以看出,模型 C 的精准率和召回率都比较高,因此它的 F1 Score 也比较高。而其他模型的精准率和召回率要么都比较低,要么一个低一个高,所以它们的 F1 Score 比较低。</p>
|
||
|
<p>这也说明了只有当模型的精准率和召回率都比较高时 F1 Score 才会比较高。这也是 F1 Score 能够同时兼顾精准率和召回率的原因。</p>
|
||
|
<h2 id="roc曲线">ROC曲线</h2>
|
||
|
<p>ROC曲线(Receiver Operating Characteristic Curve)描述的是 TPR(True Positive Rate)与 FPR(False Positive Rate)之间关系的曲线。</p>
|
||
|
<p>TPR 与 FPR 的计算公式如下:</p>
|
||
5 years ago
|
<p><span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>T</mi><mi>P</mi><mi>R</mi><mo>=</mo><mfrac><mrow><mi>T</mi><mi>P</mi></mrow><mrow><mi>T</mi><mi>P</mi><mo>+</mo><mi>F</mi><mi>N</mi></mrow></mfrac></mrow><annotation encoding="application/x-tex">
|
||
5 years ago
|
TPR=\frac{TP}{TP+FN}
|
||
5 years ago
|
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.872331em;"></span><span class="strut bottom" style="height:1.275662em;vertical-align:-0.403331em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">T</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="mrel">=</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span><span class="mord mathit mtight" style="margin-right:0.13889em;">P</span><span class="mbin mtight">+</span><span class="mord mathit mtight" style="margin-right:0.13889em;">F</span><span class="mord mathit mtight" style="margin-right:0.10903em;">N</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.394em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span><span class="mord mathit mtight" style="margin-right:0.13889em;">P</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span 
style="font-size:0em;">​</span></span>​</span></span></span><span class="mclose sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span></span></span></span></span></p>
|
||
|
<p><span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>F</mi><mi>P</mi><mi>R</mi><mo>=</mo><mfrac><mrow><mi>F</mi><mi>P</mi></mrow><mrow><mi>F</mi><mi>P</mi><mo>+</mo><mi>T</mi><mi>N</mi></mrow></mfrac></mrow><annotation encoding="application/x-tex">
|
||
5 years ago
|
FPR=\frac{FP}{FP+TN}
|
||
5 years ago
|
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.872331em;"></span><span class="strut bottom" style="height:1.275662em;vertical-align:-0.403331em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">F</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="mrel">=</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">F</span><span class="mord mathit mtight" style="margin-right:0.13889em;">P</span><span class="mbin mtight">+</span><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span><span class="mord mathit mtight" style="margin-right:0.10903em;">N</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.394em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">F</span><span class="mord mathit mtight" style="margin-right:0.13889em;">P</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span 
style="font-size:0em;">​</span></span>​</span></span></span><span class="mclose sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span></span></span></span></span></p>
|
||
5 years ago
|
<p>其中 TPR 的计算公式您可能有点眼熟,没错!就是召回率的计算公式。<strong>也就是说 TPR 就是召回率</strong>。<strong>所以 TPR 描述的是模型预测 Positive 并且预测正确的数量占真实类别为 Positive 样本的比例。而 FPR 描述的是模型预测 Positive 并且预测错了的数量占真实类别为 Negative 样本的比例。</strong></p>
|
||
|
<p>和精准率与召回率一样, TPR 与 FPR 之间也存在关系。假设有这么一组数据,菱形代表 Positive ,圆形代表 Negative 。</p>
|
||
|
<p><img src="img/66.jpg" alt=""></p>
|
||
|
<p>现在需要训练一个逻辑回归的模型对数据进行分类,假如将从 0 到 1 中的一些值作为模型的分类阈值。若模型认为当前数据是 Positive 的概率<strong>小于</strong>分类阈值则分类为 Negative ,<strong>否则</strong>就分类为 Positive (<strong>假设分类阈值为 0.8 ,模型认为这条数据是 Positive 的概率为 0.7 , 0.7 小于 0.8 ,那么模型就认为这条数据是 Negative</strong>)。在不同的分类阈值下,模型所对应的 TPR 与 FPR 如下图所示(竖线代表分类阈值,模型会将竖线左边的数据分类成 Negative ,竖线右边的分类成 Positive ):</p>
|
||
|
<p><img src="img/68.jpg" alt=""></p>
|
||
|
<p>从图中可以看出,<strong>当模型的 TPR 越高 FPR 也会越高, TPR 越低 FPR 也会越低。这与精准率和召回率之间的关系刚好相反。</strong>并且,模型的分类阈值一旦改变,就有一组对应的 TPR 与 FPR 。假设该模型在不同的分类阈值下其对应的 TPR 与 FPR 如下表所示:</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>TPR</th>
|
||
|
<th>FPR</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>0.2</td>
|
||
|
<td>0.08</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>0.35</td>
|
||
|
<td>0.1</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>0.37</td>
|
||
|
<td>0.111</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>0.51</td>
|
||
|
<td>0.12</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>0.53</td>
|
||
|
<td>0.13</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>0.56</td>
|
||
|
<td>0.14</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>0.71</td>
|
||
|
<td>0.21</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>0.82</td>
|
||
|
<td>0.26</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>0.92</td>
|
||
|
<td>0.41</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>0.93</td>
|
||
|
<td>0.42</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p>若将 FPR 作为横轴, TPR 作为纵轴,将上面的表格以折线图的形式画出来就是 ROC曲线 。</p>
|
||
|
<p><img src="img/69.jpg" alt=""></p>
|
||
|
<p>假设现在有模型 A 和模型 B ,它们的 ROC 曲线如下图所示(其中模型 A 的 ROC曲线 为黄色,模型 B 的 ROC曲线 为蓝色):</p>
|
||
|
<p><img src="img/70.jpg" alt=""></p>
|
||
|
<p>那么模型 A 的性能比模型 B 的性能好,因为在 FPR 较低时,模型 A 所对应的 TPR 比模型 B 所对应的 TPR 更高。又由于随着 FPR 的增大, TPR 也会增大,所以 ROC 曲线与横轴所围成的面积越大,模型的分类性能就越高。而 ROC 曲线与横轴所围成的面积称为 AUC。</p>
|
||
|
<h5 id="auc">AUC</h5>
|
||
|
<p>很明显模型的 AUC 越高,模型的二分类性能就越强。 AUC 的计算公式如下:</p>
|
||
5 years ago
|
<p><span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>A</mi><mi>U</mi><mi>C</mi><mo>=</mo><mfrac><mrow><msub><mo>∑</mo><mrow><mi>i</mi><mi>e</mi><mi>p</mi><mi>o</mi><mi>s</mi><mi>i</mi><mi>t</mi><mi>i</mi><mi>v</mi><mi>e</mi><mi>c</mi><mi>l</mi><mi>a</mi><mi>s</mi><mi>s</mi></mrow></msub><mi>r</mi><mi>a</mi><mi>n</mi><msub><mi>k</mi><mi>i</mi></msub><mo>−</mo><mfrac><mrow><mi>M</mi><mo>(</mo><mi>M</mi><mo>+</mo><mn>1</mn><mo>)</mo></mrow><mrow><mn>2</mn></mrow></mfrac></mrow><mrow><mi>M</mi><mo>∗</mo><mi>N</mi></mrow></mfrac></mrow><annotation encoding="application/x-tex">
|
||
5 years ago
|
AUC=\frac{\sum_{i \in \text{positive class}} rank_i-\frac{M(M+1)}{2}}{M*N}
|
||
5 years ago
|
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:1.366927em;"></span><span class="strut bottom" style="height:1.711927em;vertical-align:-0.345em;"></span><span class="base textstyle uncramped"><span class="mord mathit">A</span><span class="mord mathit" style="margin-right:0.10903em;">U</span><span class="mord mathit" style="margin-right:0.07153em;">C</span><span class="mrel">=</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.10903em;">M</span><span class="mbin mtight">∗</span><span class="mord mathit mtight" style="margin-right:0.10903em;">N</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.631927em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mop mtight"><span class="mop op-symbol small-op mtight" style="top:0.074995em;">∑</span><span class="msupsub"><span class="vlist"><span style="top:0.32101em;margin-right:0.07142857142857144em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-scriptstyle scriptscriptstyle cramped mtight"><span class="mord scriptscriptstyle cramped mtight"><span class="mord mathit mtight">i</span><span 
class="mord mathit mtight">e</span><span class="mord mathit mtight">p</span><span class="mord mathit mtight">o</span><span class="mord mathit mtight">s</span><span class="mord mathit mtight">i</span><span class="mord mathit mtight">t</span><span class="mord mathit mtight">i</span><span class="mord mathit mtight" style="margin-right:0.03588em;">v</span><span class="mord mathit mtight">e</span><span class="mord mathit mtight">c</span><span class="mord mathit mtight" style="margin-right:0.01968em;">l</span><span class="mord mathit mtight">a</span><span class="mord mathit mtight">s</span><span class="mord mathit mtight">s</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mord mathit mtight" style="margin-right:0.02778em;">r</span><span class="mord mathit mtight">a</span><span class="mord mathit mtight">n</span><span class="mord mtight"><span class="mord mathit mtight" style="margin-right:0.03148em;">k</span><span class="msupsub"><span class="vlist"><span style="top:0.143em;margin-right:0.07142857142857144em;margin-left:-0.03148em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-scriptstyle scriptscriptstyle cramped mtight"><span class="mord mathit mtight">i</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mbin mtight">−</span><span class="mord reset-scriptstyle scriptstyle uncramped mtight"><span class="mopen sizing reset-size5 size5 reset-scriptstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.344em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-scriptstyle scriptscriptstyle cramped mtight"><span class="mord 
scriptscriptstyle
|
||
5 years ago
|
<p>其中 M 为真实类别为 Positive 的样本数量, N 为真实类别为 Negative 的样本数量。 rank<sub>i</sub> 代表将所有样本的预测概率从小到大排序后,第 i 个真实类别为 Positive 的样本的预测概率排在第几。</p>
|
||
|
<p>举个例子,现有预测概率与真实类别的表格如下所示(其中 0 表示 Negative , 1 表示 Positive ):</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>编号</th>
|
||
|
<th>预测概率</th>
|
||
|
<th>真实类别</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>1</td>
|
||
|
<td>0.1</td>
|
||
|
<td>0</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>2</td>
|
||
|
<td>0.4</td>
|
||
|
<td>0</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>3</td>
|
||
|
<td>0.3</td>
|
||
|
<td>1</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>4</td>
|
||
|
<td>0.8</td>
|
||
|
<td>1</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p>想要得到公式中的 rank ,就需要将预测概率从小到大排序,排序后如下:</p>
|
||
|
<table>
|
||
|
<thead>
|
||
|
<tr>
|
||
|
<th>编号</th>
|
||
|
<th>预测概率</th>
|
||
|
<th>真实类别</th>
|
||
|
</tr>
|
||
|
</thead>
|
||
|
<tbody>
|
||
|
<tr>
|
||
|
<td>1</td>
|
||
|
<td>0.1</td>
|
||
|
<td>0</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>3</td>
|
||
|
<td>0.3</td>
|
||
|
<td>1</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>2</td>
|
||
|
<td>0.4</td>
|
||
|
<td>0</td>
|
||
|
</tr>
|
||
|
<tr>
|
||
|
<td>4</td>
|
||
|
<td>0.8</td>
|
||
|
<td>1</td>
|
||
|
</tr>
|
||
|
</tbody>
|
||
|
</table>
|
||
|
<p>排序后的表格中,真实类别为 Positive 的只有编号为 3 和编号为 4 的数据,并且编号为 3 的数据排在第 2 ,编号为 4 的数据排在第 4 。所以 rank=[2, 4]。又因表格中真实类别为 Positive 的数据有 2 条,Negative 的数据有 2 条。因此 M 为 2,N 为 2。所以根据 AUC 的计算公式可知:</p>
|
||
5 years ago
|
<p><span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>A</mi><mi>U</mi><mi>C</mi><mo>=</mo><mfrac><mrow><mo>(</mo><mn>2</mn><mo>+</mo><mn>4</mn><mo>)</mo><mo>−</mo><mfrac><mrow><mn>2</mn><mo>(</mo><mn>2</mn><mo>+</mo><mn>1</mn><mo>)</mo></mrow><mrow><mn>2</mn></mrow></mfrac></mrow><mrow><mn>2</mn><mo>∗</mo><mn>2</mn></mrow></mfrac><mo>=</mo><mn>0</mn><mi mathvariant="normal">.</mi><mn>7</mn><mn>5</mn></mrow><annotation encoding="application/x-tex">
|
||
5 years ago
|
AUC=\frac{(2+4)-\frac{2(2+1)}{2}}{2*2}=0.75
|
||
5 years ago
|
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:1.2858em;"></span><span class="strut bottom" style="height:1.6308em;vertical-align:-0.345em;"></span><span class="base textstyle uncramped"><span class="mord mathit">A</span><span class="mord mathit" style="margin-right:0.10903em;">U</span><span class="mord mathit" style="margin-right:0.07153em;">C</span><span class="mrel">=</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span><span class="mbin mtight">∗</span><span class="mord mathrm mtight">2</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.5508em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mopen mtight">(</span><span class="mord mathrm mtight">2</span><span class="mbin mtight">+</span><span class="mord mathrm mtight">4</span><span class="mclose mtight">)</span><span class="mbin mtight">−</span><span class="mord reset-scriptstyle scriptstyle uncramped mtight"><span class="mopen sizing reset-size5 size5 reset-scriptstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.344em;"><span class="fontsize-ensurer reset-size5 size5"><span 
style="font-size:0em;">​</span></span><span class="reset-scriptstyle scriptscriptstyle cramped mtight"><span class="mord scriptscriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span></span><span style="top:-0.22142857142857142em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-scriptstyle textstyle uncramped frac-line"></span></span><span style="top:-0.5142857142857142em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-scriptstyle scriptscriptstyle uncramped mtight"><span class="mord scriptscriptstyle uncramped mtight"><span class="mord mathrm mtight">2</span><span class="mopen mtight">(</span><span class="mord mathrm mtight">2</span><span class="mbin mtight">+</span><span class="mord mathrm mtight">1</span><span class="mclose mtight">)</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span><span class="mclose sizing reset-size5 size5 reset-scriptstyle textstyle uncramped nulldelimiter"></span></span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span><span class="mclose sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span></span><span class="mrel">=</span><span class="mord mathrm">0</span><span class="mord mathrm">.</span><span class="mord mathrm">7</span><span class="mord mathrm">5</span></span></span></span>。</p>
|
||
5 years ago
|
|
||
|
|
||
|
</section>
|
||
|
|
||
|
</div>
|
||
|
<div class="search-results">
|
||
|
<div class="has-results">
|
||
|
|
||
|
<h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
|
||
|
<ul class="search-results-list"></ul>
|
||
|
|
||
|
</div>
|
||
|
<div class="no-results">
|
||
|
|
||
|
<h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
|
||
|
|
||
|
</div>
|
||
|
</div>
|
||
|
</div>
|
||
|
|
||
|
</div>
|
||
|
</div>
|
||
|
|
||
|
</div>
|
||
|
|
||
|
|
||
|
|
||
|
<a href="metrics.html" class="navigation navigation-prev " aria-label="Previous page: 模型评估指标">
|
||
|
<i class="fa fa-angle-left"></i>
|
||
|
</a>
|
||
|
|
||
|
|
||
|
<a href="regression_metrics.html" class="navigation navigation-next " aria-label="Next page: 回归性能评估指标">
|
||
|
<i class="fa fa-angle-right"></i>
|
||
|
</a>
|
||
|
|
||
|
|
||
|
|
||
|
</div>
|
||
|
|
||
|
<script>
|
||
|
var gitbook = gitbook || [];
|
||
|
gitbook.push(function() {
|
||
5 years ago
|
gitbook.page.hasChanged({"page":{"title":"分类性能评估指标","level":"1.4.1","depth":2,"next":{"title":"回归性能评估指标","level":"1.4.2","depth":2,"path":"regression_metrics.md","ref":"regression_metrics.md","articles":[]},"previous":{"title":"模型评估指标","level":"1.4","depth":1,"path":"metrics.md","ref":"metrics.md","articles":[{"title":"分类性能评估指标","level":"1.4.1","depth":2,"path":"classification_metrics.md","ref":"classification_metrics.md","articles":[]},{"title":"回归性能评估指标","level":"1.4.2","depth":2,"path":"regression_metrics.md","ref":"regression_metrics.md","articles":[]},{"title":"聚类性能评估指标","level":"1.4.3","depth":2,"path":"cluster_metrics.md","ref":"cluster_metrics.md","articles":[]}]},"dir":"ltr"},"config":{"gitbook":"*","theme":"default","variables":{},"plugins":["katex"],"pluginsConfig":{"katex":{},"highlight":{},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"fontsettings":{"theme":"white","family":"sans","size":2},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"}},"file":{"path":"classification_metrics.md","mtime":"2019-07-04T07:26:51.693Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2019-07-06T07:31:21.537Z"},"basePath":".","book":{"language":""}});
|
||
5 years ago
|
});
|
||
|
</script>
|
||
|
</div>
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook.js"></script>
|
||
|
<script src="gitbook/theme.js"></script>
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-search/search-engine.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-search/search.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-sharing/buttons.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
<script src="gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
|
||
|
|
||
|
|
||
|
|
||
|
</body>
|
||
|
</html>
|
||
|
|