<!DOCTYPE HTML>
<html lang="zh-Hans">
<head>
<!-- Single character-encoding declaration; HTML forbids combining it with an http-equiv Content-Type declaration. -->
<meta charset="UTF-8">
<title>Policy Gradient原理 · GitBook</title>
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="description" content="">
<meta name="generator" content="GitBook 3.2.3">
<!-- GitBook theme and plugin stylesheets -->
<link rel="stylesheet" href="../gitbook/style.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-katex/katex.min.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-highlight/website.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-search/search.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-fontsettings/website.css">
<meta name="HandheldFriendly" content="true">
<!-- Zoom is intentionally left enabled (no user-scalable=no) so readers can enlarge the text. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
<link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">
<!-- Sequential navigation hints; spaces in file names are percent-encoded to form valid URLs. -->
<link rel="next" href="coding.html">
<link rel="prev" href="what%20is%20reinforce%20learning.html">
</head>
<body>
<div class="book">
<div class="book-summary">
<div id="book-search-input" role="search">
<!-- A placeholder is not an accessible name, so the field is labelled explicitly. -->
<input type="text" placeholder="Type to search" aria-label="Type to search" />
</div>
<nav role="navigation">
<!--
  Book table of contents.
  href values are percent-encoded so that file names containing spaces form valid URLs.
  data-path values are left exactly as generated (presumably read by GitBook's scripts; confirm before changing).
-->
<ul class="summary">
<li class="chapter " data-level="1.1" data-path="../">
<a href="../">
简介
</a>
</li>
<li class="chapter " data-level="1.2" data-path="../machine_learning.html">
<a href="../machine_learning.html">
机器学习概述
</a>
</li>
<li class="chapter " data-level="1.3" data-path="../algorithm.html">
<a href="../algorithm.html">
常见机器学习算法
</a>
<ul class="articles">
<li class="chapter " data-level="1.3.1" data-path="../kNN.html">
<a href="../kNN.html">
近朱者赤近墨者黑-kNN
</a>
</li>
<li class="chapter " data-level="1.3.2" data-path="../linear_regression.html">
<a href="../linear_regression.html">
最简单的回归算法-线性回归
</a>
</li>
<li class="chapter " data-level="1.3.3" data-path="../logistic_regression.html">
<a href="../logistic_regression.html">
使用回归的思想进行分类-逻辑回归
</a>
</li>
<li class="chapter " data-level="1.3.4" data-path="../decision_tree.html">
<a href="../decision_tree.html">
最接近人类思维的算法-决策树
</a>
</li>
<li class="chapter " data-level="1.3.5" data-path="../random_forest.html">
<a href="../random_forest.html">
群众的力量是伟大的-随机森林
</a>
</li>
<li class="chapter " data-level="1.3.6" data-path="../kMeans.html">
<a href="../kMeans.html">
物以类聚人以群分-kMeans
</a>
</li>
<li class="chapter " data-level="1.3.7" data-path="../AGNES.html">
<a href="../AGNES.html">
以距离为尺-AGNES
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.4" data-path="../metrics.html">
<a href="../metrics.html">
模型评估指标
</a>
<ul class="articles">
<li class="chapter " data-level="1.4.1" data-path="../classification_metrics.html">
<a href="../classification_metrics.html">
分类性能评估指标
</a>
</li>
<li class="chapter " data-level="1.4.2" data-path="../regression_metrics.html">
<a href="../regression_metrics.html">
回归性能评估指标
</a>
</li>
<li class="chapter " data-level="1.4.3" data-path="../cluster_metrics.html">
<a href="../cluster_metrics.html">
聚类性能评估指标
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.5" data-path="../sklearn.html">
<a href="../sklearn.html">
使用sklearn进行机器学习
</a>
</li>
<li class="chapter " data-level="1.6">
<span>
综合实战案例
</span>
<ul class="articles">
<li class="chapter " data-level="1.6.1">
<span>
泰坦尼克生还预测
</span>
<ul class="articles">
<li class="chapter " data-level="1.6.1.1" data-path="../titanic/introduction.html">
<a href="../titanic/introduction.html">
简介
</a>
</li>
<li class="chapter " data-level="1.6.1.2" data-path="../titanic/EDA.html">
<a href="../titanic/EDA.html">
探索性数据分析(EDA)
</a>
</li>
<li class="chapter " data-level="1.6.1.3" data-path="../titanic/feature engerning.html">
<a href="../titanic/feature%20engerning.html">
特征工程
</a>
</li>
<li class="chapter " data-level="1.6.1.4" data-path="../titanic/fit and predict.html">
<a href="../titanic/fit%20and%20predict.html">
构建模型进行预测
</a>
</li>
<li class="chapter " data-level="1.6.1.5" data-path="../titanic/tuning.html">
<a href="../titanic/tuning.html">
调参
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.6.2">
<span>
使用强化学习玩乒乓球游戏
</span>
<ul class="articles">
<li class="chapter " data-level="1.6.2.1" data-path="what is reinforce learning.html">
<a href="what%20is%20reinforce%20learning.html">
什么是强化学习
</a>
</li>
<li class="chapter active" data-level="1.6.2.2" data-path="Policy Gradient.html">
<a href="Policy%20Gradient.html">
Policy Gradient原理
</a>
</li>
<li class="chapter " data-level="1.6.2.3" data-path="coding.html">
<a href="coding.html">
使用Policy Gradient玩乒乓球游戏
</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="chapter " data-level="1.7" data-path="../recommand.html">
<a href="../recommand.html">
实训推荐
</a>
</li>
<li class="divider"></li>
<li>
<!-- "_blank" is the reserved new-tab keyword ("blank" would only name a window); rel prevents the opened page from reaching window.opener. -->
<a href="https://www.gitbook.com" target="_blank" rel="noopener noreferrer" class="gitbook-link">
Published with GitBook
</a>
</li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<!-- Title -->
<h1>
<!-- Purely decorative loading spinner; hidden from assistive technology. -->
<i class="fa fa-circle-o-notch fa-spin" aria-hidden="true"></i>
<a href="..">Policy Gradient原理</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<div id="book-search-results">
<div class="search-noresults">
<section class="normal markdown-section">
<h1 id="policy-gradient">Policy Gradient</h1>
<h2 id="policy-gradient&#x7684;&#x6838;&#x5FC3;&#x601D;&#x60F3;">Policy Gradient&#x7684;&#x6838;&#x5FC3;&#x601D;&#x60F3;</h2>
<p>&#x5176;&#x5B9E; Policy Gradient &#x7684;&#x6838;&#x5FC3;&#x601D;&#x60F3;&#x975E;&#x5E38;&#x7B80;&#x5355;&#xFF0C;&#x5C31;&#x662F;&#x627E;&#x4E00;&#x4E2A;&#x51FD;&#x6570;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>&#x3C0;</mi></mrow><annotation encoding="application/x-tex">\pi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.03588em;">&#x3C0;</span></span></span></span>&#xFF0C;&#x8FD9;&#x4E2A;&#x51FD;&#x6570;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>&#x3C0;</mi></mrow><annotation encoding="application/x-tex">\pi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.03588em;">&#x3C0;</span></span></span></span>&#x80FD;&#x591F;&#x6839;&#x636E;&#x73B0;&#x5728;&#x73AF;&#x5883;&#x7684;&#x72B6;&#x6001;(state)&#x6765;&#x4EA7;&#x751F;&#x63A5;&#x4E0B;&#x6765;&#x8981;&#x91C7;&#x53D6;&#x7684;&#x884C;&#x52A8;&#x6216;&#x8005;&#x52A8;&#x4F5C;(action)&#x3002;&#x5373;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>&#x3C0;</mi><mo>(</mo><mi>s</mi><mi>t</mi><mi>a</mi><mi>t</mi><mi>e</mi><mo>)</mo><mo>&#x2192;</mo><mi>a</mi><mi>c</mi><mi>t</mi><mi>i</mi><mi>o</mi><mi>n</mi></mrow><annotation encoding="application/x-tex">\pi(state)\rightarrow action</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span 
class="mord mathit" style="margin-right:0.03588em;">&#x3C0;</span><span class="mopen">(</span><span class="mord mathit">s</span><span class="mord mathit">t</span><span class="mord mathit">a</span><span class="mord mathit">t</span><span class="mord mathit">e</span><span class="mclose">)</span><span class="mrel">&#x2192;</span><span class="mord mathit">a</span><span class="mord mathit">c</span><span class="mord mathit">t</span><span class="mord mathit">i</span><span class="mord mathit">o</span><span class="mord mathit">n</span></span></span></span>&#x3002;</p>
<p>&#x51FD;&#x6570;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>&#x3C0;</mi></mrow><annotation encoding="application/x-tex">\pi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.03588em;">&#x3C0;</span></span></span></span>&#x5176;&#x5B9E;&#x53EF;&#x4EE5;&#x770B;&#x6210;&#x662F;&#x4E00;&#x4E2A;&#x6A21;&#x578B;&#xFF0C;&#x90A3;&#x4E48;&#x60F3;&#x5728;&#x65E0;&#x6570;&#x6B21;&#x5C1D;&#x8BD5;&#x4E2D;&#x5BFB;&#x627E;&#x51FA;&#x80FD;&#x8BA9; Agent &#x5C3D;&#x91CF;&#x62FF;&#x9AD8;&#x5206;&#x7684;&#x6A21;&#x578B;&#x5E94;&#x8BE5;&#x600E;&#x6837;&#x6765;&#x627E;&#x5462;&#xFF1F;&#x6211;&#x76F8;&#x4FE1;&#x60A8;&#x5E94;&#x8BE5;&#x731C;&#x5230;&#x4E86;&#xFF01;&#x6CA1;&#x9519;&#xFF01;&#x5C31;&#x662F;&#x795E;&#x7ECF;&#x7F51;&#x7EDC;&#xFF01;</p>
<p>&#x6211;&#x4EEC;&#x53EF;&#x4EE5;&#x5C06;&#x6E38;&#x620F;&#x753B;&#x9762;&#x4F20;&#x7ED9;&#x795E;&#x7ECF;&#x7F51;&#x7EDC;&#x4F5C;&#x4E3A;&#x8F93;&#x5165;&#xFF0C;&#x7136;&#x540E;&#x795E;&#x7ECF;&#x7F51;&#x7EDC;&#x9884;&#x6D4B;&#x4E00;&#x4E0B;&#x5F53;&#x524D;&#x6E38;&#x620F;&#x753B;&#x9762;&#x4E0B;&#xFF0C;&#x4E0B;&#x4E00;&#x6B65;&#x52A8;&#x4F5C;&#x7684;&#x6982;&#x7387;&#x5206;&#x5E03;&#x3002;</p>
<p><img src="../img/4.jpg" alt=""></p>
<p>&#x7EC6;&#x5FC3;&#x7684;&#x60A8;&#x53EF;&#x80FD;&#x4F1A;&#x53D1;&#x73B0;&#xFF0C;&#x5982;&#x679C;&#x6BCF;&#x6B21;&#x53D6;&#x6982;&#x7387;&#x6700;&#x9AD8;&#x7684;&#x52A8;&#x4F5C;&#x4F5C;&#x4E3A;&#x4E0B;&#x4E00;&#x6B65;&#x7684;&#x52A8;&#x4F5C;&#xFF0C;&#x90A3;&#x4E0D;&#x5C31;&#x6210;&#x5206;&#x7C7B;&#x4E86;&#x4E48;&#x3002;&#x5176;&#x5B9E; Policy Gradient &#x5E76;&#x4E0D;&#x662F;&#x6BCF;&#x6B21;&#x90FD;&#x9009;&#x53D6;&#x6982;&#x7387;&#x6700;&#x9AD8;&#x7684;&#x52A8;&#x4F5C;&#xFF0C;&#x800C;&#x662F;&#x6839;&#x636E;&#x52A8;&#x4F5C;&#x7684;&#x6982;&#x7387;&#x5206;&#x5E03;&#x8FDB;&#x884C;&#x91C7;&#x6837;&#x3002;&#x4E5F;&#x5C31;&#x662F;&#x8BF4;&#x5C31;&#x7B97;&#x6211;&#x9884;&#x6D4B;&#x51FA;&#x6765;&#x7684;&#x5411;&#x4E0A;&#x632A;&#x7684;&#x6982;&#x7387;&#x4E3A; 80% &#xFF0C;&#x4E5F;&#x4E0D;&#x4E00;&#x5B9A;&#x4F1A;&#x5411;&#x4E0A;&#x632A;&#x3002;</p>
<p>&#x90A3;&#x4E48;&#x4E3A;&#x4EC0;&#x4E48;&#x91C7;&#x6837;&#x800C;&#x4E0D;&#x662F;&#x76F4;&#x63A5;&#x9009;&#x53D6;&#x6982;&#x7387;&#x6700;&#x5927;&#x7684;&#x5462;&#xFF1F;&#x56E0;&#x4E3A;&#x8FD9;&#x6837;&#x5F88;&#x6709;&#x7075;&#x6027;&#x3002;&#x53EF;&#x4EE5;&#x60F3;&#x8C61;&#x4E00;&#x4E0B;&#xFF0C;&#x6211;&#x4EEC;&#x548C;&#x522B;&#x4EBA;&#x4E0B;&#x68CB;&#x7684;&#x65F6;&#x5019;&#xFF0C;&#x5982;&#x679C;&#x4E00;&#x76F4;&#x6309;&#x7167;&#x5957;&#x8DEF;&#x6765;&#x4E0B;&#xFF0C;&#x90A3;&#x4E48;&#x5BF9;&#x624B;&#x5F88;&#x53EF;&#x80FD;&#x80FD;&#x591F;&#x731C;&#x5230;&#x6211;&#x4EEC;&#x4E0B;&#x4E00;&#x6B65;&#x68CB;&#x4F1A;&#x600E;&#x4E48;&#x8D70;&#xFF0C;&#x4ECE;&#x800C;&#x5360;&#x636E;&#x4E3B;&#x52A8;&#x3002;&#x5982;&#x679C;&#x6211;&#x4EEC;&#x65F6;&#x4E0D;&#x65F6;&#x5730;&#x4E0D;&#x6309;&#x5957;&#x8DEF;&#x51FA;&#x724C;&#xFF0C;&#x4F46;&#x662F;&#x8FD9;&#x79CD;&#x4E0D;&#x6309;&#x5957;&#x8DEF;&#x7684;&#x52A8;&#x4F5C;&#x4E0D;&#x4F1A;&#x964D;&#x4F4E;&#x592A;&#x591A;&#x5BF9;&#x4E8E;&#x6211;&#x4EEC;&#x80FD;&#x591F;&#x8D62;&#x4E0B;&#x8FD9;&#x4E00;&#x5C40;&#x68CB;&#x7684;&#x51E0;&#x7387;&#x3002;&#x90A3;&#x4E48;&#x5BF9;&#x624B;&#x5F88;&#x53EF;&#x80FD;&#x4F1A;&#x4E0D;&#x77E5;&#x6240;&#x63AA;&#xFF0C;&#x4E3B;&#x52A8;&#x6743;&#x5C31;&#x638C;&#x63E1;&#x5728;&#x6211;&#x4EEC;&#x624B;&#x91CC;&#x3002;&#x5C31;&#x50CF;&#x300A;&#x5929;&#x9F99;&#x516B;&#x90E8;&#x300B;&#x4E2D;&#x865A;&#x7AF9;&#x5927;&#x7834;&#x73CD;&#x73D1;&#x68CB;&#x5C40;&#x65F6;&#x4E00;&#x6837;&#xFF0C;&#x53EF;&#x80FD;&#x6709;&#x7075;&#x6027;&#x4E00;&#x70B9;&#xFF0C;&#x4F1A;&#x6709;&#x610F;&#x60F3;&#x4E0D;&#x5230;&#x7684;&#x6548;&#x679C;&#x3002;</p>
<p><img src="../img/5.jpg" alt=""></p>
<h2 id="policy-gradient-&#x7684;&#x539F;&#x7406;">Policy Gradient &#x7684;&#x539F;&#x7406;</h2>
<p>&#x73B0;&#x5728;&#x5DF2;&#x7ECF;&#x77E5;&#x9053; Policy Gradient &#x662F;&#x901A;&#x8FC7;&#x795E;&#x7ECF;&#x7F51;&#x7EDC;&#x6765;&#x8BAD;&#x7EC3;&#x6A21;&#x578B;&#xFF0C;&#x8BE5;&#x6A21;&#x578B;&#x9700;&#x8981;&#x6839;&#x636E;&#x73AF;&#x5883;&#x72B6;&#x6001;&#x6765;&#x9884;&#x6D4B;&#x51FA;&#x4E0B;&#x4E00;&#x6B65;&#x52A8;&#x4F5C;&#x7684;&#x6982;&#x7387;&#x5206;&#x5E03;&#xFF0C;&#x5E76;&#x6839;&#x636E;&#x8FD9;&#x4E2A;&#x6982;&#x7387;&#x5206;&#x5E03;&#x8FDB;&#x884C;&#x91C7;&#x6837;&#xFF0C;&#x5C06;&#x91C7;&#x6837;&#x5230;&#x7684;&#x52A8;&#x4F5C;&#x4F5C;&#x4E3A;&#x4E0B;&#x4E00;&#x6B65;&#x7684;&#x52A8;&#x4F5C;&#x3002;</p>
<p>&#x90A3;&#x4E48;&#x4F1A;&#x6709;&#x4E00;&#x4E2A;&#x7075;&#x9B42;&#x62F7;&#x95EE;&#xFF0C;&#x5C31;&#x662F;&#x600E;&#x6837;&#x6765;&#x9274;&#x5B9A;&#x6211;&#x7684;&#x795E;&#x7ECF;&#x7F51;&#x7EDC;&#x662F;&#x597D;&#x8FD8;&#x662F;&#x574F;&#x5462;&#xFF1F;&#x5F88;&#x663E;&#x7136;&#xFF0C;&#x5F53;&#x7136;&#x662F;&#x8D62;&#x7684;&#x8D8A;&#x591A;&#x8D8A;&#x597D;&#x4E86;&#xFF01;&#x6240;&#x4EE5;&#x6211;&#x4EEC;&#x4E0D;&#x59A8;&#x5047;&#x8BBE;&#xFF0C;&#x8BA9;&#x8BA1;&#x7B97;&#x673A;&#x73A9; 10 &#x628A;&#x4E52;&#x4E53;&#x7403;&#x6E38;&#x620F;&#xFF0C;&#x90A3;&#x4E48;&#x53EF;&#x80FD;&#x4F1A;&#x6709;&#x8FD9;&#x6837;&#x7684;&#x4E00;&#x4E2A;&#x7EDF;&#x8BA1;&#x7ED3;&#x679C;&#x3002;</p>
<p><img src="../img/6.jpg" alt=""></p>
<p>&#x90A3;&#x4E48;&#x600E;&#x6837;&#x8BC4;&#x4EF7;&#x8FD9; 10 &#x628A;&#x6E38;&#x620F;&#x6253;&#x7684;&#x597D;&#x8FD8;&#x662F;&#x4E0D;&#x597D;&#x5462;&#xFF1F;&#x4E5F;&#x5F88;&#x660E;&#x663E;&#xFF0C;&#x628A; 10 &#x628A;&#x6E38;&#x620F;&#x7684;&#x6240;&#x6709;&#x53CD;&#x9988;&#x5168;&#x90E8;&#x90FD;&#x52A0;&#x8D77;&#x6765;&#x5C31;&#x597D;&#x4E86;&#x3002;&#x5982;&#x679C;&#x628A;&#x8FD9;&#x4E9B;&#x53CD;&#x9988;&#x7684;&#x548C;&#x79F0;&#x4E3A;&#x603B;&#x53CD;&#x9988;(&#x603B;&#x5F97;&#x5206;)&#xFF0C;&#x90A3;&#x4E48;&#x5C31;&#x6709;<strong>&#x603B;&#x53CD;&#x9988;(&#x603B;&#x5F97;&#x5206;)=&#x7B2C;1&#x628A;&#x53CD;&#x9988;1+&#x7B2C;1&#x628A;&#x53CD;&#x9988;2+...+&#x7B2C;10&#x628A;&#x53CD;&#x9988;m</strong>&#x3002;&#x4E5F;&#x5C31;&#x662F;&#x8BF4;&#x603B;&#x53CD;&#x9988;&#x8D8A;&#x9AD8;&#x8D8A;&#x597D;&#x3002;</p>
<p>&#x8BF4;&#x5230;&#x8FD9;&#xFF0C;&#x6709;&#x4E00;&#x4E2A;&#x95EE;&#x9898;&#x9700;&#x8981;&#x5F04;&#x6E05;&#x695A;&#xFF1A;&#x5047;&#x8BBE;&#x603B;&#x5171;&#x73A9;&#x4E86; 100 &#x628A;&#xFF0C;&#x6BCF; 10 &#x628A;&#x8BA1;&#x7B97;&#x4E00;&#x6B21;&#x603B;&#x53CD;&#x9988;&#xFF0C;&#x90A3;&#x4E48;&#x8FD9; 10 &#x6B21;&#x7684;&#x603B;&#x53CD;&#x9988;&#x4F1A;&#x4E0D;&#x4F1A;&#x662F;&#x4E00;&#x6A21;&#x4E00;&#x6837;&#x7684;&#x5462;&#xFF1F;&#x5176;&#x5B9E;&#x4ED4;&#x7EC6;&#x60F3;&#x60F3;&#x4F1A;&#x53D1;&#x73B0;&#x4E0D;&#x4F1A;&#x4E00;&#x6A21;&#x4E00;&#x6837;&#xFF0C;&#x56E0;&#x4E3A;&#xFF1A;</p>
<ul>
<li>&#x6E38;&#x620F;&#x7684;&#x72B6;&#x6001;&#x5B9E;&#x65F6;&#x5728;&#x53D8;&#xFF0C;&#x6240;&#x4EE5;&#x73AF;&#x5883;&#x72B6;&#x6001;&#x4E0D;&#x53EF;&#x80FD;&#x4E00;&#x76F4;&#x662F;&#x4E00;&#x6837;&#x7684;&#x3002;</li>
<li>&#x52A8;&#x4F5C;&#x662F;&#x4ECE;&#x4E00;&#x4E2A;&#x6982;&#x7387;&#x5206;&#x5E03;&#x4E2D;&#x91C7;&#x6837;&#x51FA;&#x6765;&#x7684;&#x3002;</li>
</ul>
<p>&#x65E2;&#x7136;&#x603B;&#x53CD;&#x9988;&#x4E00;&#x76F4;&#x4F1A;&#x53D8;&#xFF0C;&#x90A3;&#x4E48;&#x6211;&#x4EEC;&#x53EF;&#x4EE5;&#x5C1D;&#x8BD5;&#x6362;&#x4E00;&#x79CD;&#x601D;&#x8DEF;&#xFF0C;&#x5373;&#x8BA1;&#x7B97;&#x603B;&#x53CD;&#x9988;&#x7684;&#x671F;&#x671B;&#xFF0C;&#x5373;&#x603B;&#x53CD;&#x9988;&#x7684;&#x671F;&#x671B;&#x8D8A;&#x9AD8;&#x8D8A;&#x597D;&#x3002;&#x90A3;&#x8FD9;&#x4E2A;&#x671F;&#x671B;&#x600E;&#x4E48;&#x7B97;&#x5462;&#xFF1F;</p>
<p>&#x9996;&#x5148;&#x6211;&#x4EEC;&#x53EF;&#x4EE5;&#x5C06;&#x6BCF;&#x4E00;&#x628A;&#x6E38;&#x620F;&#x770B;&#x6210;&#x4E00;&#x4E2A;&#x6E38;&#x620F;&#x5E8F;&#x5217;(<strong>&#x72B6;&#x6001;1-&gt;&#x52A8;&#x4F5C;1-&gt;&#x53CD;&#x9988;1-&gt;&#x72B6;&#x6001;2-&gt;&#x52A8;&#x4F5C;2-&gt;&#x53CD;&#x9988;2 ... &#x72B6;&#x6001;N-&gt;&#x52A8;&#x4F5C;N-&gt;&#x53CD;&#x9988;N</strong>)&#x3002;&#x90A3;&#x4E48;&#x6BCF;&#x4E00;&#x4E2A;&#x6E38;&#x620F;&#x5E8F;&#x5217;(&#x5373;&#x6BCF;&#x4E00;&#x628A;&#x6E38;&#x620F;)&#x7684;<strong>&#x53CD;&#x9988;=&#x53CD;&#x9988;1+&#x53CD;&#x9988;2+...+&#x53CD;&#x9988;N</strong>&#x3002;&#x56E0;&#x6B64;&#xFF0C;&#x82E5;&#x5047;&#x8BBE;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>R</mi><mo>(</mo><mi>&#x3C4;</mi><mo>)</mo></mrow><annotation encoding="application/x-tex">R(\tau)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="mopen">(</span><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="mclose">)</span></span></span></span>&#x8868;&#x793A;&#x6E38;&#x620F;&#x5E8F;&#x5217;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>&#x3C4;</mi></mrow><annotation encoding="application/x-tex">\tau</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span></span></span></span>&#x7684;&#x53CD;&#x9988;&#xFF0C;&#x5219;&#x6709;:<span class="katex"><span 
class="katex-mathml"><math><semantics><mrow><mi>R</mi><mo>(</mo><mi>&#x3C4;</mi><mo>)</mo><mo>=</mo><msubsup><mo>&#x2211;</mo><mrow><mi>n</mi><mo>=</mo><mn>1</mn></mrow><mi>N</mi></msubsup><msub><mi>&#x3C4;</mi><mi>n</mi></msub></mrow><annotation encoding="application/x-tex">R(\tau)=\sum_{n=1}^N\tau_n</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8423309999999999em;"></span><span class="strut bottom" style="height:1.142341em;vertical-align:-0.30001em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="mopen">(</span><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="mclose">)</span><span class="mrel">=</span><span class="mop"><span class="mop op-symbol small-op" style="top:-0.0000050000000000050004em;">&#x2211;</span><span class="msupsub"><span class="vlist"><span style="top:0.30001em;margin-left:0em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight">n</span><span class="mrel mtight">=</span><span class="mord mathrm mtight">1</span></span></span></span><span style="top:-0.364em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.10903em;">N</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="msupsub"><span class="vlist"><span 
style="top:0.15em;margin-right:0.05em;margin-left:-0.1132em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit
<p>&#x5982;&#x679C;&#x6211;&#x4EEC;&#x628A;&#x6574;&#x4E2A;&#x4E52;&#x4E53;&#x7403;&#x6E38;&#x620F;&#x6240;&#x6709;&#x53EF;&#x80FD;&#x51FA;&#x73B0;&#x7684;&#x72B6;&#x6001;&#xFF0C;&#x52A8;&#x4F5C;&#xFF0C;&#x53CD;&#x9988;&#x7EC4;&#x5408;&#x8D77;&#x6765;&#x770B;&#x6210;&#x662F;&#x73A9;&#x4E86; N(N&#x5F88;&#x5927;&#x5F88;&#x5927;) &#x628A;&#x6E38;&#x620F;&#xFF0C;&#x5C31;&#x4F1A;&#x6709; N &#x4E2A;&#x6E38;&#x620F;&#x5E8F;&#x5217;(<strong>&#x6E38;&#x620F;&#x5E8F;&#x5217;1&#xFF0C;&#x6E38;&#x620F;&#x5E8F;&#x5217;2&#xFF0C;&#x6E38;&#x620F;&#x5E8F;&#x5217;3, ... , &#x6E38;&#x620F;&#x5E8F;&#x5217;N</strong>)&#x3002;&#x90A3;&#x4E48;&#x6211;&#x4EEC;&#x5728;&#x73A9;&#x6E38;&#x620F;&#x65F6;&#x6240;&#x5F97;&#x5230;&#x7684;&#x6E38;&#x620F;&#x5E8F;&#x5217;&#x5B9E;&#x9645;&#x4E0A;&#x5C31;&#x662F;&#x4ECE;&#x8FD9; N &#x4E2A;&#x6E38;&#x620F;&#x5E8F;&#x5217;&#x4E2D;&#x91C7;&#x6837;&#x5F97;&#x5230;&#x7684;&#x3002;</p>
<p>&#x6240;&#x4EE5;&#x6211;&#x4EEC;&#x6E38;&#x620F;&#x7684;&#x603B;&#x7684;&#x53CD;&#x9988;&#x671F;&#x671B;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mover accent="true"><mrow><msub><mi>R</mi><mi>&#x3B8;</mi></msub></mrow><mo stretchy="true">&#x203E;</mo></mover></mrow><annotation encoding="application/x-tex">\overline{R_\theta}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8833300000000001em;"></span><span class="strut bottom" style="height:1.03333em;vertical-align:-0.15em;"></span><span class="base textstyle uncramped"><span class="mord overline"><span class="vlist"><span style="top:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="mord textstyle cramped"><span class="mord"><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.00773em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.02778em;">&#x3B8;</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span></span></span><span style="top:-0.80333em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="reset-textstyle textstyle uncramped overline-line"></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span>&#x200B;</span></span></span></span></span></span>&#x53EF;&#x8868;&#x793A;&#x4E3A;&#xFF1A;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mover 
accent="true"><mrow><msub><mi>R</mi><mi>&#x3B8;</mi></msub></mrow><mo stretchy="true">&#x203E;</mo></mover><mo>=</mo><msub><mo>&#x2211;</mo><mi>&#x3C4;</mi></msub><mi>R</mi><mo>(</mo><mi>&#x3C4;</mi><mo>)</mo><mi>P</mi><mo>(</mo><mi>&#x3C4;</mi><mi mathvariant="normal">&#x2223;</mi><mi>&#x3B8;</mi><mo>)</mo></mrow><annotation encoding="application/x-tex">\overline{R_\theta}=\sum_\tau R(\tau)P(\tau|\theta)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8833300000000001em;"></span><span class="strut bottom" style="height:1.18334em;vertical-align:-0.30001em;"></span><span class="base textstyle uncramped"><span class="mord overline"><span class="vlist"><span style="top:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="mord textstyle cramped"><span class="mord"><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.00773em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.02778em;">&#x3B8;</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span></span></span><span style="top:-0.80333em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="reset-textstyle textstyle uncramped overline-line"></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span>&#x200B;</span></span></span><span class="mrel">=</span><span class="mop"><span class="mop op-symbol small-op" 
style="top:-0.0000050000000000050004em;">&#x2211;</span><span class="msupsub"><span class="vlist"><span style="top:0.30001em;margin-right:0.05em;margin-left:0em;"><span class="fo
<p><img src="../img/7.jpg" alt=""></p>
<p>&#x5047;&#x8BBE;&#x6211;&#x4EEC;&#x73A9;&#x4E86; 10 &#x628A;&#x6E38;&#x620F;&#xFF0C;&#x5C31;&#x76F8;&#x5F53;&#x4E8E;&#x5F97;&#x5230;&#x4E86; 10 &#x4E2A;&#x6E38;&#x620F;&#x5E8F;&#x5217;[<span class="katex"><span class="katex-mathml"><math><semantics><mrow><msub><mi>&#x3C4;</mi><mn>1</mn></msub><mo separator="true">,</mo><msub><mi>&#x3C4;</mi><mn>2</mn></msub><mo separator="true">,</mo><mi mathvariant="normal">.</mi><mi mathvariant="normal">.</mi><mi mathvariant="normal">.</mi><mo separator="true">,</mo><msub><mi>&#x3C4;</mi><mrow><mn>1</mn><mn>0</mn></mrow></msub></mrow><annotation encoding="application/x-tex">\tau_1, \tau_2, ..., \tau_{10}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.625em;vertical-align:-0.19444em;"></span><span class="base textstyle uncramped"><span class="mord"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.1132em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.1132em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 
size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord mathrm">.</span><span class="mord mathrm">.</span><span class="mord mathrm">.</span><span class="mpunct">,</span><span class="mord"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.1132em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span><span class="mord mathrm mtight">0</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span></span></span></span>]&#x3002;&#x8FD9; 10 &#x4E2A;&#x6E38;&#x620F;&#x5E8F;&#x5217;&#x5C31;&#x76F8;&#x5F53;&#x4E8E;&#x4ECE; P &#x4E2D;&#x91C7;&#x6837;&#x4E86; 10 &#x6B21;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>&#x3C4;</mi></mrow><annotation encoding="application/x-tex">\tau</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span></span></span></span>&#x3002;&#x6240;&#x4EE5;&#x603B;&#x53CD;&#x9988;&#x671F;&#x671B;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mover accent="true"><mrow><msub><mi>R</mi><mi>&#x3B8;</mi></msub></mrow><mo stretchy="true">&#x203E;</mo></mover></mrow><annotation encoding="application/x-tex">\overline{R_\theta}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" 
style="height:0.8833300000000001em;"></span><span class="strut bottom" style="height:1.03333em;vertical-ali
<p><center>
<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mover accent="true"><mrow><msub><mi>R</mi><mi>&#x3B8;</mi></msub></mrow><mo stretchy="true">&#x203E;</mo></mover><mo>&#x2248;</mo><mfrac><mrow><mn>1</mn></mrow><mrow><mi>N</mi></mrow></mfrac><msubsup><mo>&#x2211;</mo><mrow><mi>n</mi><mo>=</mo><mn>1</mn></mrow><mi>N</mi></msubsup><mi>R</mi><mo>(</mo><msup><mi>&#x3C4;</mi><mi>n</mi></msup><mo>)</mo></mrow><annotation encoding="application/x-tex">
\overline{R_\theta} \approx \frac{1}{N}\sum_{n=1}^NR(\tau^n)
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8833300000000001em;"></span><span class="strut bottom" style="height:1.2283300000000001em;vertical-align:-0.345em;"></span><span class="base textstyle uncramped"><span class="mord overline"><span class="vlist"><span style="top:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="mord textstyle cramped"><span class="mord"><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.00773em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.02778em;">&#x3B8;</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span></span></span><span style="top:-0.80333em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="reset-textstyle textstyle uncramped overline-line"></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span>&#x200B;</span></span></span><span class="mrel">&#x2248;</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight" 
style="margin-right:0.10903em;">N</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.394em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathrm mtight">1</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span><span class="mclose sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span></span><span class="mop"><span class="mop op-symbol small-op" style="top:-0.0000050000000000050004em;">&#x2211;</span><span class="msupsub"><span class="vlist"><span style="top:0.30001em;margin-left:0em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight">n</span><span class="mrel mtight">=</span><span class="mord mathrm mtight">1</span></span></span></span><span style="top:-0.364em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.10903em;">N</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="mopen">(</span><span class="mord"><span class="mord mathit" 
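style="margin-right:0.1132em;">&#x3C4;</span><span class="msupsub"><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight">n</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mclose">)</span></span></span></span>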
</center>
<br></p>
<p>&#x7531;&#x4E8E;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mover accent="true"><mrow><msub><mi>R</mi><mi>&#x3B8;</mi></msub></mrow><mo stretchy="true">&#x203E;</mo></mover></mrow><annotation encoding="application/x-tex">\overline{R_\theta}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8833300000000001em;"></span><span class="strut bottom" style="height:1.03333em;vertical-align:-0.15em;"></span><span class="base textstyle uncramped"><span class="mord overline"><span class="vlist"><span style="top:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="mord textstyle cramped"><span class="mord"><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.00773em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.02778em;">&#x3B8;</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span></span></span><span style="top:-0.80333em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="reset-textstyle textstyle uncramped overline-line"></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span>&#x200B;</span></span></span></span></span></span>&#x7684;&#x503C;&#x8D8A;&#x5927;&#x8D8A;&#x597D;&#xFF0C;&#x6240;&#x4EE5;&#x6211;&#x4EEC;&#x53EF;&#x4EE5;&#x4F7F;&#x7528;&#x68AF;&#x5EA6;&#x4E0A;&#x5347;&#x7684;&#x65B9;&#x5F0F;&#x6765;&#x66F4;&#x65B0;<span class="katex"><span 
class="katex-mathml"><math><semantics><mrow><mi>&#x3B8;</mi></mrow><annotation encoding="application/x-tex">\theta</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.69444em;"></span><span class="strut bottom" style="height:0.69444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span></span></span></span>&#x3002;&#x6240;&#x4EE5;&#x5C31;&#x6709;&#x5982;&#x4E0B;&#x6570;&#x5B66;&#x63A8;&#x5BFC;&#xFF1A;</p>
<p><div align="center"><img src="../img/8.jpg" alt="&#x68AF;&#x5EA6;&#x4E0A;&#x5347;&#x66F4;&#x65B0; &#x3B8; &#x7684;&#x6570;&#x5B66;&#x63A8;&#x5BFC;"></div></p>
<p>&#x53C8;&#x7531;&#x4E8E;&#xFF1A;</p>
<p><center>
<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mover accent="true"><mrow><msub><mi>R</mi><mi>&#x3B8;</mi></msub></mrow><mo stretchy="true">&#x203E;</mo></mover><mo>=</mo><msub><mo>&#x2211;</mo><mi>&#x3C4;</mi></msub><mi>R</mi><mo>(</mo><mi>&#x3C4;</mi><mo>)</mo><mi>P</mi><mo>(</mo><mi>&#x3C4;</mi><mi mathvariant="normal">&#x2223;</mi><mi>&#x3B8;</mi><mo>)</mo><mo>&#x2248;</mo><mfrac><mrow><mn>1</mn></mrow><mrow><mi>N</mi></mrow></mfrac><msubsup><mo>&#x2211;</mo><mrow><mi>n</mi><mo>=</mo><mn>1</mn></mrow><mi>N</mi></msubsup><mi>R</mi><mo>(</mo><msup><mi>&#x3C4;</mi><mi>n</mi></msup><mo>)</mo></mrow><annotation encoding="application/x-tex">
\overline{R_\theta} = \sum_\tau R(\tau)P(\tau|\theta) \approx \frac{1}{N}\sum_{n=1}^NR(\tau^n)
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8833300000000001em;"></span><span class="strut bottom" style="height:1.2283300000000001em;vertical-align:-0.345em;"></span><span class="base textstyle uncramped"><span class="mord overline"><span class="vlist"><span style="top:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="mord textstyle cramped"><span class="mord"><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.00773em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.02778em;">&#x3B8;</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span></span></span><span style="top:-0.80333em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="reset-textstyle textstyle uncramped overline-line"></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span>&#x200B;</span></span></span><span class="mrel">=</span><span class="mop"><span class="mop op-symbol small-op" style="top:-0.0000050000000000050004em;">&#x2211;</span><span class="msupsub"><span class="vlist"><span style="top:0.30001em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.1132em;">&#x3C4;</span></span></span><span 
class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="mopen">(</span><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="mclose">)</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="mord mathrm">&#x2223;</span><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span class="mclose">)</span><span class="mrel">&#x2248;</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.10903em;">N</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.394em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathrm mtight">1</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span><span class="mclose sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span></span><span 
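class="mop"><span class="mop op-symbol small-op" style="top:-0.0000050000000000050004em;">&#x2211;</span><span class="msupsub"><span class="vlist"><span style="top:0.30001em;margin-left:0em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight">n</span><span class="mrel mtight">=</span><span class="mord mathrm mtight">1</span></span></span></span><span style="top:-0.364em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.10903em;">N</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="mopen">(</span><span class="mord"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="msupsub"><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight">n</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mclose">)</span></span></span></span>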
</center>
<br></p>
<p>&#x6240;&#x4EE5;&#x5C31;&#x6709;&#xFF1A;</p>
<p><center>
<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi mathvariant="normal">&#x2207;</mi><mover accent="true"><mrow><msub><mi>R</mi><mi>&#x3B8;</mi></msub></mrow><mo stretchy="true">&#x203E;</mo></mover><mo>&#x2248;</mo><mfrac><mrow><mn>1</mn></mrow><mrow><mi>N</mi></mrow></mfrac><msubsup><mo>&#x2211;</mo><mrow><mi>n</mi><mo>=</mo><mn>1</mn></mrow><mi>N</mi></msubsup><mi>R</mi><mo>(</mo><msup><mi>&#x3C4;</mi><mi>n</mi></msup><mo>)</mo><mi mathvariant="normal">&#x2207;</mi><mi>l</mi><mi>o</mi><mi>g</mi><mi>P</mi><mo>(</mo><msup><mi>&#x3C4;</mi><mi>n</mi></msup><mi mathvariant="normal">&#x2223;</mi><mi>&#x3B8;</mi><mo>)</mo></mrow><annotation encoding="application/x-tex">
\nabla \overline{R_\theta} \approx \frac{1}{N}\sum_{n=1}^NR(\tau^n) \nabla logP(\tau^n|\theta)
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8833300000000001em;"></span><span class="strut bottom" style="height:1.2283300000000001em;vertical-align:-0.345em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">&#x2207;</span><span class="mord overline"><span class="vlist"><span style="top:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="mord textstyle cramped"><span class="mord"><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.00773em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.02778em;">&#x3B8;</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span></span></span><span style="top:-0.80333em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span><span class="reset-textstyle textstyle uncramped overline-line"></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:1em;">&#x200B;</span></span>&#x200B;</span></span></span><span class="mrel">&#x2248;</span><span class="mord reset-textstyle textstyle uncramped"><span class="mopen sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span><span class="mfrac"><span class="vlist"><span style="top:0.345em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord 
mathit mtight" style="margin-right:0.10903em;">N</span></span></span></span><span style="top:-0.22999999999999998em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle textstyle uncramped frac-line"></span></span><span style="top:-0.394em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord scriptstyle uncramped mtight"><span class="mord mathrm mtight">1</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span><span class="mclose sizing reset-size5 size5 reset-textstyle textstyle uncramped nulldelimiter"></span></span><span class="mop"><span class="mop op-symbol small-op" style="top:-0.0000050000000000050004em;">&#x2211;</span><span class="msupsub"><span class="vlist"><span style="top:0.30001em;margin-left:0em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight">n</span><span class="mrel mtight">=</span><span class="mord mathrm mtight">1</span></span></span></span><span style="top:-0.364em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.10903em;">N</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="mopen">(</span><span class="mord"><span 
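class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="msupsub"><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight">n</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mclose">)</span><span class="mord mathrm">&#x2207;</span><span class="mord mathit" style="margin-right:0.01968em;">l</span><span class="mord mathit">o</span><span class="mord mathit" style="margin-right:0.03588em;">g</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="msupsub"><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight">n</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2223;</span><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span class="mclose">)</span></span></span></span>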
</center>
<br></p>
<p>&#x60A8;&#x4F1A;&#x53D1;&#x73B0;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><msubsup><mo>&#x2211;</mo><mrow><mi>n</mi><mo>=</mo><mn>1</mn></mrow><mi>N</mi></msubsup><mi>R</mi><mo>(</mo><msup><mi>&#x3C4;</mi><mi>n</mi></msup><mo>)</mo></mrow><annotation encoding="application/x-tex">\sum_{n=1}^NR(\tau^n)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8423309999999999em;"></span><span class="strut bottom" style="height:1.142341em;vertical-align:-0.30001em;"></span><span class="base textstyle uncramped"><span class="mop"><span class="mop op-symbol small-op" style="top:-0.0000050000000000050004em;">&#x2211;</span><span class="msupsub"><span class="vlist"><span style="top:0.30001em;margin-left:0em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight">n</span><span class="mrel mtight">=</span><span class="mord mathrm mtight">1</span></span></span></span><span style="top:-0.364em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.10903em;">N</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathit" style="margin-right:0.00773em;">R</span><span class="mopen">(</span><span class="mord"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="msupsub"><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span 
class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight">n</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mclose">)</span></span></span></span>&#x5F88;&#x597D;&#x7B97;&#xFF0C;&#x53EA;&#x8981;&#x628A;&#x53CD;&#x9988;&#x5168;&#x90E8;&#x52A0;&#x8D77;&#x6765;&#x5C31;&#x5B8C;&#x4E8B;&#x4E86;&#xFF0C;&#x96BE;&#x7B97;&#x7684;&#x662F;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi mathvariant="normal">&#x2207;</mi><mi>l</mi><mi>o</mi><mi>g</mi><mi>P</mi><mo>(</mo><msup><mi>&#x3C4;</mi><mi>n</mi></msup><mi mathvariant="normal">&#x2223;</mi><mi>&#x3B8;</mi><mo>)</mo></mrow><annotation encoding="application/x-tex">\nabla logP(\tau^n|\theta)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">&#x2207;</span><span class="mord mathit" style="margin-right:0.01968em;">l</span><span class="mord mathit">o</span><span class="mord mathit" style="margin-right:0.03588em;">g</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="msupsub"><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight">n</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2223;</span><span 
class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span class="mclose">)</span></span></span></span>&#x3002;</p>
<p>&#x7531;&#x4E8E;&#x4E00;&#x4E2A;&#x6E38;&#x620F;&#x5E8F;&#x5217;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>&#x3C4;</mi></mrow><annotation encoding="application/x-tex">\tau</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.43056em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span></span></span></span>&#x662F;&#x7531;&#x591A;&#x4E2A;&#x72B6;&#x6001;&#xFF0C;&#x52A8;&#x4F5C;&#xFF0C;&#x53CD;&#x9988;&#x6784;&#x6210;&#x7684;&#xFF0C;&#x5373;&#xFF1A;</p>
<p><center>
<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>&#x3C4;</mi><mo>=</mo><mo>{</mo><msub><mi>s</mi><mn>1</mn></msub><mo separator="true">,</mo><msub><mi>a</mi><mn>1</mn></msub><mo separator="true">,</mo><msub><mi>r</mi><mn>1</mn></msub><mo separator="true">,</mo><msub><mi>s</mi><mn>2</mn></msub><mo separator="true">,</mo><msub><mi>a</mi><mn>2</mn></msub><mo separator="true">,</mo><msub><mi>r</mi><mn>2</mn></msub><mo separator="true">,</mo><mi mathvariant="normal">.</mi><mi mathvariant="normal">.</mi><mi mathvariant="normal">.</mi><mo separator="true">,</mo><msub><mi>s</mi><mi>T</mi></msub><mo separator="true">,</mo><msub><mi>a</mi><mi>T</mi></msub><mo separator="true">,</mo><msub><mi>r</mi><mi>T</mi></msub><mo>}</mo></mrow><annotation encoding="application/x-tex">
\tau=\{s_1, a_1, r_1, s_2, a_2, r_2, ..., s_T, a_T, r_T\}
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="mrel">=</span><span class="mopen">{</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit" style="margin-right:0.02778em;">r</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.02778em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span 
style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit" style="margin-right:0.02778em;">r</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.02778em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord mathrm">.</span><span class="mord mathrm">.</span><span class="mord mathrm">.</span><span class="mpunct">,</span><span class="mord"><span class="mord 
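mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit" style="margin-right:0.02778em;">r</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.02778em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mclose">}</span></span></span></span>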
</center>
<br></p>
<p>&#x6240;&#x4EE5;&#xFF1A;</p>
<p><center>
<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>P</mi><mo>(</mo><mi>&#x3C4;</mi><mi mathvariant="normal">&#x2223;</mi><mi>&#x3B8;</mi><mo>)</mo><mo>=</mo><mi>P</mi><mo>(</mo><msub><mi>s</mi><mn>1</mn></msub><mo>)</mo><mi>P</mi><mo>(</mo><msub><mi>a</mi><mn>1</mn></msub><mi mathvariant="normal">&#x2223;</mi><msub><mi>s</mi><mn>1</mn></msub><mo separator="true">,</mo><mi>&#x3B8;</mi><mo>)</mo><mi>P</mi><mo>(</mo><msub><mi>r</mi><mn>1</mn></msub><mo separator="true">,</mo><msub><mi>s</mi><mn>2</mn></msub><mi mathvariant="normal">&#x2223;</mi><msub><mi>s</mi><mn>1</mn></msub><mo separator="true">,</mo><msub><mi>a</mi><mn>1</mn></msub><mo>)</mo><mi>P</mi><mo>(</mo><msub><mi>a</mi><mn>2</mn></msub><mi mathvariant="normal">&#x2223;</mi><msub><mi>s</mi><mn>2</mn></msub><mo separator="true">,</mo><mi>&#x3B8;</mi><mo>)</mo><mi>P</mi><mo>(</mo><msub><mi>r</mi><mn>2</mn></msub><mo separator="true">,</mo><msub><mi>s</mi><mn>3</mn></msub><mi mathvariant="normal">&#x2223;</mi><msub><mi>s</mi><mn>2</mn></msub><mo separator="true">,</mo><msub><mi>a</mi><mn>2</mn></msub><mo>)</mo><mi mathvariant="normal">.</mi><mi mathvariant="normal">.</mi><mi mathvariant="normal">.</mi></mrow><annotation encoding="application/x-tex">
P(\tau|\theta)=P(s_1)P(a_1|s_1,\theta)P(r_1,s_2|s_1,a_1)P(a_2|s_2,\theta)P(r_2,s_3|s_2,a_2)...
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="mord mathrm">&#x2223;</span><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span class="mclose">)</span><span class="mrel">=</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mclose">)</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2223;</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span 
class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span class="mclose">)</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit" style="margin-right:0.02778em;">r</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.02778em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2223;</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span 
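style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mclose">)</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2223;</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span class="mclose">)</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit" style="margin-right:0.02778em;">r</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.02778em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">3</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2223;</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mclose">)</span><span class="mord mathrm">.</span><span class="mord mathrm">.</span><span class="mord mathrm">.</span></span></span></span>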
</center>
<br></p>
<p>&#x7A0D;&#x5FAE;&#x6574;&#x7406;&#x4E00;&#x4E0B;&#x53EF;&#x77E5;&#xFF1A;</p>
<p><center>
<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>P</mi><mo>(</mo><mi>&#x3C4;</mi><mi mathvariant="normal">&#x2223;</mi><mi>&#x3B8;</mi><mo>)</mo><mo>=</mo><mi>P</mi><mo>(</mo><msub><mi>s</mi><mn>1</mn></msub><mo>)</mo><msubsup><mo>&#x220F;</mo><mrow><mi>t</mi><mo>=</mo><mn>1</mn></mrow><mi>T</mi></msubsup><mi>P</mi><mo>(</mo><msub><mi>a</mi><mi>t</mi></msub><mi mathvariant="normal">&#x2223;</mi><msub><mi>s</mi><mi>t</mi></msub><mo separator="true">,</mo><mi>&#x3B8;</mi><mo>)</mo><mi>P</mi><mo>(</mo><msub><mi>r</mi><mi>t</mi></msub><mo separator="true">,</mo><msub><mi>s</mi><mrow><mi>t</mi><mo>+</mo><mn>1</mn></mrow></msub><mi mathvariant="normal">&#x2223;</mi><msub><mi>s</mi><mi>t</mi></msub><mo separator="true">,</mo><msub><mi>a</mi><mi>t</mi></msub><mo>)</mo></mrow><annotation encoding="application/x-tex">
P(\tau|\theta)=P(s_1)\prod_{t=1}^TP(a_t|s_t,\theta)P(r_t,s_{t+1}|s_t,a_t)
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8423309999999999em;"></span><span class="strut bottom" style="height:1.142341em;vertical-align:-0.30001em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="mord mathrm">&#x2223;</span><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span class="mclose">)</span><span class="mrel">=</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mclose">)</span><span class="mop"><span class="mop op-symbol small-op" style="top:-0.0000050000000000050004em;">&#x220F;</span><span class="msupsub"><span class="vlist"><span style="top:0.30001em;margin-left:0em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight">t</span><span class="mrel mtight">=</span><span class="mord mathrm mtight">1</span></span></span></span><span style="top:-0.364em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle 
scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">t</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2223;</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">t</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span class="mclose">)</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit" style="margin-right:0.02778em;">r</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.02778em;"><span class="fontsize-ensurer reset-size5 size5"><span 
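style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">t</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight">t</span><span class="mbin mtight">+</span><span class="mord mathrm mtight">1</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2223;</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">t</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">t</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mclose">)</span></span></span></span>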
</center>
<br></p>
<p>&#x7136;&#x540E;&#x4E24;&#x8FB9;&#x53D6;<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>l</mi><mi>o</mi><mi>g</mi></mrow><annotation encoding="application/x-tex">log</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.69444em;"></span><span class="strut bottom" style="height:0.8888799999999999em;vertical-align:-0.19444em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.01968em;">l</span><span class="mord mathit">o</span><span class="mord mathit" style="margin-right:0.03588em;">g</span></span></span></span>，并对 θ 求梯度（与 θ 无关的项梯度为零），&#x4F1A;&#x5F97;&#x5230;&#xFF1A;</p>
<p><center>
<span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi mathvariant="normal">&#x2207;</mi><mi>l</mi><mi>o</mi><mi>g</mi><mi>P</mi><mo>(</mo><mi>&#x3C4;</mi><mi mathvariant="normal">&#x2223;</mi><mi>&#x3B8;</mi><mo>)</mo><mo>=</mo><msubsup><mo>&#x2211;</mo><mrow><mi>t</mi><mo>=</mo><mn>1</mn></mrow><mi>T</mi></msubsup><mi mathvariant="normal">&#x2207;</mi><mi>l</mi><mi>o</mi><mi>g</mi><mi>P</mi><mo>(</mo><msub><mi>a</mi><mi>t</mi></msub><mi mathvariant="normal">&#x2223;</mi><msub><mi>s</mi><mi>t</mi></msub><mo separator="true">,</mo><mi>&#x3B8;</mi><mo>)</mo></mrow><annotation encoding="application/x-tex">
\nabla logP(\tau|\theta)=\sum_{t=1}^T\nabla logP(a_t|s_t,\theta)
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8423309999999999em;"></span><span class="strut bottom" style="height:1.142341em;vertical-align:-0.30001em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">&#x2207;</span><span class="mord mathit" style="margin-right:0.01968em;">l</span><span class="mord mathit">o</span><span class="mord mathit" style="margin-right:0.03588em;">g</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord mathit" style="margin-right:0.1132em;">&#x3C4;</span><span class="mord mathrm">&#x2223;</span><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span class="mclose">)</span><span class="mrel">=</span><span class="mop"><span class="mop op-symbol small-op" style="top:-0.0000050000000000050004em;">&#x2211;</span><span class="msupsub"><span class="vlist"><span style="top:0.30001em;margin-left:0em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight">t</span><span class="mrel mtight">=</span><span class="mord mathrm mtight">1</span></span></span></span><span style="top:-0.364em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathit mtight" style="margin-right:0.13889em;">T</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2207;</span><span class="mord mathit" style="margin-right:0.01968em;">l</span><span class="mord mathit">o</span><span class="mord mathit" 
style="margin-right:0.03588em;">g</span><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">t</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2223;</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">t</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span class="mclose">)</span></span></span></span>
</center>
<br></p>
<p><span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>P</mi><mo>(</mo><msub><mi>a</mi><mi>t</mi></msub><mi mathvariant="normal">&#x2223;</mi><msub><mi>s</mi><mi>t</mi></msub><mo separator="true">,</mo><mi>&#x3B8;</mi><mo>)</mo></mrow><annotation encoding="application/x-tex">P(a_t|s_t,\theta)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.13889em;">P</span><span class="mopen">(</span><span class="mord"><span class="mord mathit">a</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">t</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mord mathrm">&#x2223;</span><span class="mord"><span class="mord mathit">s</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord mathit mtight">t</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x200B;</span></span>&#x200B;</span></span></span></span><span class="mpunct">,</span><span class="mord mathit" style="margin-right:0.02778em;">&#x3B8;</span><span 
class="mclose">)</span></span></span></span>&#x5176;&#x5B9E;&#x5C31;&#x662F;&#x6211;&#x4EEC;&#x795E;&#x7ECF;&#x7F51;&#x7EDC;&#x6839;&#x636E;&#x73AF;&#x5883;&#x72B6;&#x6001;&#x9884;&#x6D4B;&#x51FA;&#x6765;&#x7684;&#x4E0B;&#x4E00;&#x6B65;&#x7684;&#x52A8;&#x4F5C;&#x6982;&#x7387;&#x5206;&#x5E03;&#x3002;</p>
<p><img src="../img/9.jpg" alt=""></p>
<p>OK&#xFF0C;&#x5230;&#x8FD9;&#x91CC;&#xFF0C;Policy Gradient &#x7684;&#x6570;&#x5B66;&#x63A8;&#x5BFC;&#x5168;&#x90E8;&#x63A8;&#x5BFC;&#x5B8C;&#x6BD5;&#x4E86;&#x3002;&#x6211;&#x4EEC;&#x4E0D;&#x59A8;&#x7528;&#x4E00;&#x5F20;&#x56FE;&#x6765;&#x603B;&#x7ED3;&#x4E00;&#x4E0B; Policy Gradient &#x7684;&#x7B97;&#x6CD5;&#x6D41;&#x7A0B;&#x3002;&#x6D41;&#x7A0B;&#x5982;&#x4E0B;&#xFF1A;</p>
<p><img src="../img/10.jpg" alt="Policy Gradient 算法流程图"></p>
</section>
</div>
<div class="search-results">
<div class="has-results">
<h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
<ul class="search-results-list"></ul>
</div>
<div class="no-results">
<h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
</div>
</div>
</div>
</div>
</div>
</div>
<a href="what is reinforce learning.html" class="navigation navigation-prev " aria-label="Previous page: 什么是强化学习">
<i class="fa fa-angle-left"></i>
</a>
<a href="coding.html" class="navigation navigation-next " aria-label="Next page: 使用Policy Gradient玩乒乓球游戏">
<i class="fa fa-angle-right"></i>
</a>
</div>
<script>
// Page bootstrap: reuse the global `gitbook` queue if one already exists,
// otherwise start it as an empty array, then enqueue a callback that reports
// this page's metadata and the book configuration via gitbook.page.hasChanged().
// NOTE(review): presumably gitbook.js (loaded by a later <script> tag) drains
// this queue and runs the callback — confirm against the theme sources.
var gitbook = gitbook || [];
gitbook.push(function() {
    // The payload is generated by the GitBook build; keep it byte-identical.
    gitbook.page.hasChanged({"page":{"title":"Policy Gradient原理","level":"1.6.2.2","depth":3,"next":{"title":"使用Policy Gradient玩乒乓球游戏","level":"1.6.2.3","depth":3,"path":"pingpong/coding.md","ref":"./pingpong/coding.md","articles":[]},"previous":{"title":"什么是强化学习","level":"1.6.2.1","depth":3,"path":"pingpong/what is reinforce learning.md","ref":"./pingpong/what is reinforce learning.md","articles":[]},"dir":"ltr"},"config":{"gitbook":"*","theme":"default","variables":{},"plugins":["katex"],"pluginsConfig":{"katex":{},"highlight":{},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"fontsettings":{"theme":"white","family":"sans","size":2},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"}},"file":{"path":"pingpong/Policy Gradient.md","mtime":"2019-07-05T02:04:09.124Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2019-07-06T07:31:21.537Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>
<script src="../gitbook/gitbook.js"></script>
<script src="../gitbook/theme.js"></script>
<script src="../gitbook/gitbook-plugin-search/search-engine.js"></script>
<script src="../gitbook/gitbook-plugin-search/search.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
<script src="../gitbook/gitbook-plugin-sharing/buttons.js"></script>
<script src="../gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
</body>
</html>