0%

贝尔曼方程

\begin{aligned}
\text{价值函数:} v_{\pi}(s) &= \mathbb{E}_{\pi}(G_{t} \mid S_{t}=s) \\
\text{其中:} G_{t} &= R_{t+1} + \gamma R_{t+2} + \gamma^2 R_{t+3} + \dots \\
v_{\pi}(s) &= \mathbb{E}_{\pi}(R_{t+1} + \gamma R_{t+2} + \gamma^2 R_{t+3} + \dots \mid S_{t}=s) \\
&= \mathbb{E}_{\pi}(R_{t+1} + \gamma (R_{t+2} + \gamma R_{t+3} + \dots) \mid S_{t}=s) \\
&= \mathbb{E}_{\pi}(R_{t+1} + \gamma G_{t+1} \mid S_{t}=s) \\
&= \mathbb{E}_{\pi}(R_{t+1} + \gamma v_{\pi}(S_{t+1}) \mid S_{t}=s)
\end{aligned}