Antos et al. "Learning Near-Optimal Policies with Bellman-Residual Minimization Based Fitted Policy Iteration and a Single Sample Path." Machine Learning, 2008. doi:10.1007/S10994-007-5038-2
Markdown
[Antos et al. "Learning Near-Optimal Policies with Bellman-Residual Minimization Based Fitted Policy Iteration and a Single Sample Path." Machine Learning, 2008.](https://mlanthology.org/mlj/2008/antos2008mlj-learning/) doi:10.1007/S10994-007-5038-2
BibTeX
@article{antos2008mlj-learning,
  author  = {Antos, András and Szepesvári, Csaba and Munos, Rémi},
  title   = {Learning Near-Optimal Policies with {Bellman}-Residual Minimization Based Fitted Policy Iteration and a Single Sample Path},
  journal = {Machine Learning},
  volume  = {71},
  pages   = {89--129},
  year    = {2008},
  doi     = {10.1007/S10994-007-5038-2},
  url     = {https://mlanthology.org/mlj/2008/antos2008mlj-learning/},
}