Antos et al. "Learning Near-Optimal Policies with Bellman-Residual Minimization Based Fitted Policy Iteration and a Single Sample Path." Annual Conference on Computational Learning Theory, 2006. doi:10.1007/11776420_42
Markdown
[Antos et al. "Learning Near-Optimal Policies with Bellman-Residual Minimization Based Fitted Policy Iteration and a Single Sample Path." Annual Conference on Computational Learning Theory, 2006.](https://mlanthology.org/colt/2006/antos2006colt-learning/) doi:10.1007/11776420_42
BibTeX
@comment{Conference paper entry. Only the proper noun Bellman is brace-protected in the title so that bibliography styles remain free to recase the rest; accented letters are written as LaTeX escapes so the entry also works with classic 8-bit BibTeX.}
@inproceedings{antos2006colt-learning,
  author    = {Antos, Andr{\'a}s and Szepesv{\'a}ri, Csaba and Munos, R{\'e}mi},
  title     = {Learning Near-Optimal Policies with {Bellman}-Residual Minimization Based Fitted Policy Iteration and a Single Sample Path},
  booktitle = {Annual Conference on Computational Learning Theory},
  year      = {2006},
  pages     = {574--588},
  doi       = {10.1007/11776420_42},
  url       = {https://mlanthology.org/colt/2006/antos2006colt-learning/},
}