Singh et al. "Convergence Results for Single-Step On-Policy Reinforcement-Learning Algorithms." Machine Learning, 2000. doi:10.1023/A:1007678930559
Markdown
[Singh et al. "Convergence Results for Single-Step On-Policy Reinforcement-Learning Algorithms." Machine Learning, 2000.](https://mlanthology.org/mlj/2000/singh2000mlj-convergence/) doi:10.1023/A:1007678930559
BibTeX
@comment{Journal article: Machine Learning, volume 38, pages 287--308, 2000.
  The accented letter in the fourth author's surname is written as a LaTeX
  accent escape so the entry also sorts and prints correctly under 8-bit
  classic BibTeX. The title is stored in Title Case without whole-title
  brace protection, so the bibliography style decides the final casing.}
@article{singh2000mlj-convergence,
  author  = {Singh, Satinder and Jaakkola, Tommi S. and Littman, Michael L. and Szepesv{\'a}ri, Csaba},
  title   = {Convergence Results for Single-Step On-Policy Reinforcement-Learning Algorithms},
  journal = {Machine Learning},
  volume  = {38},
  pages   = {287--308},
  year    = {2000},
  doi     = {10.1023/A:1007678930559},
  url     = {https://mlanthology.org/mlj/2000/singh2000mlj-convergence/},
}