Ma et al. "An Online Policy Gradient Algorithm for Markov Decision Processes with Continuous States and Actions." European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, 2014. doi:10.1007/978-3-662-44851-9_23
Markdown
[Ma et al. "An Online Policy Gradient Algorithm for Markov Decision Processes with Continuous States and Actions." European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, 2014.](https://mlanthology.org/ecmlpkdd/2014/ma2014ecmlpkdd-online/) doi:10.1007/978-3-662-44851-9_23
BibTeX
@inproceedings{ma2014ecmlpkdd-online,
  title     = {An Online Policy Gradient Algorithm for {Markov} Decision Processes with Continuous States and Actions},
  author    = {Ma, Yao and Zhao, Tingting and Hatano, Kohei and Sugiyama, Masashi},
  booktitle = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases},
  year      = {2014},
  pages     = {354--369},
  doi       = {10.1007/978-3-662-44851-9_23},
  url       = {https://mlanthology.org/ecmlpkdd/2014/ma2014ecmlpkdd-online/}
}