Song et al. "V-MPO: On-Policy Maximum a Posteriori Policy Optimization for Discrete and Continuous Control." International Conference on Learning Representations, 2020.
Markdown
[Song et al. "V-MPO: On-Policy Maximum a Posteriori Policy Optimization for Discrete and Continuous Control." International Conference on Learning Representations, 2020.](https://mlanthology.org/iclr/2020/song2020iclr-vmpo/)
BibTeX
@comment{Conference paper (ICLR 2020). Only the acronym V-MPO is brace-protected in the title, so bibliography styles remain free to apply their own casing to the remaining words.}
@inproceedings{song2020iclr-vmpo,
  title     = {{V-MPO}: On-Policy Maximum a Posteriori Policy Optimization for Discrete and Continuous Control},
  author    = {Song, H. Francis and Abdolmaleki, Abbas and Springenberg, Jost Tobias and Clark, Aidan and Soyer, Hubert and Rae, Jack W. and Noury, Seb and Ahuja, Arun and Liu, Siqi and Tirumala, Dhruva and Heess, Nicolas and Belov, Dan and Riedmiller, Martin and Botvinick, Matthew M.},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
  url       = {https://mlanthology.org/iclr/2020/song2020iclr-vmpo/},
}