Bai et al. "Regret Analysis of Policy Gradient Algorithm for Infinite Horizon Average Reward Markov Decision Processes." AAAI Conference on Artificial Intelligence, 2024. doi:10.1609/AAAI.V38I10.28973
Markdown
[Bai et al. "Regret Analysis of Policy Gradient Algorithm for Infinite Horizon Average Reward Markov Decision Processes." AAAI Conference on Artificial Intelligence, 2024.](https://mlanthology.org/aaai/2024/bai2024aaai-regret/) doi:10.1609/AAAI.V38I10.28973
BibTeX
@inproceedings{bai2024aaai-regret,
  title     = {Regret Analysis of Policy Gradient Algorithm for Infinite Horizon Average Reward {Markov} Decision Processes},
  author    = {Bai, Qinbo and Mondal, Washim Uddin and Aggarwal, Vaneet},
  booktitle = {AAAI Conference on Artificial Intelligence},
  year      = {2024},
  pages     = {10980--10988},
  doi       = {10.1609/AAAI.V38I10.28973},
  url       = {https://mlanthology.org/aaai/2024/bai2024aaai-regret/},
}