Knox et al. "Learning Optimal Advantage from Preferences and Mistaking It for Reward." AAAI Conference on Artificial Intelligence, 2024. doi:10.1609/AAAI.V38I9.28870
Markdown
[Knox et al. "Learning Optimal Advantage from Preferences and Mistaking It for Reward." AAAI Conference on Artificial Intelligence, 2024.](https://mlanthology.org/aaai/2024/knox2024aaai-learning/) doi:10.1609/AAAI.V38I9.28870
BibTeX
@inproceedings{knox2024aaai-learning,
  title     = {Learning Optimal Advantage from Preferences and Mistaking It for Reward},
  author    = {Knox, W. Bradley and Hatgis-Kessell, Stephane and Adalgeirsson, Sigurdur O. and Booth, Serena and Dragan, Anca D. and Stone, Peter and Niekum, Scott},
  booktitle = {{AAAI} Conference on Artificial Intelligence},
  year      = {2024},
  pages     = {10066--10073},
  doi       = {10.1609/AAAI.V38I9.28870},
  url       = {https://mlanthology.org/aaai/2024/knox2024aaai-learning/},
}