Feng et al. "LEGEND: Leveraging Representation Engineering to Annotate Safety Margin for Preference Datasets." AAAI Conference on Artificial Intelligence, 2025. doi:10.1609/AAAI.V39I26.34937
Markdown
[Feng et al. "LEGEND: Leveraging Representation Engineering to Annotate Safety Margin for Preference Datasets." AAAI Conference on Artificial Intelligence, 2025.](https://mlanthology.org/aaai/2025/feng2025aaai-legend/) doi:10.1609/AAAI.V39I26.34937
BibTeX
% AAAI 2025 conference paper. Only the acronym LEGEND is brace-protected so
% bibliography styles can still apply their own title casing to the rest.
@inproceedings{feng2025aaai-legend,
  title     = {{LEGEND}: Leveraging Representation Engineering to Annotate Safety Margin for Preference Datasets},
  author    = {Feng, Duanyu and Qin, Bowen and Huang, Chen and Huang, Youcheng and Zhang, Zheng and Lei, Wenqiang},
  booktitle = {AAAI Conference on Artificial Intelligence},
  year      = {2025},
  pages     = {27277--27285},
  doi       = {10.1609/AAAI.V39I26.34937},
  url       = {https://mlanthology.org/aaai/2025/feng2025aaai-legend/},
}