Zhu et al. "Locate Then Generate: Bridging Vision and Language with Bounding Box for Scene-Text VQA." AAAI Conference on Artificial Intelligence, 2023. doi:10.1609/AAAI.V37I9.26357
Markdown
[Zhu et al. "Locate Then Generate: Bridging Vision and Language with Bounding Box for Scene-Text VQA." AAAI Conference on Artificial Intelligence, 2023.](https://mlanthology.org/aaai/2023/zhu2023aaai-locate/) doi:10.1609/AAAI.V37I9.26357
BibTeX
% AAAI 2023 conference paper (Zhu et al.) on scene-text VQA.
% Only the acronym in the title is brace-protected so styles may recase the rest.
@inproceedings{zhu2023aaai-locate,
  title     = {Locate Then Generate: Bridging Vision and Language with Bounding Box for Scene-Text {VQA}},
  author    = {Zhu, Yongxin and Liu, Zhen and Liang, Yukang and Li, Xin and Liu, Hao and Bao, Changcun and Xu, Linli},
  booktitle = {AAAI Conference on Artificial Intelligence},
  year      = {2023},
  pages     = {11479--11487},
  doi       = {10.1609/AAAI.V37I9.26357},
  url       = {https://mlanthology.org/aaai/2023/zhu2023aaai-locate/},
}