Shen et al. "GroundVLP: Harnessing Zero-Shot Visual Grounding from Vision-Language Pre-Training and Open-Vocabulary Object Detection." AAAI Conference on Artificial Intelligence, 2024. doi:10.1609/AAAI.V38I5.28278
[Shen et al. "GroundVLP: Harnessing Zero-Shot Visual Grounding from Vision-Language Pre-Training and Open-Vocabulary Object Detection." AAAI Conference on Artificial Intelligence, 2024.](https://mlanthology.org/aaai/2024/shen2024aaai-groundvlp/) doi:10.1609/AAAI.V38I5.28278
@comment{AAAI 2024 conference paper. Only the mixed-case name GroundVLP is
brace-protected in the title so bibliography styles remain free to recase
the remaining words; the page range uses "--" so it is typeset as an en-dash.}
@inproceedings{shen2024aaai-groundvlp,
  author    = {Shen, Haozhan and Zhao, Tiancheng and Zhu, Mingwei and Yin, Jianwei},
  title     = {{GroundVLP}: Harnessing Zero-Shot Visual Grounding from Vision-Language Pre-Training and Open-Vocabulary Object Detection},
  booktitle = {AAAI Conference on Artificial Intelligence},
  year      = {2024},
  pages     = {4766--4775},
  doi       = {10.1609/AAAI.V38I5.28278},
  url       = {https://mlanthology.org/aaai/2024/shen2024aaai-groundvlp/},
}