Ilaslan et al. "VG-TVP: Multimodal Procedural Planning via Visually Grounded Text-Video Prompting." AAAI Conference on Artificial Intelligence, 2025. doi:10.1609/AAAI.V39I4.32406
Markdown
[Ilaslan et al. "VG-TVP: Multimodal Procedural Planning via Visually Grounded Text-Video Prompting." AAAI Conference on Artificial Intelligence, 2025.](https://mlanthology.org/aaai/2025/ilaslan2025aaai-vg/) doi:10.1609/AAAI.V39I4.32406
BibTeX
% Conference paper (AAAI 2025). Only the acronym in the title is brace-protected so bibliography styles can recase the rest.
@inproceedings{ilaslan2025aaai-vg,
  title     = {{VG-TVP}: Multimodal Procedural Planning via Visually Grounded Text-Video Prompting},
  author    = {Ilaslan, Muhammet Furkan and K{\"o}ksal, Ali and Lin, Kevin Qinghong and Satar, Burak and Shou, Mike Zheng and Xu, Qianli},
  booktitle = {{AAAI} Conference on Artificial Intelligence},
  year      = {2025},
  pages     = {3886--3894},
  doi       = {10.1609/AAAI.V39I4.32406},
  url       = {https://mlanthology.org/aaai/2025/ilaslan2025aaai-vg/}
}