Doveh et al. "Towards Multimodal In-Context Learning for Vision and Language Models." European Conference on Computer Vision Workshops, 2024. doi:10.1007/978-3-031-93806-1_19
Markdown
[Doveh et al. "Towards Multimodal In-Context Learning for Vision and Language Models." European Conference on Computer Vision Workshops, 2024.](https://mlanthology.org/eccvw/2024/doveh2024eccvw-multimodal/) doi:10.1007/978-3-031-93806-1_19
BibTeX
@comment{
  Workshop paper, European Conference on Computer Vision Workshops, 2024.
  The title is stored in Title Case without whole-title braces so the
  bibliography style decides the casing; the page range uses the BibTeX
  double-hyphen form.
}
@inproceedings{doveh2024eccvw-multimodal,
  author    = {Doveh, Sivan and Perek, Shaked and Mirza, Muhammad Jehanzeb and Lin, Wei and Alfassy, Amit and Arbelle, Assaf and Ullman, Shimon and Karlinsky, Leonid},
  title     = {Towards Multimodal In-Context Learning for Vision and Language Models},
  booktitle = {European Conference on Computer Vision Workshops},
  year      = {2024},
  pages     = {250--267},
  doi       = {10.1007/978-3-031-93806-1_19},
  url       = {https://mlanthology.org/eccvw/2024/doveh2024eccvw-multimodal/},
}