Peng et al. "Efficient End-to-End Video Question Answering with Pyramidal Multimodal Transformer." AAAI Conference on Artificial Intelligence, 2023. doi:10.1609/AAAI.V37I2.25296
Markdown
[Peng et al. "Efficient End-to-End Video Question Answering with Pyramidal Multimodal Transformer." AAAI Conference on Artificial Intelligence, 2023.](https://mlanthology.org/aaai/2023/peng2023aaai-efficient/) doi:10.1609/AAAI.V37I2.25296
BibTeX
% Conference paper (AAAI 2023) by Peng, Wang, Shi, and Zhou.
% The title is stored in Title Case without whole-title braces so the
% bibliography style controls casing; the page range uses "--" (en-dash).
@inproceedings{peng2023aaai-efficient,
  title     = {Efficient End-to-End Video Question Answering with Pyramidal Multimodal Transformer},
  author    = {Peng, Min and Wang, Chongyang and Shi, Yu and Zhou, Xiang-Dong},
  booktitle = {AAAI Conference on Artificial Intelligence},
  year      = {2023},
  pages     = {2038--2046},
  doi       = {10.1609/AAAI.V37I2.25296},
  url       = {https://mlanthology.org/aaai/2023/peng2023aaai-efficient/},
}