Hori et al. "Multimodal Attention for Fusion of Audio and Spatiotemporal Features for Video Description." IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops, 2018.
Markdown
[Hori et al. "Multimodal Attention for Fusion of Audio and Spatiotemporal Features for Video Description." IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops, 2018.](https://mlanthology.org/cvprw/2018/hori2018cvprw-multimodal/)
BibTeX
@inproceedings{hori2018cvprw-multimodal,
  author    = {Hori, Chiori and Hori, Takaaki and Wichern, Gordon and Wang, Jue and Lee, Teng-Yok and Cherian, Anoop and Marks, Tim K.},
  title     = {Multimodal Attention for Fusion of Audio and Spatiotemporal Features for Video Description},
  booktitle = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition Workshops},
  year      = {2018},
  pages     = {2528--2531},
  url       = {https://mlanthology.org/cvprw/2018/hori2018cvprw-multimodal/},
}