Zhu et al. "AVSal: Enhancing Video Saliency Prediction Through Audio-Visual Fusion and Temporal Aggregation." European Conference on Computer Vision Workshops, 2024. doi:10.1007/978-3-031-91856-8_8
Markdown
[Zhu et al. "AVSal: Enhancing Video Saliency Prediction Through Audio-Visual Fusion and Temporal Aggregation." European Conference on Computer Vision Workshops, 2024.](https://mlanthology.org/eccvw/2024/zhu2024eccvw-avsal/) doi:10.1007/978-3-031-91856-8_8
BibTeX
% AVSal (Zhu et al.), European Conference on Computer Vision Workshops, 2024.
% Only the acronym in the title is brace-protected so styles can still apply
% their own casing; the page range uses "--" so it renders as an en dash.
@inproceedings{zhu2024eccvw-avsal,
  title     = {{AVSal}: Enhancing Video Saliency Prediction Through Audio-Visual Fusion and Temporal Aggregation},
  author    = {Zhu, Yuxin and Sun, Yinan and Duan, Huiyu and Cao, Yuqin and Jia, Ziheng and Hu, Qiang and Min, Xiongkuo and Zhai, Guangtao},
  booktitle = {European Conference on Computer Vision Workshops},
  year      = {2024},
  pages     = {127--143},
  doi       = {10.1007/978-3-031-91856-8_8},
  url       = {https://mlanthology.org/eccvw/2024/zhu2024eccvw-avsal/},
}