Zhong et al. "STOA-VLP: Spatial-Temporal Modeling of Object and Action for Video-Language Pre-Training." AAAI Conference on Artificial Intelligence, 2023. doi:10.1609/AAAI.V37I3.25483
Markdown
[Zhong et al. "STOA-VLP: Spatial-Temporal Modeling of Object and Action for Video-Language Pre-Training." AAAI Conference on Artificial Intelligence, 2023.](https://mlanthology.org/aaai/2023/zhong2023aaai-stoa/) doi:10.1609/AAAI.V37I3.25483
BibTeX
@comment{Conference paper. Only the acronyms in title and booktitle are brace-protected so that bibliography styles can still apply their own casing; the page range uses the BibTeX double-hyphen form.}
@inproceedings{zhong2023aaai-stoa,
  author    = {Zhong, Weihong and Zheng, Mao and Tang, Duyu and Luo, Xuan and Gong, Heng and Feng, Xiaocheng and Qin, Bing},
  title     = {{STOA-VLP}: Spatial-Temporal Modeling of Object and Action for Video-Language Pre-Training},
  booktitle = {{AAAI} Conference on Artificial Intelligence},
  year      = {2023},
  pages     = {3715--3723},
  doi       = {10.1609/AAAI.V37I3.25483},
  url       = {https://mlanthology.org/aaai/2023/zhong2023aaai-stoa/},
}