Zhang et al. "DocKylin: A Large Multimodal Model for Visual Document Understanding with Efficient Visual Slimming." AAAI Conference on Artificial Intelligence, 2025. doi:10.1609/AAAI.V39I9.33076
Markdown
[Zhang et al. "DocKylin: A Large Multimodal Model for Visual Document Understanding with Efficient Visual Slimming." AAAI Conference on Artificial Intelligence, 2025.](https://mlanthology.org/aaai/2025/zhang2025aaai-dockylin/) doi:10.1609/AAAI.V39I9.33076
BibTeX
@inproceedings{zhang2025aaai-dockylin,
  author    = {Zhang, Jiaxin and Yang, Wentao and Lai, Songxuan and Xie, Zecheng and Jin, Lianwen},
  title     = {{DocKylin}: A Large Multimodal Model for Visual Document Understanding with Efficient Visual Slimming},
  booktitle = {{AAAI} Conference on Artificial Intelligence},
  year      = {2025},
  pages     = {9923--9932},
  doi       = {10.1609/AAAI.V39I9.33076},
  url       = {https://mlanthology.org/aaai/2025/zhang2025aaai-dockylin/},
}