Li et al. "Semi-Clairvoyant Scheduling of Speculative Decoding Requests to Minimize LLM Inference Latency." International Joint Conference on Artificial Intelligence, 2025. doi:10.24963/IJCAI.2025/951
Markdown
[Li et al. "Semi-Clairvoyant Scheduling of Speculative Decoding Requests to Minimize LLM Inference Latency." International Joint Conference on Artificial Intelligence, 2025.](https://mlanthology.org/ijcai/2025/li2025ijcai-semi/) doi:10.24963/IJCAI.2025/951
BibTeX
% IJCAI 2025 conference paper. Only the acronym in the title is brace-protected, so the bibliography style stays free to apply its own casing.
@inproceedings{li2025ijcai-semi,
  author    = {Li, Ruixiao and Chen, Fahao and Li, Peng},
  title     = {Semi-Clairvoyant Scheduling of Speculative Decoding Requests to Minimize {LLM} Inference Latency},
  booktitle = {International Joint Conference on Artificial Intelligence},
  year      = {2025},
  pages     = {8554--8562},
  doi       = {10.24963/IJCAI.2025/951},
  url       = {https://mlanthology.org/ijcai/2025/li2025ijcai-semi/}
}