Abstract
Estimating tumor-specific transcript proportions from mixed bulk samples has the potential to inform novel biology. However, estimation accuracy using existing methods in sparse-count data such as microRNA-seq and spatial transcriptomics has yet to be established. We generated a mixed small RNA benchmark dataset to demonstrate analytical challenges. To resolve them, we developed DeMixNB, a semi-reference-based deconvolution model assuming a sum of negative binomial distributions. Applications to miRNA-seq from 856 patients with breast cancer and 3,755 spatial spots from lung cancer generated either clinical or mechanistic insights into tumor cell plasticity. This supports the important utility of DeMixNB to investigate cancer RNomes.

## Relevant links
Citation
@article{Montierth2025.11.21.689822,
  author        = {Montierth, Matthew D and Yan, Hao and Xie, Liyang and Nemeth, Kinga and Pan, Xiaoxi and Li, Ruonan and Ercan, Caner and Yang, Peng and Sinjab, Ansam and Zhou, Tieling and Peng, Fuduan and Singh, Manisha and Wang, Linghua and Kopetz, Scott and Kadara, Humam and Yuan, Yinyin and Calin, George A. and Wang, Wenyi},
  title         = {Deconvolution of Sparse-count {RNA} Sequencing Data for Tumor Cells Using Embedded Negative Binomial Distributions},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2025},
  elocation-id  = {2025.11.21.689822},
  doi           = {10.1101/2025.11.21.689822},
  url           = {https://www.biorxiv.org/content/early/2025/11/24/2025.11.21.689822},
  eprint        = {https://www.biorxiv.org/content/early/2025/11/24/2025.11.21.689822.full.pdf},
}