% Journal article: machine-learning prediction of leprosy misdiagnosis
% (Frontiers in Public Health, volume 14, 2026).
% Maintainer notes:
%   - The citation key is kept as-is so existing \cite commands still resolve.
%   - Authors are written "Family, Initial" so the family name is parsed as the
%     surname. NOTE(review): the export listed names as "Guo Y", "Yin L", ...;
%     family-name-first order is assumed -- confirm against the publisher record
%     and replace initials with full given names if available.
%   - Proper nouns in the title are brace-protected against sentence-casing styles.
%   - month uses the predefined macro (export had "05/2026").
%   - The abstract text is reproduced verbatim from the export.
@article{103536,
  author    = {Guo, Y and Yin, L and Yang, H and Yang, X and Yu, X and Zhang, C and Zhou, L and Zhao, F and Lu, S and He, Q and Han, L and Wang, W and Liu, Y and Li, Y},
  title     = {Machine learning methods to predict leprosy misdiagnosis in {Yunnan Province}, {People's Republic of China}},
  journal   = {Frontiers in Public Health},
  year      = {2026},
  month     = may,
  volume    = {14},
  pages     = {1--11},
  publisher = {Frontiers Media SA},
  issn      = {2296-2565},
  doi       = {10.3389/fpubh.2026.1785606},
  url       = {https://www.frontiersin.org/journals/public-health/articles/10.3389/fpubh.2026.1785606/pdf},
  language  = {ENG},
  keywords  = {Machine learning, Misdiagnosis, China},
  abstract  = {Background and objective

Leprosy is a chronic infectious disease caused by Mycobacterium leprae that is often misdiagnosed. This study aimed to identify factors associated with leprosy misdiagnosis and develop and compare machine learning (ML) models to predict the risk of misdiagnosis.

Methods

A retrospective analysis was conducted on clinical and epidemiological data in 486 diagnosed leprosy patients. The outcome was a binary variable to indicate whether a patient had experienced a prior misdiagnosis. Features analyzed included sociodemographic factors, clinical characteristics, and epidemiological exposures. LASSO regression analysis performed feature selection. Class imbalance was handled using synthetic minority oversampling technique. Nine ML models were trained and validated with a 80–20 data split. The best model performance was evaluated based on AUC-ROC, sensitivity, and specificity. Important features were interpreted using the SHapley Additive exPlanation (SHAP) technique.

Results

Among 486 leprosy patients, 159 (32.7%) experienced misdiagnoses. Nineteen features were selected for model development. The best-performing model was Neural Network, which demonstrated the most balanced performance (AUC: 0.79 and 0.68, sensitivity: 0.93 and 0.78, specificity: 0.68 and 0.57 in train and test, respectively). The SHAP analysis identified key predictors associated with the detection of leprosy misdiagnosis, including mode of detection, aspartate aminotransferase level, gender, and the presence of skin lesions. In addition, ethnicity, education, leprosy reaction, household contact with an active case, and source of infection also contributed to the detection of leprosy misdiagnosis.

Conclusion

Applying ML to clinical data can effectively identify leprosy patients at high risk of being misdiagnosed using clinical, social and epidemiology characteristics. A ML-based support tool could aid frontline healthcare providers to reduce overlooking leprosy diseases.},
}