% Conference paper in the MICCAI 2021 proceedings (LNCS series).
% Export metadata (conference name and dates, funding, copyright) is stored in
% dedicated fields rather than in `note`, so it is not printed verbatim in the
% reference list; classic BibTeX styles ignore field names they do not know.
@inproceedings{3c9cdab9234f4b26a8be834c5dd5208c,
  author     = {Liu, Bo and Zhan, {Li Ming} and Wu, {Xiao Ming}},
  title      = {Contrastive Pre-training and Representation Distillation for Medical Visual Question Answering Based on Radiology Images},
  editor     = {{de Bruijne}, Marleen and Cattin, {Philippe C.} and Cotin, St{\'e}phane and Padoy, Nicolas and Speidel, Stefanie and Zheng, Yefeng and Essert, Caroline},
  booktitle  = {Medical Image Computing and Computer Assisted Intervention -- {MICCAI} 2021 - 24th International Conference, Proceedings},
  series     = {Lecture Notes in Computer Science},
  pages      = {210--220},
  publisher  = {Springer Science and Business Media Deutschland GmbH},
  address    = {Germany},
  year       = {2021},
  month      = sep,
  doi        = {10.1007/978-3-030-87196-3_20},
  isbn       = {9783030871956},
  language   = {English},
  eventtitle = {24th International Conference on Medical Image Computing and Computer Assisted Intervention, {MICCAI} 2021},
  eventdate  = {2021-09-27/2021-10-01},
  keywords   = {Contrastive learning, Medical visual question answering, Model compression, Representation distillation},
  abstract   = {One of the primary challenges facing medical visual question answering (Med-VQA) is the lack of large-scale well-annotated datasets for training. To overcome this challenge, this paper proposes a two-stage pre-training framework by learning transferable feature representations of radiology images and distilling a lightweight visual feature extractor for Med-VQA. Specifically, we leverage large amounts of unlabeled radiology images to train three teacher models for the body regions of brain, chest, and abdomen respectively via contrastive learning. Then, we distill the teacher models to a lightweight student model that can be used as a universal visual feature extractor for any Med-VQA system. The lightweight feature extractor can be readily fine-tuned on the training radiology images of any Med-VQA dataset, saving the annotation effort while preventing overfitting to small-scale training data. The effectiveness and advantages of the pre-trained model are demonstrated by extensive experiments with state-of-the-art Med-VQA methods on existing benchmarks. The source code and the pre-training dataset can be downloaded from https://github.com/awenbocc/cprd.},
  funding    = {This research was supported by the grant of P0030935 (ZVPY) funded by PolyU (UGC).},
  copyright  = {{\textcopyright} 2021, Springer Nature Switzerland AG.},
}