@inproceedings{25b7f53c114847799f15c1930baaf331,
  title     = {Disentangled Speaker Embedding for Robust Speaker Verification},
  author    = {Yi, Lu and Mak, Man Wai},
  booktitle = {2022 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2022 - Proceedings},
  series    = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {IEEE},
  address   = {United States},
  pages     = {4633--4637},
  year      = {2022},
  month     = may,
  doi       = {10.1109/ICASSP43922.2022.9747778},
  language  = {English},
  keywords  = {Speaker verification, domain adaptation, mutual information, self-supervised learning},
  abstract  = {Entanglement of speaker features and redundant features may lead to poor performance when evaluating speaker verification systems on an unseen domain. To address this issue, we propose an InfoMax domain separation and adaptation network (InfoMax--DSAN) to disentangle the domain-specific features and domain-invariant speaker features based on domain adaptation techniques. A frame-based mutual information neural estimator is proposed to maximize the mutual information between frame-level features and input acoustic features, which can help retain more useful information. Furthermore, we propose adopting triplet loss based on the idea of self-supervised learning to overcome the label mismatch problem. Experimental results on VOiCES Challenge 2019 demonstrate that our proposed method can help learn more discriminative and robust speaker embeddings.},
  note      = {Funding Information: This work was supported by the RGC of Hong Kong SAR, Grant No. PolyU 152137/17E and National Natural Science Foundation of China (NSFC), Grant No. 61971371. Publisher Copyright: {\textcopyright} 2022 IEEE},
}