% Conference paper in the INTERSPEECH 2010 proceedings (exported record, cleaned up).
% - Acronyms in title/booktitle are brace-protected so sentence-casing styles keep them.
% - Author names are all in "Last, First" form; the braced given names stay as exported.
% - The former "series" field was dropped: it repeated the booktitle verbatim, which
%   makes styles that print the series show the proceedings name twice.
@inproceedings{d8d7578e4cf7446eb772c92a296d1fb1,
  author    = {Zhu, Donglai and Ma, Bin and Lee, {Kong Aik} and Leung, {Cheung Chi} and Li, Haizhou},
  title     = {{MAP} estimation of subspace transform for speaker recognition},
  booktitle = {Proceedings of the 11th Annual Conference of the International Speech Communication Association, {INTERSPEECH} 2010},
  publisher = {International Speech Communication Association},
  pages     = {1465--1468},
  year      = {2010},
  month     = sep,
  language  = {English},
  keywords  = {Maximum a posteriori, Speaker recognition, Subspace transform},
  abstract  = {We propose using the maximum-a-posteriori (MAP) estimation of subspace transform for speaker recognition. The linear transform is defined on the mean vectors of the Gaussian mixture model (GMM), where transform matrices and bias vectors are associated with separate regression classes so that both can be estimated with sufficient statistics given limited training data. The transform matrices are further defined as a linear combination of a set of basis transforms so that the weights are parameters to be estimated. We characterize the speakers with the transform parameters and model them using support vector machine (SVM). Experiments on the 2008 NIST SRE task illustrate the effectiveness of the method.},
}