% Conference paper in the Findings of ACL-IJCNLP 2021 proceedings, pages 3853--3866.
% The hex citation key is kept unchanged so existing citations keep resolving.
% copyright and eventdate carry what the exported note field used to hold, so
% that boilerplate is no longer printed by styles that emit the note field.
% NOTE(review): address names a country, not a publisher city -- TODO confirm the intended value.
@inproceedings{7c50a1d067e34f8c9c1f50232885ab19,
  author    = {Yue, Xiang and Du, Minxin and Wang, Tianhao and Li, Yaliang and Sun, Huan and Chow, Sherman S. M.},
  title     = {Differential Privacy for Text Analytics via Natural Text Sanitization},
  editor    = {Zong, Chengqing and Xia, Fei and Li, Wenjie and Navigli, Roberto},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL-IJCNLP} 2021},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2021},
  month     = aug,
  pages     = {3853--3866},
  eventdate = {2021-08-01/2021-08-06},
  language  = {English},
  copyright = {{\textcopyright} 2021 Association for Computational Linguistics},
  abstract  = {Texts convey sophisticated knowledge. However, texts also convey sensitive information. Despite the success of general-purpose language models and domain-specific mechanisms with differential privacy (DP), existing text sanitization mechanisms still provide low utility, as cursed by the high-dimensional text representation. The companion issue of utilizing sanitized texts for downstream analytics is also under-explored. This paper takes a direct approach to text sanitization. Our insight is to consider both sensitivity and similarity via our new local DP notion. The sanitized texts also contribute to our sanitization-aware pretraining and fine-tuning, enabling privacy-preserving natural language processing over the BERT language model with promising utility. Surprisingly, the high utility does not boost up the success rate of inference attacks.},
}