% Conference paper in the WSDM 2019 proceedings (Qu, Ouyang, Ye, Hua, Zhou).
% year/month/day give the publication date (2019-01-30); the conference
% itself ran 2019-02-11 to 2019-02-15 and is recorded in eventtitle/eventdate.
% funding and copyright are annotation-only fields (not printed by standard
% styles); their text was previously dumped into the printed note field.
@inproceedings{7178b5a5c7ce465bb2f9d20c1ba31190,
  author     = {Qu, Jianfeng and Ouyang, Dantong and Ye, Yuxin and Hua, Wen and Zhou, Xiaofang},
  title      = {Discovering Correlations between Sparse Features in Distant Supervision for Relation Extraction},
  booktitle  = {{WSDM} 2019 - Proceedings of the 12th {ACM} International Conference on Web Search and Data Mining},
  publisher  = {Association for Computing Machinery, Inc},
  year       = {2019},
  month      = jan,
  day        = {30},
  pages      = {726--734},
  doi        = {10.1145/3289600.3291004},
  language   = {English},
  keywords   = {Bag-of-words representation, Distant supervision, Feature correlation, Lexical features, Syntactic features},
  abstract   = {The recent art in relation extraction is distant supervision which generates training data by heuristically aligning a knowledge base with free texts and thus avoids human labelling. However, the concerned relation mentions often use the bag-of-words representation, which ignores inner correlations between features located in different dimensions and makes relation extraction less effective. To capture the complex characteristics of relation expression and tighten the correlated features, we attempt to discover and utilise informative correlations between features by the following four phases: 1) formulating semantic similarities between lexical features using the embedding method; 2) constructing generative relation for lexical features with different sizes of side windows; 3) computing correlation scores between syntactic features through a kernel-based method; and 4) conducting a distillation process for the obtained correlated feature pairs and integrating informative pairs with existing relation extraction models. The extensive experiments demonstrate that our method can effectively discover correlation information and improve the performance of state-of-the-art relation extraction methods.},
  eventtitle = {12th {ACM} International Conference on Web Search and Data Mining, {WSDM} 2019},
  eventdate  = {2019-02-11/2019-02-15},
  funding    = {This research is partially supported by Natural Science Foundation of China (Grant No. 61772356) and the Australian Research Council (Grants No. DP170101172)},
  copyright  = {{\textcopyright} 2019 Association for Computing Machinery.},
}