% Workshop paper in the CLSW 2017 revised selected papers (Lecture Notes in Computer Science).
% Names are stored in "Last, First" form; braces in the title/booktitle protect proper nouns and
% acronyms from style-driven recasing.
% NOTE(review): the series volume number is not recorded in this entry -- TODO add a volume field
% once confirmed against the publisher record. The month/day values are kept as exported; verify.
@inproceedings{9c49c517ba4d4b3383a80ff0138758b4,
  author    = {Liu, Hongchao and Huang, {Chu Ren} and Hou, {Ren Kui}},
  title     = {{Mandarin} {Relata}: A Dataset of Word Relations and Their Semantic Types},
  editor    = {Wu, Yunfang and Su, Qi and Hong, Jia-Fei},
  booktitle = {Chinese Lexical Semantics -- 18th Workshop, {CLSW} 2017, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer-Verlag},
  pages     = {336--340},
  year      = {2018},
  month     = jan,
  day       = {1},
  doi       = {10.1007/978-3-319-73573-3_30},
  isbn      = {9783319735726},
  language  = {English},
  keywords  = {Dataset, DSM, Semantic types, Word relation},
  abstract  = {For both the training and evaluation of semantic distributional models, language datasets are needed that are both elaborate in their word level descriptors and readily intuitive to human judgment. The current paper introduces a dataset for Mandarin Chinese constructed through the combination of word relation pairs from two distinct sources: corpus extraction, and human elicitation. Our results show that while more word relation pairs were gained through the corpus extraction process, human elicited semantic neighbors were almost twice as likely to show agreement with human raters. The current methods created 4091 word relation pairs that span hypernymy, hyponymy, synonymy, antonymy, and meronymy alongside semantic type information. To date, this is the largest collection of human-rated word relation pairs in Mandarin Chinese.},
  note      = {18th Chinese Lexical Semantics Workshop, CLSW 2017; conference date: 18--20 May 2017},
}