% Conference paper from the proceedings of the 8th SIGHAN Workshop (2015).
% The funding acknowledgement and publisher copyright text that the export
% placed in `note` are kept in the non-standard `funding` and `copyright`
% fields, which standard styles ignore, so they stay out of the rendered
% reference. `series` is omitted because it only repeated `booktitle`.
@inproceedings{ea33b60b4bb646318bed97603a37810b,
  author    = {Wang, Shichang and Huang, {Chu Ren} and Yao, Yao and Chan, Angel},
  title     = {Create a Manual {Chinese} Word Segmentation Dataset Using Crowdsourcing Method},
  booktitle = {Proceedings of the 8th {SIGHAN} Workshop on {Chinese} Language Processing, {SIGHAN} 2015 - co-located with 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing, {ACL} {IJCNLP} 2015},
  editor    = {Yu, Liang-Chih and Sui, Zhifang and Zhang, Yue and Ng, Vincent},
  publisher = {Association for Computational Linguistics (ACL)},
  year      = {2015},
  month     = jul,
  pages     = {7--14},
  language  = {English},
  eventdate = {2015-07-30/2015-07-31},
  abstract  = {The manual Chinese word segmentation dataset WordSegCHC 1.0 which was built by eight crowdsourcing tasks conducted on the Crowdflower platform contains the manual word segmentation data of 152 Chinese sentences whose length ranges from 20 to 46 characters without punctuations. All the sentences received 200 segmentation responses in their corresponding crowdsourcing tasks and the numbers of valid response of them range from 123 to 143 (each sentence was segmented by more than 120 subjects). We also proposed an evaluation method called manual segmentation error rate (MSER) to evaluate the dataset; the MSER of the dataset is proved to be very low which indicates reliable data quality. In this work, we applied the crowdsourcing method to Chinese word segmentation task and the results confirmed again that the crowdsourcing method is a promising tool for linguistic data collection; the framework of crowdsourcing linguistic data collection used in this work can be reused in similar tasks; the resultant dataset filled a gap in Chinese language resources to the best of our knowledge, and it has potential applications in the research of word intuition of Chinese speakers and Chinese language processing.},
  funding   = {The work described in this paper was supported by a grant from the Research Grants Council of the Hong Kong SAR, China (Project No. 544011).},
  copyright = {{\textcopyright} 2015 Proceedings of the 8th SIGHAN Workshop on Chinese Language Processing, SIGHAN 2015 - co-located with 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing, ACL IJCNLP 2015. All rights reserved.},
}