@inproceedings{acl07:yue,
  abstract  = {Standard approaches to Chinese word segmentation treat the problem as a tagging task, assigning labels to the characters in the sequence indicating whether the character marks a word boundary. Discriminatively trained models based on local character features are used to make the tagging decisions, with Viterbi decoding finding the highest scoring segmentation. In this paper we propose an alternative, word-based segmentor, which uses features based on complete words and word sequences. The generalized perceptron algorithm is used for discriminative training, and we use a beam search decoder. Closed tests on the first and second SIGHAN bakeoffs show that our system is competitive with the best in the literature, achieving the highest reported F-score for a number of corpora.},
  address   = {Prague, Czech Republic},
  author    = {Zhang, Yue and Clark, Stephen},
  booktitle = {Proceedings of {ACL}},
  month     = jun,
  title     = {{Chinese} Segmentation Using a Word-based Perceptron Algorithm},
  url       = {http://acl.ldc.upenn.edu/P/P07/P07-1106.pdf},
  year      = {2007},
}

