% Workshop paper (NLP for Conversational AI, 2nd workshop); authors and volume editors are listed one per line.
@inproceedings{summerville-etal-2020-tame,
    title     = {How to Tame Your Data: Data Augmentation for Dialog State Tracking},
    author    = {Summerville, Adam and
                 Hashemi, Jordan and
                 Ryan, James and
                 Ferguson, William},
    editor    = {Wen, Tsung-Hsien and
                 Celikyilmaz, Asli and
                 Yu, Zhou and
                 Papangelis, Alexandros and
                 Eric, Mihail and
                 Kumar, Anuj and
                 Casanueva, I{\~n}igo and
                 Shah, Rushin},
    booktitle = {Proceedings of the 2nd Workshop on Natural Language Processing for Conversational AI},
    month     = jul,
    year      = {2020},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2020.nlp4convai-1.4},
    doi       = {10.18653/v1/2020.nlp4convai-1.4},
    pages     = {32--37},
    abstract  = {Dialog State Tracking (DST) is a problem space in which the effective vocabulary is practically limitless. For example, the domain of possible movie titles or restaurant names is bound only by the limits of language. As such, DST systems often encounter out-of-vocabulary words at inference time that were never encountered during training. To combat this issue, we present a targeted data augmentation process, by which a practitioner observes the types of errors made on held-out evaluation data, and then modifies the training data with additional corpora to increase the vocabulary size at training time. Using this with a RoBERTa-based Transformer architecture, we achieve state-of-the-art results in comparison to systems that only mask trouble slots with special tokens. Additionally, we present a data-representation scheme for seamlessly retargeting DST architectures to new domains.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="summerville-etal-2020-tame">
    <titleInfo>
      <title>How to Tame Your Data: Data Augmentation for Dialog State Tracking</title>
    </titleInfo>

    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Adam</namePart>
      <namePart type="family">Summerville</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jordan</namePart>
      <namePart type="family">Hashemi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">James</namePart>
      <namePart type="family">Ryan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">William</namePart>
      <namePart type="family">Ferguson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2020-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Workshop on Natural Language Processing for Conversational AI</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Tsung-Hsien</namePart>
        <namePart type="family">Wen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Asli</namePart>
        <namePart type="family">Celikyilmaz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zhou</namePart>
        <namePart type="family">Yu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexandros</namePart>
        <namePart type="family">Papangelis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mihail</namePart>
        <namePart type="family">Eric</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anuj</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iñigo</namePart>
        <namePart type="family">Casanueva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rushin</namePart>
        <namePart type="family">Shah</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Dialog State Tracking (DST) is a problem space in which the effective vocabulary is practically limitless. For example, the domain of possible movie titles or restaurant names is bound only by the limits of language. As such, DST systems often encounter out-of-vocabulary words at inference time that were never encountered during training. To combat this issue, we present a targeted data augmentation process, by which a practitioner observes the types of errors made on held-out evaluation data, and then modifies the training data with additional corpora to increase the vocabulary size at training time. Using this with a RoBERTa-based Transformer architecture, we achieve state-of-the-art results in comparison to systems that only mask trouble slots with special tokens. Additionally, we present a data-representation scheme for seamlessly retargeting DST architectures to new domains.</abstract>

    <!-- Identifiers and landing page for the paper. -->
    <identifier type="citekey">summerville-etal-2020-tame</identifier>
    <identifier type="doi">10.18653/v1/2020.nlp4convai-1.4</identifier>
    <location>
      <url>https://aclanthology.org/2020.nlp4convai-1.4</url>
    </location>

    <!-- Position within the host volume: issue date and page extent. -->
    <part>
      <date>2020-07</date>
      <extent unit="page">
        <start>32</start>
        <end>37</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T How to Tame Your Data: Data Augmentation for Dialog State Tracking
%A Summerville, Adam
%A Hashemi, Jordan
%A Ryan, James
%A Ferguson, William
%Y Wen, Tsung-Hsien
%Y Celikyilmaz, Asli
%Y Yu, Zhou
%Y Papangelis, Alexandros
%Y Eric, Mihail
%Y Kumar, Anuj
%Y Casanueva, Iñigo
%Y Shah, Rushin
%S Proceedings of the 2nd Workshop on Natural Language Processing for Conversational AI
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F summerville-etal-2020-tame
%X Dialog State Tracking (DST) is a problem space in which the effective vocabulary is practically limitless. For example, the domain of possible movie titles or restaurant names is bound only by the limits of language. As such, DST systems often encounter out-of-vocabulary words at inference time that were never encountered during training. To combat this issue, we present a targeted data augmentation process, by which a practitioner observes the types of errors made on held-out evaluation data, and then modifies the training data with additional corpora to increase the vocabulary size at training time. Using this with a RoBERTa-based Transformer architecture, we achieve state-of-the-art results in comparison to systems that only mask trouble slots with special tokens. Additionally, we present a data-representation scheme for seamlessly retargeting DST architectures to new domains.
%R 10.18653/v1/2020.nlp4convai-1.4
%U https://aclanthology.org/2020.nlp4convai-1.4
%U https://doi.org/10.18653/v1/2020.nlp4convai-1.4
%P 32-37
Markdown (Informal)
[How to Tame Your Data: Data Augmentation for Dialog State Tracking](https://aclanthology.org/2020.nlp4convai-1.4) (Summerville et al., NLP4ConvAI 2020)
ACL