@inproceedings{adouane-bernardy-2020-multi,
title = "When is Multi-task Learning Beneficial for Low-Resource Noisy Code-switched User-generated {A}lgerian Texts?",
author = "Adouane, Wafia and
Bernardy, Jean-Philippe",
editor = "Solorio, Thamar and
Choudhury, Monojit and
Bali, Kalika and
Sitaram, Sunayana and
Das, Amitava and
Diab, Mona",
booktitle = "Proceedings of the 4th Workshop on Computational Approaches to Code Switching",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.calcs-1.3",
pages = "17--25",
abstract = "We investigate when is it beneficial to simultaneously learn representations for several tasks, in low-resource settings. For this, we work with noisy user-generated texts in Algerian, a low-resource non-standardised Arabic variety. That is, to mitigate the problem of the data scarcity, we experiment with jointly learning progressively 4 tasks, namely code-switch detection, named entity recognition, spell normalisation and correction, and identifying users{'} sentiments. The selection of these tasks is motivated by the lack of labelled data for automatic morpho-syntactic or semantic sequence-tagging tasks for Algerian, in contrast to the case of much multi-task learning for NLP. Our empirical results show that multi-task learning is beneficial for some tasks in particular settings, and that the effect of each task on another, the order of the tasks, and the size of the training data of the task with more data do matter. Moreover, the data augmentation that we performed with no external resources has been shown to be beneficial for certain tasks.",
language = "English",
ISBN = "979-10-95546-66-5",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="adouane-bernardy-2020-multi">
<titleInfo>
<title>When is Multi-task Learning Beneficial for Low-Resource Noisy Code-switched User-generated Algerian Texts?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wafia</namePart>
<namePart type="family">Adouane</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jean-Philippe</namePart>
<namePart type="family">Bernardy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Computational Approaches to Code Switching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Monojit</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunayana</namePart>
<namePart type="family">Sitaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amitava</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mona</namePart>
<namePart type="family">Diab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-66-5</identifier>
</relatedItem>
<abstract>We investigate when is it beneficial to simultaneously learn representations for several tasks, in low-resource settings. For this, we work with noisy user-generated texts in Algerian, a low-resource non-standardised Arabic variety. That is, to mitigate the problem of the data scarcity, we experiment with jointly learning progressively 4 tasks, namely code-switch detection, named entity recognition, spell normalisation and correction, and identifying users’ sentiments. The selection of these tasks is motivated by the lack of labelled data for automatic morpho-syntactic or semantic sequence-tagging tasks for Algerian, in contrast to the case of much multi-task learning for NLP. Our empirical results show that multi-task learning is beneficial for some tasks in particular settings, and that the effect of each task on another, the order of the tasks, and the size of the training data of the task with more data do matter. Moreover, the data augmentation that we performed with no external resources has been shown to be beneficial for certain tasks.</abstract>
<identifier type="citekey">adouane-bernardy-2020-multi</identifier>
<location>
<url>https://aclanthology.org/2020.calcs-1.3</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>17</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T When is Multi-task Learning Beneficial for Low-Resource Noisy Code-switched User-generated Algerian Texts?
%A Adouane, Wafia
%A Bernardy, Jean-Philippe
%Y Solorio, Thamar
%Y Choudhury, Monojit
%Y Bali, Kalika
%Y Sitaram, Sunayana
%Y Das, Amitava
%Y Diab, Mona
%S Proceedings of the 4th Workshop on Computational Approaches to Code Switching
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-66-5
%G English
%F adouane-bernardy-2020-multi
%X We investigate when is it beneficial to simultaneously learn representations for several tasks, in low-resource settings. For this, we work with noisy user-generated texts in Algerian, a low-resource non-standardised Arabic variety. That is, to mitigate the problem of the data scarcity, we experiment with jointly learning progressively 4 tasks, namely code-switch detection, named entity recognition, spell normalisation and correction, and identifying users’ sentiments. The selection of these tasks is motivated by the lack of labelled data for automatic morpho-syntactic or semantic sequence-tagging tasks for Algerian, in contrast to the case of much multi-task learning for NLP. Our empirical results show that multi-task learning is beneficial for some tasks in particular settings, and that the effect of each task on another, the order of the tasks, and the size of the training data of the task with more data do matter. Moreover, the data augmentation that we performed with no external resources has been shown to be beneficial for certain tasks.
%U https://aclanthology.org/2020.calcs-1.3
%P 17-25
Markdown (Informal)
[When is Multi-task Learning Beneficial for Low-Resource Noisy Code-switched User-generated Algerian Texts?](https://aclanthology.org/2020.calcs-1.3) (Adouane & Bernardy, CALCS 2020)
ACL