@inproceedings{kezar-pujara-2021-finding,
title = "Finding Pragmatic Differences Between Disciplines",
author = "Kezar, Lee and
Pujara, Jay",
editor = "Beltagy, Iz and
Cohan, Arman and
Feigenblat, Guy and
Freitag, Dayne and
Ghosal, Tirthankar and
Hall, Keith and
Herrmannova, Drahomira and
Knoth, Petr and
Lo, Kyle and
Mayr, Philipp and
Patton, Robert M. and
Shmueli-Scheuer, Michal and
de Waard, Anita and
Wang, Kuansan and
Wang, Lucy Lu",
booktitle = "Proceedings of the Second Workshop on Scholarly Document Processing",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.sdp-1.10",
doi = "10.18653/v1/2021.sdp-1.10",
pages = "83--90",
abstract = "Scholarly documents have a great degree of variation, both in terms of content (semantics) and structure (pragmatics). Prior work in scholarly document understanding emphasizes semantics through document summarization and corpus topic modeling but tends to omit pragmatics such as document organization and flow. Using a corpus of scholarly documents across 19 disciplines and state-of-the-art language modeling techniques, we learn a fixed set of domain-agnostic descriptors for document sections and {``}retrofit{''} the corpus to these descriptors (also referred to as {``}normalization{''}). Then, we analyze the position and ordering of these descriptors across documents to understand the relationship between discipline and structure. We report within-discipline structural archetypes, variability, and between-discipline comparisons, supporting the hypothesis that scholarly communities, despite their size, diversity, and breadth, share similar avenues for expressing their work. Our findings lay the foundation for future work in assessing research quality, domain style transfer, and further pragmatic analysis.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kezar-pujara-2021-finding">
<titleInfo>
<title>Finding Pragmatic Differences Between Disciplines</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lee</namePart>
<namePart type="family">Kezar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jay</namePart>
<namePart type="family">Pujara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Scholarly Document Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iz</namePart>
<namePart type="family">Beltagy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arman</namePart>
<namePart type="family">Cohan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guy</namePart>
<namePart type="family">Feigenblat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dayne</namePart>
<namePart type="family">Freitag</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keith</namePart>
<namePart type="family">Hall</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Drahomira</namePart>
<namePart type="family">Herrmannova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Petr</namePart>
<namePart type="family">Knoth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Lo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Patton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michal</namePart>
<namePart type="family">Shmueli-Scheuer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">de Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kuansan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucy</namePart>
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Scholarly documents have a great degree of variation, both in terms of content (semantics) and structure (pragmatics). Prior work in scholarly document understanding emphasizes semantics through document summarization and corpus topic modeling but tends to omit pragmatics such as document organization and flow. Using a corpus of scholarly documents across 19 disciplines and state-of-the-art language modeling techniques, we learn a fixed set of domain-agnostic descriptors for document sections and “retrofit” the corpus to these descriptors (also referred to as “normalization”). Then, we analyze the position and ordering of these descriptors across documents to understand the relationship between discipline and structure. We report within-discipline structural archetypes, variability, and between-discipline comparisons, supporting the hypothesis that scholarly communities, despite their size, diversity, and breadth, share similar avenues for expressing their work. Our findings lay the foundation for future work in assessing research quality, domain style transfer, and further pragmatic analysis.</abstract>
<identifier type="citekey">kezar-pujara-2021-finding</identifier>
<identifier type="doi">10.18653/v1/2021.sdp-1.10</identifier>
<location>
<url>https://aclanthology.org/2021.sdp-1.10</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>83</start>
<end>90</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Finding Pragmatic Differences Between Disciplines
%A Kezar, Lee
%A Pujara, Jay
%Y Beltagy, Iz
%Y Cohan, Arman
%Y Feigenblat, Guy
%Y Freitag, Dayne
%Y Ghosal, Tirthankar
%Y Hall, Keith
%Y Herrmannova, Drahomira
%Y Knoth, Petr
%Y Lo, Kyle
%Y Mayr, Philipp
%Y Patton, Robert M.
%Y Shmueli-Scheuer, Michal
%Y de Waard, Anita
%Y Wang, Kuansan
%Y Wang, Lucy Lu
%S Proceedings of the Second Workshop on Scholarly Document Processing
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F kezar-pujara-2021-finding
%X Scholarly documents have a great degree of variation, both in terms of content (semantics) and structure (pragmatics). Prior work in scholarly document understanding emphasizes semantics through document summarization and corpus topic modeling but tends to omit pragmatics such as document organization and flow. Using a corpus of scholarly documents across 19 disciplines and state-of-the-art language modeling techniques, we learn a fixed set of domain-agnostic descriptors for document sections and “retrofit” the corpus to these descriptors (also referred to as “normalization”). Then, we analyze the position and ordering of these descriptors across documents to understand the relationship between discipline and structure. We report within-discipline structural archetypes, variability, and between-discipline comparisons, supporting the hypothesis that scholarly communities, despite their size, diversity, and breadth, share similar avenues for expressing their work. Our findings lay the foundation for future work in assessing research quality, domain style transfer, and further pragmatic analysis.
%R 10.18653/v1/2021.sdp-1.10
%U https://aclanthology.org/2021.sdp-1.10
%U https://doi.org/10.18653/v1/2021.sdp-1.10
%P 83-90
Markdown (Informal)
[Finding Pragmatic Differences Between Disciplines](https://aclanthology.org/2021.sdp-1.10) (Kezar & Pujara, sdp 2021)
ACL