<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://www.aclweb.org/aclwiki/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=Kopotev</id>
	<title>ACL Wiki - User contributions [en]</title>
	<link rel="self" type="application/atom+xml" href="https://www.aclweb.org/aclwiki/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=Kopotev"/>
	<link rel="alternate" type="text/html" href="https://www.aclweb.org/aclwiki/Special:Contributions/Kopotev"/>
	<updated>2026-04-24T12:59:13Z</updated>
	<subtitle>User contributions</subtitle>
	<generator>MediaWiki 1.43.6</generator>
	<entry>
		<id>https://www.aclweb.org/aclwiki/index.php?title=Resources_for_Russian&amp;diff=7868</id>
		<title>Resources for Russian</title>
		<link rel="alternate" type="text/html" href="https://www.aclweb.org/aclwiki/index.php?title=Resources_for_Russian&amp;diff=7868"/>
		<updated>2010-04-05T11:55:47Z</updated>

		<summary type="html">&lt;p&gt;Kopotev: /* Corpora */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;==Corpora==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* [http://www.helsinki.fi/venaja/english/e-material/hanco/index.htm HANCO: The Helsinki annotated corpus of Russian texts]&lt;br /&gt;
* [http://www.sfb441.uni-tuebingen.de/b1/korpora.html Russian Corpora (uni-tuebingen.de)]&lt;br /&gt;
* [http://corpus.leeds.ac.uk/ruscorpora.html Russian Internet Corpus]&lt;br /&gt;
* [http://www.ruscorpora.ru/ Russian National Corpus]&lt;br /&gt;
* [http://www.philol.msu.ru/~lex/corpus/ Russian Newspaper Corpus]&lt;br /&gt;
* [http://lib.ru/ Various texts in Russian (lib.ru)]&lt;br /&gt;
&lt;br /&gt;
== POS taggers ==&lt;br /&gt;
&lt;br /&gt;
* [http://www.aot.ru/ AOT, morphological analyser]&lt;br /&gt;
* [http://corpus.leeds.ac.uk/mocky/ Mocky, statistical taggers and lemmatiser]&lt;br /&gt;
* [http://company.yandex.ru/technology/mystem/ Mystem, morphological analyser]&lt;br /&gt;
&lt;br /&gt;
== Grammars ==&lt;br /&gt;
* [[Generation grammars|KPML generation grammar]]&lt;br /&gt;
&lt;br /&gt;
==Various resources==&lt;br /&gt;
* [http://rykov-cl.narod.ru/r.html Russian Corpora (rykov-cl.narod.ru)]&lt;br /&gt;
* [http://corpus.leeds.ac.uk/serge/frqlist/ Russian frequency lists]&lt;br /&gt;
* [http://www.philol.msu.ru/rus/galya-1 Russian Phonetics on the Web]&lt;br /&gt;
* [http://schools.keldysh.ru/uvk1838/Sciper/volume2/langres/russiclr.htm Russicon Resources]&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
[[Category:Resources by language|Russian]]&lt;/div&gt;</summary>
		<author><name>Kopotev</name></author>
	</entry>
	<entry>
		<id>https://www.aclweb.org/aclwiki/index.php?title=Resources_for_Russian&amp;diff=4474</id>
		<title>Resources for Russian</title>
		<link rel="alternate" type="text/html" href="https://www.aclweb.org/aclwiki/index.php?title=Resources_for_Russian&amp;diff=4474"/>
		<updated>2007-10-18T15:52:00Z</updated>

		<summary type="html">&lt;p&gt;Kopotev: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;==Corpora==&lt;br /&gt;
* Russian National Corpus (http://www.ruscorpora.ru)&amp;lt;p&amp;gt;&lt;br /&gt;
* Helsinki Annotated Corpus HANCO (http://www.slav.helsinki.fi/hanco/index_en.html)&lt;/div&gt;</summary>
		<author><name>Kopotev</name></author>
	</entry>
	<entry>
		<id>https://www.aclweb.org/aclwiki/index.php?title=List_of_resources_by_language&amp;diff=4473</id>
		<title>List of resources by language</title>
		<link rel="alternate" type="text/html" href="https://www.aclweb.org/aclwiki/index.php?title=List_of_resources_by_language&amp;diff=4473"/>
		<updated>2007-10-18T15:50:34Z</updated>

		<summary type="html">&lt;p&gt;Kopotev: /* R */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;List of pages which give links and commentary on computational resources by language.&lt;br /&gt;
&lt;br /&gt;
==A==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Amharic]]&lt;br /&gt;
* [[Resources for Arabic]]&lt;br /&gt;
* [[Resources for Afrikaans]]&lt;br /&gt;
&lt;br /&gt;
==B==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Bulgarian]]&lt;br /&gt;
&lt;br /&gt;
==C==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Catalan]]&lt;br /&gt;
* [[Resources for Chinese]]&lt;br /&gt;
* [[Resources for Croatian]] (see also [[Resources for Serbian]], [[Resources for Bosnian]], [[Resources for Serbo-Croatian]])&lt;br /&gt;
&lt;br /&gt;
==D==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Danish]]&lt;br /&gt;
* [[Resources for Dutch]]&lt;br /&gt;
&lt;br /&gt;
==E==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Estonian]]&lt;br /&gt;
&lt;br /&gt;
==F==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Faroese]]&lt;br /&gt;
* [[Resources for Finnish]]&lt;br /&gt;
* [[Resources for French]]&lt;br /&gt;
&lt;br /&gt;
==G==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Galician]]&lt;br /&gt;
* [[Resources for German]]&lt;br /&gt;
* [[Resources for Greek]]&lt;br /&gt;
* [[Resources for Greenlandic]]&lt;br /&gt;
&lt;br /&gt;
==H==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Hebrew]]&lt;br /&gt;
&lt;br /&gt;
* [[Resources for Hindi]]&lt;br /&gt;
&lt;br /&gt;
==I==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Icelandic]]&lt;br /&gt;
* [[Resources for Iñupiaq]]&lt;br /&gt;
* [[Resources for Italian]]&lt;br /&gt;
* [[Resources for Irish]]&lt;br /&gt;
&lt;br /&gt;
==J==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Japanese]]&lt;br /&gt;
&lt;br /&gt;
==K==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Korean]]&lt;br /&gt;
&lt;br /&gt;
==M==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Macedonian]]&lt;br /&gt;
* [[Resources for Malay]]&lt;br /&gt;
&lt;br /&gt;
==O==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Occitan]]&lt;br /&gt;
&lt;br /&gt;
==N==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Norwegian]]&lt;br /&gt;
&lt;br /&gt;
==P==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Pashto]]&lt;br /&gt;
* [[Resources for Persian]]&lt;br /&gt;
* [[Resources for Polish]]&lt;br /&gt;
&lt;br /&gt;
==R==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Romanian]]&lt;br /&gt;
* [[Resources for Russian]]&lt;br /&gt;
&lt;br /&gt;
==S==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Sámi]]&lt;br /&gt;
* [[Resources for Slovak]]&lt;br /&gt;
* [[Resources for Sorbian]]&lt;br /&gt;
* [[Resources for Swedish]]&lt;br /&gt;
&lt;br /&gt;
==T==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Tajik]]&lt;br /&gt;
* [[Resources for Turkish]]&lt;br /&gt;
&lt;br /&gt;
==W==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Welsh]]&lt;br /&gt;
&lt;br /&gt;
==Z==&lt;br /&gt;
__NOTOC__&lt;br /&gt;
{{compactTOC2}}&lt;br /&gt;
* [[Resources for Zulu]]&lt;br /&gt;
&lt;br /&gt;
==See also==&lt;br /&gt;
&lt;br /&gt;
* [[Resources for African languages]]&lt;br /&gt;
&lt;br /&gt;
[[Category:Resources by language|*]]&lt;/div&gt;</summary>
		<author><name>Kopotev</name></author>
	</entry>
	<entry>
		<id>https://www.aclweb.org/aclwiki/index.php?title=Resources_for_Russian&amp;diff=4472</id>
		<title>Resources for Russian</title>
		<link rel="alternate" type="text/html" href="https://www.aclweb.org/aclwiki/index.php?title=Resources_for_Russian&amp;diff=4472"/>
		<updated>2007-10-18T15:50:13Z</updated>

		<summary type="html">&lt;p&gt;Kopotev: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;Russian National Corpus (http://www.ruscorpora.ru)&amp;lt;p&amp;gt;&lt;br /&gt;
Helsinki Annotated Corpus HANCO (http://www.slav.helsinki.fi/hanco/index_en.html)&lt;/div&gt;</summary>
		<author><name>Kopotev</name></author>
	</entry>
	<entry>
		<id>https://www.aclweb.org/aclwiki/index.php?title=Resources_for_Russian&amp;diff=4471</id>
		<title>Resources for Russian</title>
		<link rel="alternate" type="text/html" href="https://www.aclweb.org/aclwiki/index.php?title=Resources_for_Russian&amp;diff=4471"/>
		<updated>2007-10-18T15:49:30Z</updated>

		<summary type="html">&lt;p&gt;Kopotev: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;Russian National Corpus (http://www.ruscorpora.ru)&lt;br /&gt;
Helsinki Annotated Corpus HANCO (http://www.slav.helsinki.fi/hanco/index_en.html)&lt;/div&gt;</summary>
		<author><name>Kopotev</name></author>
	</entry>
	<entry>
		<id>https://www.aclweb.org/aclwiki/index.php?title=Resources_for_Russian&amp;diff=4470</id>
		<title>Resources for Russian</title>
		<link rel="alternate" type="text/html" href="https://www.aclweb.org/aclwiki/index.php?title=Resources_for_Russian&amp;diff=4470"/>
		<updated>2007-10-18T15:49:10Z</updated>

		<summary type="html">&lt;p&gt;Kopotev: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;Russian National Corpus (www.ruscorpora.ru)&lt;br /&gt;
Helsinki Annotated Corpus HANCO (http://www.slav.helsinki.fi/hanco/index_en.html)&lt;/div&gt;</summary>
		<author><name>Kopotev</name></author>
	</entry>
	<entry>
		<id>https://www.aclweb.org/aclwiki/index.php?title=Corpora_for_English&amp;diff=3472</id>
		<title>Corpora for English</title>
		<link rel="alternate" type="text/html" href="https://www.aclweb.org/aclwiki/index.php?title=Corpora_for_English&amp;diff=3472"/>
		<updated>2007-02-22T09:01:17Z</updated>

		<summary type="html">&lt;p&gt;Kopotev: /* Russian */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&#039;&#039;This list needs some cleaning. Please help.&#039;&#039;&lt;br /&gt;
&lt;br /&gt;
==English==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://www.elda.fr/catalogue/en/speech/S0115.html American English SpeechDat-Car]&lt;br /&gt;
*[http://americannationalcorpus.org/ American National Corpus (ANC)]&lt;br /&gt;
*[http://americannationalcorpus.org/FirstRelease/ AMERICAN NATIONAL CORPUS FIRST RELEASE]&lt;br /&gt;
*[http://compbio.uchsc.edu/ccp/corpora/index.shtml Biomedical corpora]&lt;br /&gt;
*[http://homepage.mac.com/bncweb/ BNCweb a web-based interface to the British National Corpus]&lt;br /&gt;
*[http://devoted.to/corpora Bookmarks for Corpus-based Linguists]&lt;br /&gt;
*[http://info.ox.ac.uk/bnc/ British National Corpus (from Oxford University)]&lt;br /&gt;
*[http://www.natcorp.ox.ac.uk/ British National Corpus (BNC)]&lt;br /&gt;
*[http://www.comp.lancs.ac.uk/computing/research/ucrel/bnc.html British National Corpus project page (from UCREL)]&lt;br /&gt;
*[http://clwww.essex.ac.uk/w3c/corpus_ling/content/corpora/list/private/brown/brown.html Brown Corpus]&lt;br /&gt;
*[http://www.collins.co.uk/books.aspx?group=154 Collins Wordbanks]&lt;br /&gt;
*[http://www.athel.com/corpdes.html Corpus of Spoken Professional English]&lt;br /&gt;
*[http://www-rcf.usc.edu/~billmann/diversity/DDivers-site.htm Dialogue Diversity Corpus]&lt;br /&gt;
*[http://etext.lib.virginia.edu/ Electronic Text Center -- University of Virginia]&lt;br /&gt;
*[http://www.phon.ox.ac.uk/~esther/ivyweb/ English Intonation in the British Isles -The IViE Corpus]&lt;br /&gt;
*[http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-11/www/naive-bayes/bow-0.8/stopwords.c English stop words (from SMART)]&lt;br /&gt;
*[http://www-personal.umich.edu/~jlawler/levin.html English Verb Classes And Alternations: A Preliminary Investigation (Index)]&lt;br /&gt;
*[http://usna.edu/LangStudy/BNC/ Exploring Words and Phrases from the British National Corpus]&lt;br /&gt;
*[http://ir.dcs.gla.ac.uk/test_collections/gov2-summary.htm GOV2 Corpus] - 426 gigabytes of text&lt;br /&gt;
*[http://www.gutenberg.org/wiki/Main_Page Gutenberg]&lt;br /&gt;
*[http://prize.hutter1.net/ Hutter Prize for Lossless Compression of Human Knowledge 100M sample of Wikipedia]&lt;br /&gt;
*[http://nora.hd.uib.no/icame.html ICAME]&lt;br /&gt;
*[http://www.cs.fit.edu/~mmahoney/compression/text.html Large Text Compression Benchmark&#039;s 1G sample of Wikipedia]&lt;br /&gt;
*[http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-11/www/naive-bayes/bow-0.8/stopwords.c List of English stopwords]&lt;br /&gt;
*[http://www.lsi.upc.es/~nlp/tools/mapping.html Mapping WordNet Versions 1.6 and 2.0]&lt;br /&gt;
*[http://www.cs.cornell.edu/People/pabo/movie-review-data/ Movie Review Data]&lt;br /&gt;
*[http://mwe.stanford.edu/resources/ Multiword Expression Resources]&lt;br /&gt;
*[http://www.askoxford.com/oec/mainpage/?view=uk Oxford English Corpus]&lt;br /&gt;
*[http://pie.usna.edu/ Phrases in English]&lt;br /&gt;
*[http://homepages.feis.herts.ac.uk/~comrcml/Lyon-thesis.ps Restricted English Corpus from Dr. Caroline Lyon for PhD]&lt;br /&gt;
*[http://www.sketchengine.co.uk/ Sketch Engine]&lt;br /&gt;
*[http://www-2.cs.cmu.edu/afs/cs/project/ai-repository/ai/areas/nlp/corpora/susanne/0.html Susanne: Annotated American English Corpus]&lt;br /&gt;
*[http://clix.to/davidlee00 The BNC Index (for the BNCWorld Edition)]&lt;br /&gt;
*[http://www-users.york.ac.uk/~sp20/corpus.html The Brooklyn-Geneva-Amsterdam-Helsinki Parsed Corpus of Old English]&lt;br /&gt;
*[http://www-rcf.usc.edu/~billmann/diversity/DDivers-site.htm The Dialogue Diversity Corpus]&lt;br /&gt;
*[http://www.grsampson.net/LucyDoc.html The LUCY Corpus - Documentation]&lt;br /&gt;
*[http://www.cs.rochester.edu/research/cisd/resources/trains.html TRAINS Dialogue Corpus]&lt;br /&gt;
*[http://www.webcorp.org.uk/guide/ WebCorp]&lt;br /&gt;
&lt;br /&gt;
==German==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://www.phonetik.uni-muenchen.de/Bas/BasKorporaeng.html Bavarian Archive for Speech Signals Corpora]&lt;br /&gt;
*[http://corpora.ids-mannheim.de/~cosmas/ COSMAS II]&lt;br /&gt;
*[http://www.coli.uni-sb.de/sfb378/negra-corpus/negra-corpus.html NEGRA Corpus]&lt;br /&gt;
&lt;br /&gt;
==Multilingual==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://wt.jrc.it/lt/Acquis/ ACQUIS COMMUNAUTAIRE Multilingual Corpus]&lt;br /&gt;
*[http://spraakbanken.gu.se/ Bank of Swedish]&lt;br /&gt;
*[http://hnk.ffzg.hr/ Croatian National Corpus (HNK)]&lt;br /&gt;
*[http://ucnk.ff.cuni.cz/ Czech National Corpus (CNC)]&lt;br /&gt;
*[http://www.kun.nl/celex CELEX - The Dutch Center for Lexical Information]&lt;br /&gt;
*[http://www.cdc.gov/ncidod/sars/languages.htm Centre for Disease Control - Chinese, French, Japanese, Spanish info on SARS]&lt;br /&gt;
*[http://www.linguateca.pt/COMPARA/ COMPARA corpus]&lt;br /&gt;
*[http://www.debian.org/international/ Debian free software community]&lt;br /&gt;
*[http://www.ling.lancs.ac.uk/corplang/emille EMILLE corpus]&lt;br /&gt;
*[http://www.statmt.org/europarl/ European Parliament Proceedings Parallel Corpus 1996-2003]&lt;br /&gt;
*[http://www.illc.uva.nl/EuroWordNet EuroWordNet]&lt;br /&gt;
*[http://www.france.diplomatie.fr/label_france/index.html French Foreign Ministry&#039;s magazine]&lt;br /&gt;
*[http://glossa.fltr.ucl.ac.be/ GlossaNet]&lt;br /&gt;
*[http://hometown.aol.com/mit2haiti/JA-HC-kr.htm Haitian Creole corpus -Teknoloji pou lang kreyol]&lt;br /&gt;
*[http://corpus.nytud.hu/mnsz/ Hungarian National Corpus]&lt;br /&gt;
*[http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC95T20 Hansard French-English parallel corpus]&lt;br /&gt;
*[http://www.ucl.ac.uk/english-usage/ice/avail.htm ICE corpora]&lt;br /&gt;
*[http://korpus.pl/ IPI PAN Corpus of Polish]&lt;br /&gt;
*[http://www.tu-chemnitz.de/phil/InternetGrammar/ Learner Behaviour on the Internet]&lt;br /&gt;
*[http://muchmore.dfki.de/resources1.htm MuchMore Springer Bilingual Corpus]&lt;br /&gt;
*[http://nl.ijs.si/ME/ MULTEXT-East: Multilingual Corpora for Eastern and Central European Languages]&lt;br /&gt;
*[http://tcc.itc.it/people/forner/multilingualcorpora.html Multilingual Corpora: Available Resources]&lt;br /&gt;
* [http://www.csse.monash.edu.au/~jwb/tanakacorpus.html Tanaka Corpus: Japanese-English sentence pairs]&lt;br /&gt;
*[http://multisemcor.itc.it MultiSemCor]&lt;br /&gt;
*[http://www.ims.uni-stuttgart.de/info/Newspapers.html Newspapers on the Internet]&lt;br /&gt;
*[http://logos.uio.no/opus/ OPUS - an open source parallel corpus]&lt;br /&gt;
*[http://www.tekstlab.uio.no/Bosnian/Corpus.html Oslo Corpus of Bosnian]&lt;br /&gt;
*[http://langbank.engl.polyu.edu.hk/indexl.html PolyU Language Bank]&lt;br /&gt;
*[http://www.corpusdoportugues.org/ Portuguese Corpus]&lt;br /&gt;
*[http://register.consilium.eu.int/ Public registry of the Council of the EU]&lt;br /&gt;
*[http://www.ruscorpora.ru/ Russian National Corpus (RNK)]&lt;br /&gt;
*[http://www.multilingual.com/allen51.htm The Bible as a Resource for Translation Software]&lt;br /&gt;
*[http://www.cogsci.ed.ac.uk/elsnet/eci.html The ECI Multilingual corpus]&lt;br /&gt;
*[http://www.fida.net/ Slovenian Corpus FIDA] and [http://www.fidaplus.net/ FIDA+]&lt;br /&gt;
*[http://www.corpusdelespanol.org/ Spanish Corpus]&lt;br /&gt;
*[http://www.unhchr.ch/udhr/index.htm UN declaration of human rights in multiple languages]&lt;br /&gt;
*[http://www-igm.univ-mlv.fr/~unitex/ UNITEX]&lt;br /&gt;
*[http://www.u-grenoble3.fr/kraif/liens.htm Useful links about parallel corpora, by Olivier Kraif]&lt;br /&gt;
*[http://wacky.sslmit.unibo.it/ WaCky Project]&lt;br /&gt;
*[http://www.wortschatz.uni-leipzig.de/html/wliste.html Wortlisten: spoken German, English, French, and Dutch]&lt;br /&gt;
&lt;br /&gt;
==Russian==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://bokrcorpora.narod.ru Bokr Russian Reference Corpus]&lt;br /&gt;
*[http://www.slav.helsinki.fi/hanco/index_en.html HANCO: The Helsinki annotated corpus of Russian texts]&lt;br /&gt;
*[http://www.sfb441.uni-tuebingen.de/b1/korpora.html Russian Corpora]&lt;br /&gt;
*[http://rykov-cl.narod.ru/r.html Russian Corpora]&lt;br /&gt;
*[http://lib.ru/ Russian Corpus Site]&lt;br /&gt;
*[http://www.ruscorpora.ru/ The Russian National Corpus]&lt;br /&gt;
*[http://www.philol.msu.ru/~lex/corpus/ Russian Newspaper Corpus]&lt;br /&gt;
*[http://schools.keldysh.ru/uvk1838/Sciper/volume2/langres/russiclr.htm Russicon Resources]&lt;br /&gt;
&lt;br /&gt;
==Slovak==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://korpus.juls.savba.sk/index.en.html Slovak National Corpus]&lt;br /&gt;
&lt;br /&gt;
==Italian==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://languageserver.uni-graz.at/badip/badip/20_corpusLip.php LIP - Lessico di frequenza dell&#039;Italiano Parlato - Access via BADIP]&lt;br /&gt;
*[http://www.istc.cnr.it/material/database/colfis/ ColFIS Corpus e Lessico di Frequenza dell&#039;Italiano Scritto]&lt;br /&gt;
*[http://corpus.cilta.unibo.it:8080/coris_ita.html Corpus di Italiano Scritto contemporaneo (CORIS/CODIS)]&lt;br /&gt;
*[http://tlio.ovi.cnr.it/TLIO/ Tesoro della lingua italiana delle origini (TLIO)]&lt;br /&gt;
&lt;br /&gt;
==Link collections==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://www.dcs.gla.ac.uk/idom/ir_resources/ Collections of texts and corpora]&lt;br /&gt;
*[http://www.bmanuel.org/clr2_mp.html Manuel Barbera: General Corpora and Corpus Linguistics Resources]&lt;br /&gt;
*[http://www.alphabit.net Isabella Chiari: Corpora, Software and Linguistic resources]&lt;br /&gt;
*[http://www.sultry.arts.usyd.edu.au/links/statnlp.html Annotated list of resources on statistical NLP and corpus-based CL]&lt;br /&gt;
&lt;br /&gt;
==Corpora tools==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://www.dcs.gla.ac.uk/idom/ir_resources/linguistic_utils/stop_words List of stop words]&lt;br /&gt;
*[http://www.sketchengine.co.uk/ The Sketch Engine]&lt;br /&gt;
*[http://www.cis.upenn.edu/~treebank/tokenization.html Treebank tokenization scheme]&lt;br /&gt;
&lt;br /&gt;
==Uncategorized==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
===Arabic===&lt;br /&gt;
*[http://www.ldc.upenn.edu/Catalog/LDC2001T55.html Arabic Newswire Part 1]&lt;br /&gt;
===Bosnian===&lt;br /&gt;
*[http://www.tekstlab.uio.no/Bosnian/Corpus.html The Oslo Corpus of Bosnian Texts]&lt;br /&gt;
===Bulgarian===&lt;br /&gt;
*[http://www.hf.uio.no/easteur-orient/bulg/mat/ Corpus of spoken Bulgarian]&lt;br /&gt;
===Czech===&lt;br /&gt;
*[http://ucnk.ff.cuni.cz/english/index.html Czech National Corpus]&lt;br /&gt;
===Danish===&lt;br /&gt;
*[http://korpus.dsl.dk/korpus2000/indgang.php Danish news corpus]&lt;br /&gt;
===English===&lt;br /&gt;
*[ftp://ftp.cs.cornell.edu/pub/smart/time/ 1963 Time Magazine corpus]&lt;br /&gt;
*[http://www.cornelsen.de/international/ An Empirical Grammar of the English Verb System]&lt;br /&gt;
*[http://thetis.bl.uk/ BNC Online Service]&lt;br /&gt;
*[http://info.ox.ac.uk/bnc/ BRITISH NATIONAL CORPUS - WORLD EDITION]&lt;br /&gt;
===Finnish===&lt;br /&gt;
*[http://www.csc.fi/kielipankki/ Finnish text bank]&lt;br /&gt;
===French===&lt;br /&gt;
*[http://atilf.atilf.fr/dmf.htm Base Textuelle de Moyen Français]&lt;br /&gt;
===German===&lt;br /&gt;
*[http://www.coli.uni-sb.de/sfb378/negra-corpus/ A Syntactically Annotated Corpus of German Newspaper Texts]&lt;br /&gt;
*[http://www.ims.uni-stuttgart.de/projekte/tc/CQP.html Experimental Corpus Query System (University of Stuttgart, Germany)]&lt;br /&gt;
===Haitian Creole===&lt;br /&gt;
*[http://hometown.aol.com/mit2haiti/Index4.html HAITIAN CREOLE ELECTRONIC TEXTS]&lt;br /&gt;
===Italian===&lt;br /&gt;
*[http://www.uni-duisburg.de/Fak2/FremdPhil/Romanistik/Personal/Burr/humcomp/ Oxford Text Archive Corpus of Italian Newspapers]&lt;br /&gt;
===Japanese===&lt;br /&gt;
*[http://www.csse.monash.edu.au/~jwb/afaq/jitadoushi.html list of Japanese transitive - intransitive verb pairs]&lt;br /&gt;
===Polish===&lt;br /&gt;
*[http://korpus.pl/en/ IPI PAN Polish Corpus]&lt;br /&gt;
===Romanian===&lt;br /&gt;
*[http://www.cs.unt.edu/~rada/downloads.html Romanian NLP]&lt;br /&gt;
===Sanskrit===&lt;br /&gt;
*[http://sanskritlibrary.org/ Sanskrit Library]&lt;br /&gt;
&lt;br /&gt;
===Slovenian===&lt;br /&gt;
*[http://nl.ijs.si/elan/#corpus Slovene-English Parallel Corpus]&lt;br /&gt;
===Spanish===&lt;br /&gt;
*[http://www.corpusdelespanol.org/ Corpus del Español]&lt;br /&gt;
*[http://www.lllf.uam.es/~fmarcos/informes/corpus/corpulee.html Corpus de referencia de la lengua Española contemporánea: corpus oral peninsular]&lt;br /&gt;
===Swahili===&lt;br /&gt;
*[http://www.csc.fi/kielipankki/aineistot/hcs/index.phtml.en Helsinki Corpus of Swahili (HCS)]&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
*[http://www.ldc.upenn.edu/Catalog/LDC2001S97.html 2000 NIST Speaker Recognition Evaluation Corpus]&lt;br /&gt;
*[http://ixa.si.ehu.es/Ixa/resources/sensecorpus A Web Corpus and Topic Signatures for All WordNet 1.6 Nominal Senses (v 1.0)]&lt;br /&gt;
*[http://odur.let.rug.nl/~vannoord/trees/ Alpino Treebank]&lt;br /&gt;
*[http://www.aot.ru/search1.html AOT]&lt;br /&gt;
*[http://pioneer.chula.ac.th/~awirote/ling/corpuslst.htm Corpus Resources (Chulalongkorn University, Thailand)]&lt;br /&gt;
*[ftp://ftp.cs.cornell.edu/pub/smart/cran/ Cranfield collection]&lt;br /&gt;
*[http://corpus.rae.es/creanet.html CREA]&lt;br /&gt;
*[http://www.eat.rl.ac.uk/ Edinburgh Associative Thesaurus (EAT)]&lt;br /&gt;
*[http://www.hum.uva.nl/~ewn EuroWordNet]&lt;br /&gt;
*[http://rali.iro.umontreal.ca/ Hansards Corpus - Searchable]&lt;br /&gt;
*[http://www.hcrc.ed.ac.uk/maptask/ HCRC Map Task Corpus XML annotations]&lt;br /&gt;
*[http://nats-www.informatik.uni-hamburg.de/~ingo/icopost/ ICOPOST]&lt;br /&gt;
*[http://www.ims.uni-stuttgart.de/projekte/TC.html IMS Corpus Toolbox, Univ. of Stuttgart]&lt;br /&gt;
*[http://www.ims.uni-stuttgart.de/projekte/CorpusWorkbench/ IMS Corpus Workbench (CWB)]&lt;br /&gt;
*[http://cecl.fltr.ucl.ac.be/Cecl-Projects/Icle/icle.htm International Corpus of Learner English]&lt;br /&gt;
*[http://www.ipds.uni-kiel.de/links/datenmaterial.en.html Kiel University&#039;s Institute on Phonetics and Speech Processing]&lt;br /&gt;
*[http://www.nilc.icmc.usp.br/lacioweb Lacio Web Corpora]&lt;br /&gt;
*[http://www.vuw.ac.nz/llc/ LANGUAGE LEARNING CENTER - ACADEMIC CORPUS]&lt;br /&gt;
*[http://www.bmanuel.org/clr2_mp.html Manuel Barbera: General Corpora and Corpus Linguistics Resources]&lt;br /&gt;
*[ftp://ftp.cs.cornell.edu/pub/smart/med/ Medlars collection]&lt;br /&gt;
*[ftp://ftp.ox.ac.uk/pub/wordlists/ Miscellaneous Word Lists from Oxford University]&lt;br /&gt;
*[http://www.lpl.univ-aix.fr/projects/multext/ Multilingual Text Tools and Corpora]&lt;br /&gt;
*[http://www.census.gov/genealogy/names Name lists from US census]&lt;br /&gt;
*[http://www.di.fc.ul.pt/~ahb/nexing.htm Nexing Corpus]&lt;br /&gt;
*[http://www.cs.cmu.edu/web/books.html On-line books at CMU]&lt;br /&gt;
*[http://logos.uio.no/opus/ OPUS -- An Open Source Parallel Corpus]&lt;br /&gt;
*[http://elex.amu.edu.pl/~przemka/PICLE_search.php Polish subcorpus of the International Corpus of Learner English]&lt;br /&gt;
*[http://www.cirp.es/WXN/wxn/frames/proxectos.html Ramón Piñeiro Center for Research]&lt;br /&gt;
*[http://about.reuters.com/researchandstandards/corpus/ Reuters Corpus]&lt;br /&gt;
*[http://www.ldc.upenn.edu/Catalog/LDC2001S97.html Speech in Noisy Environments 1 (SPINE1 CODED) Coded Audio]&lt;br /&gt;
*[http://www.ldc.upenn.edu/Catalog/LDC2001S99.html Speech in Noisy Environments 2 (SPINE2 CODED) Coded Audio]&lt;br /&gt;
*[http://www.cs.cmu.edu/afs/cs/project/ai-repository/ai/areas/nlp/doc/notes/corpora.txt Survey of Electronic Corpora (by Jane A. Edwards, file at CMU)]&lt;br /&gt;
*[http://www.ucl.ac.uk/english-usage/ Survey of English Usage, University College, London]&lt;br /&gt;
*[http://www.icsi.berkeley.edu/real/stp/index.html Switchboard Transcription Project]&lt;br /&gt;
*[http://www.tractor.de/ TELRI Research Archive of Computational Tools and Resources]&lt;br /&gt;
*[http://childes.psy.cmu.edu/ The Childes Corpus - Children&#039;s language]&lt;br /&gt;
*[http://nora.hd.uib.no/index-e.html The CORPORA DataCenter (Norway)]&lt;br /&gt;
*[ftp://ftp.dcs.shef.ac.uk/share/ilash/Moby/ The Moby Corpus]&lt;br /&gt;
*[http://www.hf.uio.no/tekstlab/prosjekter/SOFIE.htm The Sofie Treebank - A Parallel Treebank of North European Languages]&lt;br /&gt;
&lt;br /&gt;
[[Category:Corpora|*]]&lt;/div&gt;</summary>
		<author><name>Kopotev</name></author>
	</entry>
	<entry>
		<id>https://www.aclweb.org/aclwiki/index.php?title=Corpora_for_English&amp;diff=3471</id>
		<title>Corpora for English</title>
		<link rel="alternate" type="text/html" href="https://www.aclweb.org/aclwiki/index.php?title=Corpora_for_English&amp;diff=3471"/>
		<updated>2007-02-22T08:55:46Z</updated>

		<summary type="html">&lt;p&gt;Kopotev: /* Russian */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&#039;&#039;This list needs some cleaning. Please help.&#039;&#039;&lt;br /&gt;
&lt;br /&gt;
==English==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://www.elda.fr/catalogue/en/speech/S0115.html American English SpeechDat-Car]&lt;br /&gt;
*[http://americannationalcorpus.org/ American National Corpus (ANC)]&lt;br /&gt;
*[http://americannationalcorpus.org/FirstRelease/ AMERICAN NATIONAL CORPUS FIRST RELEASE]&lt;br /&gt;
*[http://compbio.uchsc.edu/ccp/corpora/index.shtml Biomedical corpora]&lt;br /&gt;
*[http://homepage.mac.com/bncweb/ BNCweb a web-based interface to the British National Corpus]&lt;br /&gt;
*[http://devoted.to/corpora Bookmarks for Corpus-based Linguists]&lt;br /&gt;
*[http://info.ox.ac.uk/bnc/ British National Corpus (from Oxford University)]&lt;br /&gt;
*[http://www.natcorp.ox.ac.uk/ British National Corpus (BNC)]&lt;br /&gt;
*[http://www.comp.lancs.ac.uk/computing/research/ucrel/bnc.html British National Corpus project page (from UCREL)]&lt;br /&gt;
*[http://clwww.essex.ac.uk/w3c/corpus_ling/content/corpora/list/private/brown/brown.html Brown Corpus]&lt;br /&gt;
*[http://www.collins.co.uk/books.aspx?group=154 Collins Wordbanks]&lt;br /&gt;
*[http://www.athel.com/corpdes.html Corpus of Spoken Professional English]&lt;br /&gt;
*[http://www-rcf.usc.edu/~billmann/diversity/DDivers-site.htm Dialogue Diversity Corpus]&lt;br /&gt;
*[http://etext.lib.virginia.edu/ Electronic Text Center -- University of Virginia]&lt;br /&gt;
*[http://www.phon.ox.ac.uk/~esther/ivyweb/ English Intonation in the British Isles -The IViE Corpus]&lt;br /&gt;
*[http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-11/www/naive-bayes/bow-0.8/stopwords.c English stop words (from SMART)]&lt;br /&gt;
*[http://www-personal.umich.edu/~jlawler/levin.html English Verb Classes And Alternations: A Preliminary Investigation (Index)]&lt;br /&gt;
*[http://usna.edu/LangStudy/BNC/ Exploring Words and Phrases from the British National Corpus]&lt;br /&gt;
*[http://ir.dcs.gla.ac.uk/test_collections/gov2-summary.htm GOV2 Corpus] - 426 gigabytes of text&lt;br /&gt;
*[http://www.gutenberg.org/wiki/Main_Page Gutenberg]&lt;br /&gt;
*[http://prize.hutter1.net/ Hutter Prize for Lossless Compression of Human Knowledge 100M sample of Wikipedia]&lt;br /&gt;
*[http://nora.hd.uib.no/icame.html ICAME]&lt;br /&gt;
*[http://www.cs.fit.edu/~mmahoney/compression/text.html Large Text Compression Benchmark&#039;s 1G sample of Wikipedia]&lt;br /&gt;
*[http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-11/www/naive-bayes/bow-0.8/stopwords.c List of English stopwords]&lt;br /&gt;
*[http://www.lsi.upc.es/~nlp/tools/mapping.html Mapping WordNet Versions 1.6 and 2.0]&lt;br /&gt;
*[http://www.cs.cornell.edu/People/pabo/movie-review-data/ Movie Review Data]&lt;br /&gt;
*[http://mwe.stanford.edu/resources/ Multiword Expression Resources]&lt;br /&gt;
*[http://www.askoxford.com/oec/mainpage/?view=uk Oxford English Corpus]&lt;br /&gt;
*[http://pie.usna.edu/ Phrases in English]&lt;br /&gt;
*[http://homepages.feis.herts.ac.uk/~comrcml/Lyon-thesis.ps Restricted English Corpus from Dr. Caroline Lyon for PhD]&lt;br /&gt;
*[http://www.sketchengine.co.uk/ Sketch Engine]&lt;br /&gt;
*[http://www-2.cs.cmu.edu/afs/cs/project/ai-repository/ai/areas/nlp/corpora/susanne/0.html Susanne: Annotated American English Corpus]&lt;br /&gt;
*[http://clix.to/davidlee00 The BNC Index (for the BNCWorld Edition)]&lt;br /&gt;
*[http://www-users.york.ac.uk/~sp20/corpus.html The Brooklyn-Geneva-Amsterdam-Helsinki Parsed Corpus of Old English]&lt;br /&gt;
*[http://www-rcf.usc.edu/~billmann/diversity/DDivers-site.htm The Dialogue Diversity Corpus]&lt;br /&gt;
*[http://www.grsampson.net/LucyDoc.html The LUCY Corpus - Documentation]&lt;br /&gt;
*[http://www.cs.rochester.edu/research/cisd/resources/trains.html TRAINS Dialogue Corpus]&lt;br /&gt;
*[http://www.webcorp.org.uk/guide/ WebCorp]&lt;br /&gt;
&lt;br /&gt;
==German==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://www.phonetik.uni-muenchen.de/Bas/BasKorporaeng.html Bavarian Archive for Speech Signals Corpora]&lt;br /&gt;
*[http://corpora.ids-mannheim.de/~cosmas/ COSMAS II]&lt;br /&gt;
*[http://www.coli.uni-sb.de/sfb378/negra-corpus/negra-corpus.html NEGRA Corpus]&lt;br /&gt;
&lt;br /&gt;
==Multilingual==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://wt.jrc.it/lt/Acquis/ ACQUIS COMMUNAUTAIRE Multilingual Corpus]&lt;br /&gt;
*[http://spraakbanken.gu.se/ Bank of Swedish]&lt;br /&gt;
*[http://hnk.ffzg.hr/ Croatian National Corpus (HNK)]&lt;br /&gt;
*[http://ucnk.ff.cuni.cz/ Czech National Corpus (CNC)]&lt;br /&gt;
*[http://www.kun.nl/celex CELEX - The Dutch Center for Lexical Information]&lt;br /&gt;
*[http://www.cdc.gov/ncidod/sars/languages.htm Centre for Disease Control - Chinese, French, Japanese, Spanish info on SARS]&lt;br /&gt;
*[http://www.linguateca.pt/COMPARA/ COMPARA corpus]&lt;br /&gt;
*[http://www.debian.org/international/ Debian free software community]&lt;br /&gt;
*[http://www.ling.lancs.ac.uk/corplang/emille EMILLE corpus]&lt;br /&gt;
*[http://www.statmt.org/europarl/ European Parliament Proceedings Parallel Corpus 1996-2003]&lt;br /&gt;
*[http://www.illc.uva.nl/EuroWordNet EuroWordNet]&lt;br /&gt;
*[http://www.france.diplomatie.fr/label_france/index.html French Foreign Ministry&#039;s magazine]&lt;br /&gt;
*[http://glossa.fltr.ucl.ac.be/ GlossaNet]&lt;br /&gt;
*[http://hometown.aol.com/mit2haiti/JA-HC-kr.htm Haitian Creole corpus - Teknoloji pou lang kreyol]&lt;br /&gt;
*[http://corpus.nytud.hu/mnsz/ Hungarian National Corpus]&lt;br /&gt;
*[http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC95T20 Hansard French-English parallel corpus]&lt;br /&gt;
*[http://www.ucl.ac.uk/english-usage/ice/avail.htm ICE corpora]&lt;br /&gt;
*[http://korpus.pl/ IPI PAN Corpus of Polish]&lt;br /&gt;
*[http://www.tu-chemnitz.de/phil/InternetGrammar/ Learner Behaviour on the Internet]&lt;br /&gt;
*[http://muchmore.dfki.de/resources1.htm MuchMore Springer Bilingual Corpus]&lt;br /&gt;
*[http://nl.ijs.si/ME/ MULTEXT-East: Multilingual Corpora for Eastern and Central European Languages]&lt;br /&gt;
*[http://tcc.itc.it/people/forner/multilingualcorpora.html Multilingual Corpora: Available Resources]&lt;br /&gt;
* [http://www.csse.monash.edu.au/~jwb/tanakacorpus.html Tanaka Corpus: Japanese-English sentence pairs]&lt;br /&gt;
*[http://multisemcor.itc.it MultiSemCor]&lt;br /&gt;
*[http://www.ims.uni-stuttgart.de/info/Newspapers.html Newspapers on the Internet]&lt;br /&gt;
*[http://logos.uio.no/opus/ OPUS - an open source parallel corpus]&lt;br /&gt;
*[http://www.tekstlab.uio.no/Bosnian/Corpus.html Oslo Corpus of Bosnian]&lt;br /&gt;
*[http://langbank.engl.polyu.edu.hk/indexl.html PolyU Language Bank]&lt;br /&gt;
*[http://www.corpusdoportugues.org/ Portuguese Corpus]&lt;br /&gt;
*[http://register.consilium.eu.int/ Public registry of the Council of the EU]&lt;br /&gt;
*[http://www.ruscorpora.ru/ Russian National Corpus (RNK)]&lt;br /&gt;
*[http://www.multilingual.com/allen51.htm The Bible as a Resource for Translation Software]&lt;br /&gt;
*[http://www.cogsci.ed.ac.uk/elsnet/eci.html The ECI Multilingual corpus]&lt;br /&gt;
*[http://www.fida.net/ Slovenian Corpus FIDA] and [http://www.fidaplus.net/ FIDA+]&lt;br /&gt;
*[http://www.corpusdelespanol.org/ Spanish Corpus]&lt;br /&gt;
*[http://www.unhchr.ch/udhr/index.htm UN declaration of human rights in multiple languages]&lt;br /&gt;
*[http://www-igm.univ-mlv.fr/~unitex/ UNITEX]&lt;br /&gt;
*[http://www.u-grenoble3.fr/kraif/liens.htm Useful links about parallel corpora, by Olivier Kraif]&lt;br /&gt;
*[http://wacky.sslmit.unibo.it/ WaCky Project]&lt;br /&gt;
*[http://www.wortschatz.uni-leipzig.de/html/wliste.html Wortlisten: spoken German, English, French, and Dutch]&lt;br /&gt;
&lt;br /&gt;
==Russian==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://bokrcorpora.narod.ru Bokr Russian Reference Corpus]&lt;br /&gt;
*[http://www.slav.helsinki.fi/hanco/index_en.html The HANCO: The Helsinki annotated corpus of Russian texts]&lt;br /&gt;
*[http://www.sfb441.uni-tuebingen.de/b1/korpora.html Russian Corpora]&lt;br /&gt;
*[http://rykov-cl.narod.ru/r.html Russian Corpora]&lt;br /&gt;
*[http://lib.ru/ Russian Corpus Site]&lt;br /&gt;
*[http://www.ruscorpora.ru/ The Russian National Corpus]&lt;br /&gt;
*[http://www.philol.msu.ru/~lex/corpus/ Russian Newspaper Corpus]&lt;br /&gt;
*[http://schools.keldysh.ru/uvk1838/Sciper/volume2/langres/russiclr.htm Russicon Resources]&lt;br /&gt;
&lt;br /&gt;
==Slovak==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://korpus.juls.savba.sk/index.en.html Slovak National Corpus]&lt;br /&gt;
&lt;br /&gt;
==Italian==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://languageserver.uni-graz.at/badip/badip/20_corpusLip.php LIP - Lessico di frequenza dell&#039;Italiano Parlato - Access via BADIP]&lt;br /&gt;
*[http://www.istc.cnr.it/material/database/colfis/ ColFIS Corpus e Lessico di Frequenza dell&#039;Italiano Scritto]&lt;br /&gt;
*[http://corpus.cilta.unibo.it:8080/coris_ita.html Corpus di Italiano Scritto contemporaneo (CORIS/CODIS)]&lt;br /&gt;
*[http://tlio.ovi.cnr.it/TLIO/ Tesoro della lingua italiana delle origini (TLIO)]&lt;br /&gt;
&lt;br /&gt;
==Link collections==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://www.dcs.gla.ac.uk/idom/ir_resources/ Collections of texts and corpora]&lt;br /&gt;
*[http://www.bmanuel.org/clr2_mp.html Manuel Barbera: General Corpora and Corpus Linguistics Resources]&lt;br /&gt;
*[http://www.alphabit.net Isabella Chiari: Corpora, Software and Linguistic resources]&lt;br /&gt;
*[http://www.sultry.arts.usyd.edu.au/links/statnlp.html Annotated list of resources on statistical NLP and corpus-based CL]&lt;br /&gt;
&lt;br /&gt;
==Corpora tools==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
*[http://www.dcs.gla.ac.uk/idom/ir_resources/linguistic_utils/stop_words List of stop words]&lt;br /&gt;
*[http://www.sketchengine.co.uk/ The Sketch Engine]&lt;br /&gt;
*[http://www.cis.upenn.edu/~treebank/tokenization.html Treebank tokenization scheme]&lt;br /&gt;
&lt;br /&gt;
==Uncategorized==&lt;br /&gt;
&amp;lt;!-- Please keep this list in alphabetical order --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
===Arabic===&lt;br /&gt;
*[http://www.ldc.upenn.edu/Catalog/LDC2001T55.html Arabic Newswire Part 1]&lt;br /&gt;
===Bosnian===&lt;br /&gt;
*[http://www.tekstlab.uio.no/Bosnian/Corpus.html The Oslo Corpus of Bosnian Texts]&lt;br /&gt;
===Bulgarian===&lt;br /&gt;
*[http://www.hf.uio.no/easteur-orient/bulg/mat/ Corpus of spoken Bulgarian]&lt;br /&gt;
===Czech===&lt;br /&gt;
*[http://ucnk.ff.cuni.cz/english/index.html Czech National Corpus]&lt;br /&gt;
===Danish===&lt;br /&gt;
*[http://korpus.dsl.dk/korpus2000/indgang.php Danish news corpus]&lt;br /&gt;
===English===&lt;br /&gt;
*[ftp://ftp.cs.cornell.edu/pub/smart/time/ 1963 Time Magazine corpus]&lt;br /&gt;
*[http://www.cornelsen.de/international/ An Empirical Grammar of the English Verb System]&lt;br /&gt;
*[http://thetis.bl.uk/ BNC Online Service]&lt;br /&gt;
*[http://info.ox.ac.uk/bnc/ BRITISH NATIONAL CORPUS - WORLD EDITION]&lt;br /&gt;
===Finnish===&lt;br /&gt;
*[http://www.csc.fi/kielipankki/ Finnish text bank]&lt;br /&gt;
===French===&lt;br /&gt;
*[http://atilf.atilf.fr/dmf.htm Base Textuelle de Moyen Français]&lt;br /&gt;
===German===&lt;br /&gt;
*[http://www.coli.uni-sb.de/sfb378/negra-corpus/ A Syntactically Annotated Corpus of German Newspaper Texts]&lt;br /&gt;
*[http://www.ims.uni-stuttgart.de/projekte/tc/CQP.html Experimental Corpus Query System (University of Stuttgart, Germany)]&lt;br /&gt;
===Haitian Creole===&lt;br /&gt;
*[http://hometown.aol.com/mit2haiti/Index4.html HAITIAN CREOLE ELECTRONIC TEXTS]&lt;br /&gt;
===Italian===&lt;br /&gt;
*[http://www.uni-duisburg.de/Fak2/FremdPhil/Romanistik/Personal/Burr/humcomp/ Oxford Text Archive Corpus of Italian Newspapers]&lt;br /&gt;
===Japanese===&lt;br /&gt;
*[http://www.csse.monash.edu.au/~jwb/afaq/jitadoushi.html List of Japanese transitive-intransitive verb pairs]&lt;br /&gt;
===Polish===&lt;br /&gt;
*[http://korpus.pl/en/ IPI PAN Polish Corpus]&lt;br /&gt;
===Romanian===&lt;br /&gt;
*[http://www.cs.unt.edu/~rada/downloads.html Romanian NLP]&lt;br /&gt;
===Sanskrit===&lt;br /&gt;
*[http://sanskritlibrary.org/ Sanskrit Library]&lt;br /&gt;
&lt;br /&gt;
===Slovenian===&lt;br /&gt;
*[http://nl.ijs.si/elan/#corpus Slovene-English Parallel Corpus]&lt;br /&gt;
===Spanish===&lt;br /&gt;
*[http://www.corpusdelespanol.org/ Corpus del Español]&lt;br /&gt;
*[http://www.lllf.uam.es/~fmarcos/informes/corpus/corpulee.html Corpus de referencia de la lengua Española contemporánea: corpus oral peninsular]&lt;br /&gt;
===Swahili===&lt;br /&gt;
*[http://www.csc.fi/kielipankki/aineistot/hcs/index.phtml.en Helsinki Corpus of Swahili (HCS)]&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
*[http://www.ldc.upenn.edu/Catalog/LDC2001S97.html 2000 NIST Speaker Recognition Evaluation Corpus]&lt;br /&gt;
*[http://ixa.si.ehu.es/Ixa/resources/sensecorpus A Web Corpus and Topic Signatures for All WordNet 1.6 Nominal Senses (v 1.0)]&lt;br /&gt;
*[http://odur.let.rug.nl/~vannoord/trees/ Alpino Treebank]&lt;br /&gt;
*[http://www.aot.ru/search1.html AOT]&lt;br /&gt;
*[http://pioneer.chula.ac.th/~awirote/ling/corpuslst.htm Corpus Resources (Chulalongkorn University, Thailand)]&lt;br /&gt;
*[ftp://ftp.cs.cornell.edu/pub/smart/cran/ Cranfield collection]&lt;br /&gt;
*[http://corpus.rae.es/creanet.html CREA]&lt;br /&gt;
*[http://www.eat.rl.ac.uk/ Edinburgh Associative Thesaurus (EAT)]&lt;br /&gt;
*[http://www.hum.uva.nl/~ewn EuroWordNet]&lt;br /&gt;
*[http://rali.iro.umontreal.ca/ Hansards Corpus - Searchable]&lt;br /&gt;
*[http://www.hcrc.ed.ac.uk/maptask/ HCRC Map Task Corpus XML annotations]&lt;br /&gt;
*[http://nats-www.informatik.uni-hamburg.de/~ingo/icopost/ ICOPOST]&lt;br /&gt;
*[http://www.ims.uni-stuttgart.de/projekte/TC.html IMS Corpus Toolbox, Univ. of Stuttgart]&lt;br /&gt;
*[http://www.ims.uni-stuttgart.de/projekte/CorpusWorkbench/ IMS Corpus Workbench (CWB)]&lt;br /&gt;
*[http://cecl.fltr.ucl.ac.be/Cecl-Projects/Icle/icle.htm International Corpus of Learner English]&lt;br /&gt;
*[http://www.ipds.uni-kiel.de/links/datenmaterial.en.html Kiel University&#039;s Institute on Phonetics and Speech Processing]&lt;br /&gt;
*[http://www.nilc.icmc.usp.br/lacioweb Lacio Web Corpora]&lt;br /&gt;
*[http://www.vuw.ac.nz/llc/ LANGUAGE LEARNING CENTER - ACADEMIC CORPUS]&lt;br /&gt;
*[http://www.bmanuel.org/clr2_mp.html Manuel Barbera: General Corpora and Corpus Linguistics Resources]&lt;br /&gt;
*[ftp://ftp.cs.cornell.edu/pub/smart/med/ Medlars collection]&lt;br /&gt;
*[ftp://ftp.ox.ac.uk/pub/wordlists/ Miscellaneous Word Lists from Oxford University]&lt;br /&gt;
*[http://www.lpl.univ-aix.fr/projects/multext/ Multilingual Text Tools and Corpora]&lt;br /&gt;
*[http://www.census.gov/genealogy/names Name lists from US census]&lt;br /&gt;
*[http://www.di.fc.ul.pt/~ahb/nexing.htm Nexing Corpus]&lt;br /&gt;
*[http://www.cs.cmu.edu/web/books.html On-line books at CMU]&lt;br /&gt;
*[http://logos.uio.no/opus/ OPUS -- An Open Source Parallel Corpus]&lt;br /&gt;
*[http://elex.amu.edu.pl/~przemka/PICLE_search.php Polish subcorpus of the International Corpus of Learner English]&lt;br /&gt;
*[http://www.cirp.es/WXN/wxn/frames/proxectos.html Ramón Piñeiro Center for Research]&lt;br /&gt;
*[http://about.reuters.com/researchandstandards/corpus/ Reuters Corpus]&lt;br /&gt;
*[http://www.ldc.upenn.edu/Catalog/LDC2001S97.html Speech in Noisy Environments 1 (SPINE1 CODED) Coded Audio]&lt;br /&gt;
*[http://www.ldc.upenn.edu/Catalog/LDC2001S99.html Speech in Noisy Environments 2 (SPINE2 CODED) Coded Audio]&lt;br /&gt;
*[http://www.cs.cmu.edu/afs/cs/project/ai-repository/ai/areas/nlp/doc/notes/corpora.txt Survey of Electronic Corpora (by Jane A. Edwards, file at CMU)]&lt;br /&gt;
*[http://www.ucl.ac.uk/english-usage/ Survey of English Usage, University College, London]&lt;br /&gt;
*[http://www.icsi.berkeley.edu/real/stp/index.html Switchboard Transcription Project]&lt;br /&gt;
*[http://www.tractor.de/ TELRI Research Archive of Computational Tools and Resources]&lt;br /&gt;
*[http://childes.psy.cmu.edu/ The Childes Corpus - Children&#039;s language]&lt;br /&gt;
*[http://nora.hd.uib.no/index-e.html The CORPORA DataCenter (Norway)]&lt;br /&gt;
*[ftp://ftp.dcs.shef.ac.uk/share/ilash/Moby/ The Moby Corpus]&lt;br /&gt;
*[http://www.hf.uio.no/tekstlab/prosjekter/SOFIE.htm The Sofie Treebank - A Parallel Treebank of North European Languages]&lt;br /&gt;
&lt;br /&gt;
[[Category:Corpora|*]]&lt;/div&gt;</summary>
		<author><name>Kopotev</name></author>
	</entry>
</feed>