% LREC 2012 conference paper (ACL Anthology L12-1624).
% Accented letters are wrapped as BibTeX special characters ({\v{s}}, {\u{g}}, ...)
% so sorting and label generation treat each as a single letter; proper nouns in
% the title are brace-protected against sentence-casing styles.
@inproceedings{POMIKLEK12.1047.L12-1624,
  author    = {Pomik{\'a}lek, Jan and Jakub{\'\i}{\v{c}}ek, Milo{\v{s}} and Rychl{\'y}, Pavel},
  title     = {Building a 70 billion word corpus of {English} from {ClueWeb}},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}-2012)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Do{\u{g}}an, Mehmet U{\u{g}}ur and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Istanbul, Turkey},
  year      = {2012},
  month     = may,
  pages     = {502--506},
  isbn      = {978-2-9517408-7-7},
  language  = {English},
  url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/1047_Paper.pdf},
  note      = {ACL Anthology Identifier: L12-1624},
}