% Bibliography database: deep learning, NLP, negation handling and churn
% prediction references. One field per line; citation keys are unchanged
% from the original export so existing cite commands keep working.
%
% Recurring venue names are defined once as string macros so every entry
% renders the same spelling.

@string{nips  = {Advances in Neural Information Processing Systems}}
@string{jmlr  = {Journal of Machine Learning Research}}
@string{emnlp = {Proceedings of the Conference on Empirical Methods in Natural Language Processing ({EMNLP})}}
@string{aaai  = {Proceedings of the {AAAI} Conference on Artificial Intelligence}}

% ---------------------------------------------------------------------
% Deep learning: vision and speech
% ---------------------------------------------------------------------

@inproceedings{krizhevsky,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
  title     = {{ImageNet} classification with deep convolutional neural networks},
  booktitle = nips,
  pages     = {1097--1105},
  year      = {2012},
}

@article{dahl,
  author    = {Dahl, George E. and Yu, Dong and Deng, Li and Acero, Alex},
  title     = {Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition},
  journal   = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume    = {20},
  number    = {1},
  pages     = {30--42},
  year      = {2012},
  publisher = {IEEE},
}

@article{hinton2012,
  author    = {Hinton, Geoffrey and Deng, Li and Yu, Dong and Dahl, George E. and Mohamed, Abdel-rahman and Jaitly, Navdeep and Senior, Andrew and Vanhoucke, Vincent and Nguyen, Patrick and Sainath, Tara N. and others},
  title     = {Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups},
  journal   = {IEEE Signal Processing Magazine},
  volume    = {29},
  number    = {6},
  pages     = {82--97},
  year      = {2012},
  publisher = {IEEE},
}

% ---------------------------------------------------------------------
% Language models, embeddings and sequence models
% ---------------------------------------------------------------------

% The month was exported into the "number" field; it is stored as a month macro.
@article{bengio2003,
  author  = {Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent, Pascal and Jauvin, Christian},
  title   = {A neural probabilistic language model},
  journal = jmlr,
  volume  = {3},
  pages   = {1137--1155},
  month   = feb,
  year    = {2003},
}

@inproceedings{mikolov2013,
  author    = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S. and Dean, Jeff},
  title     = {Distributed representations of words and phrases and their compositionality},
  booktitle = nips,
  pages     = {3111--3119},
  year      = {2013},
}

% The export split the page range into volume=1631 / pages=1642; rejoined here.
% The trailing "and others" was dropped because the seven listed names are
% believed to be the complete author list -- confirm against the proceedings.
@inproceedings{socher2013,
  author    = {Socher, Richard and Perelygin, Alex and Wu, Jean Y. and Chuang, Jason and Manning, Christopher D. and Ng, Andrew Y. and Potts, Christopher},
  title     = {Recursive deep models for semantic compositionality over a sentiment treebank},
  booktitle = emnlp,
  pages     = {1631--1642},
  year      = {2013},
}

@article{cho,
  author  = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Bahdanau, Dzmitry and Bengio, Yoshua},
  title   = {On the properties of neural machine translation: Encoder-decoder approaches},
  journal = {arXiv preprint arXiv:1409.1259},
  year    = {2014},
}

@inproceedings{sutskever,
  author    = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V.},
  title     = {Sequence to sequence learning with neural networks},
  booktitle = nips,
  pages     = {3104--3112},
  year      = {2014},
}

% NOTE(review): journal/volume/number/pages look like an export of a reprint
% record; the original publication venue may differ -- TODO confirm.
@article{rumelhart1988,
  author  = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title   = {Learning representations by back-propagating errors},
  journal = {Cognitive modeling},
  volume  = {5},
  number  = {3},
  pages   = {1},
  year    = {1988},
}

@article{szegedy,
  author  = {Szegedy, Christian and Zaremba, Wojciech and Sutskever, Ilya and Bruna, Joan and Erhan, Dumitru and Goodfellow, Ian and Fergus, Rob},
  title   = {Intriguing properties of neural networks},
  journal = {arXiv preprint arXiv:1312.6199},
  year    = {2013},
}

@inproceedings{nguyen2015,
  author    = {Nguyen, Anh and Yosinski, Jason and Clune, Jeff},
  title     = {Deep neural networks are easily fooled: High confidence predictions for unrecognizable images},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {427--436},
  year      = {2015},
}

@article{wu2016google,
  author  = {Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le, Quoc V. and Norouzi, Mohammad and Macherey, Wolfgang and Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey, Klaus and others},
  title   = {{Google's} Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
  journal = {arXiv preprint arXiv:1609.08144},
  year    = {2016},
}

@article{bengio2015,
  author  = {Bengio, Yoshua and Lee, Dong-Hyun and Bornschein, Jorg and Mesnard, Thomas and Lin, Zhouhan},
  title   = {Towards biologically plausible deep learning},
  journal = {arXiv preprint arXiv:1502.04156},
  year    = {2015},
}

% The month was exported into the "number" field; it is stored as a month macro.
@article{collobert,
  author  = {Collobert, Ronan and Weston, Jason and Bottou, L{\'e}on and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
  title   = {Natural language processing (almost) from scratch},
  journal = jmlr,
  volume  = {12},
  pages   = {2493--2537},
  month   = aug,
  year    = {2011},
}

% ---------------------------------------------------------------------
% Logic rules and neural-symbolic learning
% ---------------------------------------------------------------------

@article{hu,
  author  = {Hu, Zhiting and Ma, Xuezhe and Liu, Zhengzhong and Hovy, Eduard and Xing, Eric},
  title   = {Harnessing deep neural networks with logic rules},
  journal = {arXiv preprint arXiv:1603.06318},
  year    = {2016},
}

% Compound surname written in "von Last, First" form so it is not split.
@book{garcez2012neural,
  author    = {d'Avila Garcez, Artur S. and Broda, Krysia and Gabbay, Dov M.},
  title     = {Neural-symbolic learning systems: foundations and applications},
  year      = {2012},
  publisher = {Springer Science \& Business Media},
}

% ---------------------------------------------------------------------
% Word and sentence representations
% ---------------------------------------------------------------------

% The export carried volume=14 (not a real volume) and a trailing period in
% the title; both removed.
@inproceedings{pennington,
  author    = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher D.},
  title     = {{GloVe}: Global Vectors for Word Representation},
  booktitle = emnlp,
  pages     = {1532--1543},
  year      = {2014},
}

% "III" is a name suffix, so the author uses the "Last, Jr, First" form.
@inproceedings{amiri2016,
  author    = {Amiri, Hadi and Daum{\'e}, III, Hal},
  title     = {Short Text Representation for Detecting Churn in Microblogs},
  booktitle = aaai,
  pages     = {2566--2572},
  year      = {2016},
}

@article{kim,
  author  = {Kim, Yoon},
  title   = {Convolutional neural networks for sentence classification},
  journal = {arXiv preprint arXiv:1408.5882},
  year    = {2014},
}

% ---------------------------------------------------------------------
% Churn prediction
% ---------------------------------------------------------------------

@article{keramati,
  author    = {Keramati, Abbas and Ghaneei, Hajar and Mirmohammadi, Seyed Mohammad},
  title     = {Developing a prediction model for customer churn from electronic banking services using data mining},
  journal   = {Financial Innovation},
  volume    = {2},
  number    = {1},
  pages     = {10},
  year      = {2016},
  publisher = {Springer},
}

% NOTE(review): the export had the placeholder pages=0, which was removed.
% The real range is presumably 1135--1145 -- TODO confirm and add.
@article{backiel,
  author    = {Backiel, Aim{\'e}e and Baesens, Bart and Claeskens, Gerda},
  title     = {Predicting time-to-churn of prepaid mobile telephone customers using social network analysis},
  journal   = {Journal of the Operational Research Society},
  volume    = {67},
  number    = {9},
  year      = {2016},
  publisher = {Springer},
}

@inproceedings{li2016feature,
  author    = {Li, Ruiqi and Wang, Peng and Chen, Zonghai},
  title     = {A Feature Extraction Method Based on Stacked Auto-Encoder for Telecom Churn Prediction},
  booktitle = {Asian Simulation Conference},
  pages     = {568--576},
  year      = {2016},
  publisher = {Springer},
}

@inproceedings{amiri2015,
  author    = {Amiri, Hadi and Daum{\'e}, III, Hal},
  title     = {Target-Dependent Churn Classification in Microblogs},
  booktitle = aaai,
  pages     = {2361--2367},
  year      = {2015},
}

% ---------------------------------------------------------------------
% Knowledge distillation, regularization and probabilistic logic
% ---------------------------------------------------------------------

@article{hinton2015distilling,
  author  = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
  title   = {Distilling the knowledge in a neural network},
  journal = {arXiv preprint arXiv:1503.02531},
  year    = {2015},
}

% The export replaced the second author with "and others"; the full
% four-author list is restored (verify against the journal page). The month
% was exported into "number" and is stored as a month macro.
@article{ganchev,
  author  = {Ganchev, Kuzman and Gra{\c{c}}a, Jo{\~a}o and Gillenwater, Jennifer and Taskar, Ben},
  title   = {Posterior regularization for structured latent variable models},
  journal = jmlr,
  volume  = {11},
  pages   = {2001--2049},
  month   = jul,
  year    = {2010},
}

@book{minsky,
  author    = {Minsky, Marvin},
  title     = {Learning meaning},
  year      = {1983},
  publisher = {Artificial Intelligence Laboratory, Massachusetts Institute of Technology},
}

@article{bach,
  author  = {Bach, Stephen H. and Broecheler, Matthias and Huang, Bert and Getoor, Lise},
  title   = {Hinge-loss {Markov} random fields and probabilistic soft logic},
  journal = {arXiv preprint arXiv:1505.04406},
  year    = {2015},
}

% ---------------------------------------------------------------------
% Negation in sentiment analysis
% ---------------------------------------------------------------------

@inproceedings{jia2009,
  author    = {Jia, Lifeng and Yu, Clement and Meng, Weiyi},
  title     = {The effect of negation on sentiment analysis and retrieval effectiveness},
  booktitle = {Proceedings of the 18th {ACM} Conference on Information and Knowledge Management},
  pages     = {1827--1830},
  year      = {2009},
  publisher = {ACM},
}

% The export was an article entry with no journal (a missing required field).
% Re-typed as a workshop paper.
% NOTE(review): the booktitle is recalled, not taken from the export --
% TODO confirm it, and add pages (presumably 16--20).
@inproceedings{dadvar2011,
  author    = {Dadvar, Maral and Hauff, Claudia and De Jong, Franciska M. G.},
  title     = {Scope of negation detection in sentiment analysis},
  booktitle = {Proceedings of the Dutch-Belgian Information Retrieval Workshop ({DIR} 2011)},
  year      = {2011},
  publisher = {University of Amsterdam},
}

@inproceedings{hogenboom,
  author    = {Hogenboom, Alexander and Van Iterson, Paul and Heerschop, Bas and Frasincar, Flavius and Kaymak, Uzay},
  title     = {Determining negation scope and strength in sentiment analysis},
  booktitle = {2011 {IEEE} International Conference on Systems, Man, and Cybernetics ({SMC})},
  pages     = {2589--2594},
  year      = {2011},
  publisher = {IEEE},
}

% ---------------------------------------------------------------------
% Optimization, normalization and architectures
% ---------------------------------------------------------------------

@article{zeiler,
  author  = {Zeiler, Matthew D.},
  title   = {{ADADELTA}: an adaptive learning rate method},
  journal = {arXiv preprint arXiv:1212.5701},
  year    = {2012},
}

% The export had the degenerate range 901--901.
% NOTE(review): end page 909 is recalled from the proceedings -- TODO confirm.
@inproceedings{salimans,
  author    = {Salimans, Tim and Kingma, Diederik P.},
  title     = {Weight normalization: A simple reparameterization to accelerate training of deep neural networks},
  booktitle = nips,
  pages     = {901--909},
  year      = {2016},
}

@article{ioffe,
  author  = {Ioffe, Sergey and Szegedy, Christian},
  title   = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
  journal = {arXiv preprint arXiv:1502.03167},
  year    = {2015},
}

@article{smith2016deep,
  author  = {Smith, Leslie N. and Topin, Nicholay},
  title   = {Deep convolutional neural network design patterns},
  journal = {arXiv preprint arXiv:1611.00847},
  year    = {2016},
}

@article{gehring,
  author  = {Gehring, Jonas and Auli, Michael and Grangier, David and Yarats, Denis and Dauphin, Yann N.},
  title   = {Convolutional Sequence to Sequence Learning},
  journal = {arXiv preprint arXiv:1705.03122},
  year    = {2017},
}

@article{huang2015,
  author  = {Huang, Zhiheng and Xu, Wei and Yu, Kai},
  title   = {Bidirectional {LSTM-CRF} models for sequence tagging},
  journal = {arXiv preprint arXiv:1508.01991},
  year    = {2015},
}

@article{chiu2015named,
  author  = {Chiu, Jason P. C. and Nichols, Eric},
  title   = {Named entity recognition with bidirectional {LSTM-CNNs}},
  journal = {arXiv preprint arXiv:1511.08308},
  year    = {2015},
}

% NOTE(review): the "journal" value looks like a book title, so this may
% belong in a book-chapter entry type; author order and title wording should
% also be checked against the source -- TODO confirm.
@article{hinton1985learning,
  author  = {Hinton, Geoffrey E. and Rumelhart, David E. and Williams, Ronald J.},
  title   = {Learning internal representations by back-propagating errors},
  journal = {Parallel Distributed Processing: Explorations in the Microstructure of Cognition},
  volume  = {1},
  year    = {1985},
}

% ---------------------------------------------------------------------
% Word representations for downstream NLP tasks
% ---------------------------------------------------------------------

@inproceedings{hisamoto2013empirical,
  author    = {Hisamoto, Sorami and Duh, Kevin and Matsumoto, Yuji},
  title     = {An empirical investigation of word representations for parsing the web},
  booktitle = {Proceedings of ANLP},
  pages     = {188--193},
  year      = {2013},
}

@inproceedings{turian2010word,
  author    = {Turian, Joseph and Ratinov, Lev and Bengio, Yoshua},
  title     = {Word representations: a simple and general method for semi-supervised learning},
  booktitle = {Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics},
  pages     = {384--394},
  year      = {2010},
  publisher = {Association for Computational Linguistics},
}

@article{huang2014learning,
  author    = {Huang, Fei and Ahuja, Arun and Downey, Doug and Yang, Yi and Guo, Yuhong and Yates, Alexander},
  title     = {Learning representations for weakly supervised natural language processing tasks},
  journal   = {Computational Linguistics},
  volume    = {40},
  number    = {1},
  pages     = {85--120},
  year      = {2014},
  publisher = {MIT Press},
}

% "ACL (2)" expanded to the full proceedings title (volume 2 = short papers).
@inproceedings{bansal2014tailoring,
  author    = {Bansal, Mohit and Gimpel, Kevin and Livescu, Karen},
  title     = {Tailoring Continuous Word Representations for Dependency Parsing},
  booktitle = {Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  pages     = {809--815},
  year      = {2014},
}

% ---------------------------------------------------------------------
% Miscellaneous
% ---------------------------------------------------------------------

@article{zaratiegui,
  author  = {Zaratiegui, Jaime and Montoro, Ana and Castanedo, Federico},
  title   = {Performing highly accurate predictions through convolutional networks for actual telecommunication challenges},
  journal = {arXiv preprint arXiv:1511.04906},
  year    = {2015},
}

@inproceedings{foulds,
  author    = {Foulds, James and Kumar, Shachi and Getoor, Lise},
  title     = {Latent topic networks: A versatile probabilistic programming framework for topic models},
  booktitle = {International Conference on Machine Learning},
  pages     = {777--786},
  year      = {2015},
}