% bibliografia.bib
@inproceedings{yao-2018,
author = {Yao, Ziyu and Weld, Daniel S. and Chen, Wei-Peng and Sun, Huan},
title = {StaQC: A Systematically Mined Question-Code Dataset from Stack Overflow},
booktitle = {Proceedings of the 2018 World Wide Web Conference},
series = {WWW '18},
year = {2018},
isbn = {978-1-4503-5639-8},
location = {Lyon, France},
pages = {1693--1703},
numpages = {11},
url = {https://doi.org/10.1145/3178876.3186081},
doi = {10.1145/3178876.3186081},
acmid = {3186081},
publisher = {International World Wide Web Conferences Steering Committee},
address = {Republic and Canton of Geneva, Switzerland},
keywords = {deep neural networks, natural language question answering, question-code pairs, stack overflow},
}
@INPROCEEDINGS{feng-2015,
author={M. {Feng} and B. {Xiang} and M. R. {Glass} and L. {Wang} and B. {Zhou}},
booktitle={2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
title={Applying deep learning to answer selection: A study and an open task},
year={2015},
pages={813--820},
keywords={learning (artificial intelligence);question answering (information retrieval);deep learning;nonfactoid question answering task;QA corpus;QA task;insurance domain;Computer architecture;Machine learning;Knowledge discovery;Training;Convolution;Measurement;Insurance;Answer Selection;Question Answering;Convolutional Neural Network (CNN);Deep Learning;Spoken Question Answering System},
doi={10.1109/ASRU.2015.7404872},
month={Dec},
}
@online{sof-2019,
author = {{Stack Exchange, Inc.}},
title = {Stack Exchange Data Dump},
year = 2019,
url = {https://archive.org/details/stackexchange},
urldate = {2019-05-13}
}
@online{bigquery-2019,
author = {Google},
title = {Google Cloud BigQuery},
year = 2019,
url = {https://cloud.google.com/bigquery/},
urldate = {2019-05-13}
}
@online{glossary-ml,
author = {Google},
title = {Machine Learning Glossary},
year = 2019,
url = {https://developers.google.com/machine-learning/glossary/},
urldate = {2019-05-13}
}
@online{google-question-answering,
author = {{Google AI}},
title = {Open Domain Question Answering},
year = 2019,
url = {https://ai.google.com/research/NaturalQuestions},
urldate = {2019-05-13}
}
@online{stackoverflow-questions-topics-2019,
author = {{Stack Overflow}},
title = {What topics can I ask about here?},
year = 2019,
url = {https://stackoverflow.com/help/on-topic},
urldate = {2019-05-13}
}
@inproceedings{iyer-etal-2016-summarizing,
title = "Summarizing Source Code using a Neural Attention Model",
author = "Iyer, Srinivasan and
Konstas, Ioannis and
Cheung, Alvin and
Zettlemoyer, Luke",
booktitle = "Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2016",
address = "Berlin, Germany",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/P16-1195",
doi = "10.18653/v1/P16-1195",
pages = "2073--2083",
}
@inproceedings{Allamanis-method-class-names:2015,
author = {Allamanis, Miltiadis and Barr, Earl T. and Bird, Christian and Sutton, Charles},
title = {Suggesting Accurate Method and Class Names},
booktitle = {Proceedings of the 2015 10th Joint Meeting on Foundations of Software Engineering},
series = {ESEC/FSE 2015},
year = {2015},
isbn = {978-1-4503-3675-8},
location = {Bergamo, Italy},
pages = {38--49},
numpages = {12},
url = {http://doi.acm.org/10.1145/2786805.2786849},
doi = {10.1145/2786805.2786849},
acmid = {2786849},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {Coding conventions, naturalness of software},
}
@article{Proksch:2015,
author = {Proksch, Sebastian and Lerch, Johannes and Mezini, Mira},
title = {Intelligent Code Completion with Bayesian Networks},
journal = {ACM Trans. Softw. Eng. Methodol.},
issue_date = {December 2015},
volume = {25},
number = {1},
month = dec,
year = {2015},
issn = {1049-331X},
pages = {3:1--3:31},
articleno = {3},
numpages = {31},
url = {http://doi.acm.org/10.1145/2744200},
doi = {10.1145/2744200},
acmid = {2744200},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {Content assist, code completion, code recommender, evaluation, integrated development environments, machine learning, productivity},
}
@article{rebecca-2018,
author = {Rebecca L. Russell and
Louis Y. Kim and
Lei H. Hamilton and
Tomo Lazovich and
Jacob A. Harer and
Onur Ozdemir and
Paul M. Ellingwood and
Marc W. McConley},
title = {Automated Vulnerability Detection in Source Code Using Deep Representation
Learning},
journal = {CoRR},
volume = {abs/1807.04320},
year = {2018},
url = {http://arxiv.org/abs/1807.04320},
archivePrefix = {arXiv},
eprint = {1807.04320},
timestamp = {Mon, 13 Aug 2018 16:46:23 +0200},
biburl = {https://dblp.org/rec/bib/journals/corr/abs-1807-04320},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Allamanis:2018:SML,
author = {Allamanis, Miltiadis and Barr, Earl T. and Devanbu, Premkumar and Sutton, Charles},
title = {A Survey of Machine Learning for Big Code and Naturalness},
journal = {ACM Comput. Surv.},
issue_date = {September 2018},
volume = {51},
number = {4},
month = jul,
year = {2018},
issn = {0360-0300},
pages = {81:1--81:37},
articleno = {81},
numpages = {37},
url = {http://doi.acm.org/10.1145/3212695},
doi = {10.1145/3212695},
acmid = {3212695},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {Big code, code naturalness, machine learning, software engineering tools},
}
@online{wikipedia-developers-demographics-2019,
author = {Wikipedia},
title = {Software engineering demographics},
year = 2019,
url = {https://en.wikipedia.org/wiki/Software_engineering_demographics},
urldate = {2019-06-10}
}
@online{stackoverflow-survey-2019,
author = {{Stack Overflow}},
title = {Developer Survey Results},
year = 2019,
url = {https://insights.stackoverflow.com/survey/2019},
urldate = {2019-06-10}
}
@inproceedings{Wang-quora:2013,
author = {Wang, Gang and Gill, Konark and Mohanlal, Manish and Zheng, Haitao and Zhao, Ben Y.},
title = {Wisdom in the Social Crowd: An Analysis of Quora},
booktitle = {Proceedings of the 22nd International Conference on World Wide Web},
series = {WWW '13},
year = {2013},
isbn = {978-1-4503-2035-1},
location = {Rio de Janeiro, Brazil},
pages = {1341--1352},
numpages = {12},
url = {http://doi.acm.org/10.1145/2488388.2488506},
doi = {10.1145/2488388.2488506},
acmid = {2488506},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {graphs, online social networks, q\&a system},
}
@INPROCEEDINGS{Vasilescu:2013,
author={B. {Vasilescu} and V. {Filkov} and A. {Serebrenik}},
booktitle={2013 International Conference on Social Computing},
title={StackOverflow and GitHub: Associations between Software Development and Crowdsourced Knowledge},
year={2013},
pages={188--195},
keywords={knowledge based systems;software maintenance;StackOverflow;GitHub;software development;crowdsourced knowledge;social coding repository;online programming question-and-answer community;Software;Rhythm;Electronic mail;Indexes;Communities;Productivity;Merging;software development;crowdsourced knowledge;social media},
doi={10.1109/SocialCom.2013.35},
month={Sep.},
}
@online{wikipedia-quora-2019,
author = {Wikipedia},
title = {Quora},
year = 2019,
url = {https://pt.wikipedia.org/wiki/Quora},
urldate = {2019-06-10}
}
@inproceedings{Allamanis-bimodal-source-code-natural-language:2015,
author = {Allamanis, Miltiadis and Tarlow, Daniel and Gordon, Andrew D. and Wei, Yi},
title = {Bimodal Modelling of Source Code and Natural Language},
booktitle = {Proceedings of the 32nd International Conference on International Conference on Machine Learning - Volume 37},
series = {ICML'15},
year = {2015},
location = {Lille, France},
pages = {2123--2132},
numpages = {10},
url = {http://dl.acm.org/citation.cfm?id=3045118.3045344},
acmid = {3045344},
publisher = {JMLR.org},
}
@incollection{Hinton-distributed-representatons:1986,
author = {Hinton, G. E. and McClelland, J. L. and Rumelhart, D. E.},
chapter = {Distributed Representations},
title = {Parallel Distributed Processing: Explorations in the Microstructure of Cognition, Vol. 1},
editor = {Rumelhart, David E. and McClelland, James L. and PDP Research Group, CORPORATE},
year = {1986},
isbn = {0-262-68053-X},
pages = {77--109},
numpages = {33},
url = {http://dl.acm.org/citation.cfm?id=104279.104287},
acmid = {104287},
publisher = {MIT Press},
address = {Cambridge, MA, USA},
}
@book{Goodfellow-et-al-2016,
title={Deep Learning},
author={Ian Goodfellow and Yoshua Bengio and Aaron Courville},
publisher={MIT Press},
note={\url{http://www.deeplearningbook.org}},
year={2016}
}
@inbook{nndesign:2014:pratical-training-issues,
author = {Demuth, Howard B. and Beale, Mark H. and De Jes\'{u}s, Orlando and Hagan, Martin T.},
title = {Neural Network Design},
year = {2014},
isbn = {0971732116, 9780971732117},
edition = {2nd},
publisher = {Martin Hagan},
address = {USA},
chapter = 22,
}
@inproceedings{lai-etal-2018-review,
title = "A Review on Deep Learning Techniques Applied to Answer Selection",
author = "Lai, Tuan Manh and
Bui, Trung and
Li, Sheng",
booktitle = "Proceedings of the 27th International Conference on Computational Linguistics",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/C18-1181",
pages = "2132--2144",
abstract = "Given a question and a set of candidate answers, answer selection is the task of identifying which of the candidates answers the question correctly. It is an important problem in natural language processing, with applications in many areas. Recently, many deep learning based methods have been proposed for the task. They produce impressive performance without relying on any feature engineering or expensive external resources. In this paper, we aim to provide a comprehensive review on deep learning methods applied to answer selection.",
}
@article{tan-lstm-qa,
author = {Ming Tan and
Cicero dos Santos and
Bing Xiang and
Bowen Zhou},
title = {LSTM-based Deep Learning Models for non-factoid answer selection},
journal = {CoRR},
volume = {abs/1511.04108},
year = {2015},
url = {http://arxiv.org/abs/1511.04108},
archivePrefix = {arXiv},
eprint = {1511.04108},
timestamp = {Mon, 13 Aug 2018 16:46:33 +0200},
biburl = {https://dblp.org/rec/bib/journals/corr/TanXZ15},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{Gu-deep-code-search:2018,
author = {Gu, Xiaodong and Zhang, Hongyu and Kim, Sunghun},
title = {Deep Code Search},
booktitle = {Proceedings of the 40th International Conference on Software Engineering},
series = {ICSE '18},
year = {2018},
isbn = {978-1-4503-5638-1},
location = {Gothenburg, Sweden},
pages = {933--944},
numpages = {12},
url = {http://doi.acm.org/10.1145/3180155.3180167},
doi = {10.1145/3180155.3180167},
acmid = {3180167},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {code search, deep learning, joint embedding},
}
@inproceedings{Sachdev-neural-code-search:2018,
author = {Sachdev, Saksham and Li, Hongyu and Luan, Sifei and Kim, Seohyun and Sen, Koushik and Chandra, Satish},
title = {Retrieval on Source Code: A Neural Code Search},
booktitle = {Proceedings of the 2nd ACM SIGPLAN International Workshop on Machine Learning and Programming Languages},
series = {MAPL 2018},
year = {2018},
isbn = {978-1-4503-5834-7},
location = {Philadelphia, PA, USA},
pages = {31--41},
numpages = {11},
url = {http://doi.acm.org/10.1145/3211346.3211353},
doi = {10.1145/3211346.3211353},
acmid = {3211353},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {TF-IDF, code search, word-embedding},
}
@inproceedings{cambronero-deep-learning-code-search:2019,
author = {Cambronero, Jose and Li, Hongyu and Kim, Seohyun and Sen, Koushik and Chandra, Satish},
title = {When Deep Learning Met Code Search},
year = {2019},
isbn = {9781450355728},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3338906.3340458},
doi = {10.1145/3338906.3340458},
booktitle = {Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
pages = {964--974},
numpages = {11},
keywords = {joint embedding, neural networks, code search},
location = {Tallinn, Estonia},
series = {ESEC/FSE 2019}
}
@ARTICLE{shao-answer-selection:2019,
author={T. {Shao} and Y. {Guo} and H. {Chen} and Z. {Hao}},
journal={IEEE Access},
title={Transformer-Based Neural Network for Answer Selection in Question Answering},
year={2019},
volume={7},
pages={26146--26156},
keywords={question answering (information retrieval);recurrent neural nets;text analysis;question answering system;QA-TF;sentence embedding;BiLSTM;bidirectional long short-term memory;answer selection;transformer-based neural network;Task analysis;Feature extraction;Knowledge discovery;Artificial neural networks;Tools;Natural language processing;Answer selection;deep learning;question answering;Transformer},
doi={10.1109/ACCESS.2019.2900753},
ISSN={2169-3536},
}
@inproceedings{Chen-bi-variational-autoencoder:2018,
author = {Chen, Qingying and Zhou, Minghui},
title = {A Neural Framework for Retrieval and Summarization of Source Code},
booktitle = {Proceedings of the 33rd ACM/IEEE International Conference on Automated Software Engineering},
series = {ASE 2018},
year = {2018},
isbn = {978-1-4503-5937-5},
location = {Montpellier, France},
pages = {826--831},
numpages = {6},
url = {http://doi.acm.org/10.1145/3238147.3240471},
doi = {10.1145/3238147.3240471},
acmid = {3240471},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {Code retrieval, code summarization, neural framework},
}
@inproceedings{Yao-coacor:2019,
author = {Yao, Ziyu and Peddamail, Jayavardhan Reddy and Sun, Huan},
title = {CoaCor: Code Annotation for Code Retrieval with Reinforcement Learning},
booktitle = {The World Wide Web Conference},
series = {WWW '19},
year = {2019},
isbn = {978-1-4503-6674-8},
location = {San Francisco, CA, USA},
pages = {2203--2214},
numpages = {12},
url = {http://doi.acm.org/10.1145/3308558.3313632},
doi = {10.1145/3308558.3313632},
acmid = {3313632},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {Code Annotation, Code Retrieval, Reinforcement Learning},
}
@misc{mikolov2013distributed,
title = {Efficient Estimation of Word Representations in Vector Space},
author = {Tomas Mikolov and Kai Chen and Greg S. Corrado and Jeffrey Dean},
year = {2013},
url = {http://arxiv.org/abs/1301.3781}
}
@online{scikit-learn-tsne-2019,
author = {scikit-learn},
title = {sklearn.manifold.TSNE},
year = 2019,
url = {https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html},
urldate = {2019-09-13}
}
@online{quora-tsne-2019,
author = {Simba Khadder},
title = {How does t-SNE work in simple words?},
year = 2019,
url = {https://www.quora.com/How-does-t-SNE-work-in-simple-words},
urldate = {2019-09-13}
}
@online{wikipedia-docstring-2019,
author = {Wikipedia},
title = {Docstring},
year = 2019,
url = {https://en.wikipedia.org/wiki/Docstring},
urldate = {2019-09-13}
}
@inproceedings{marcelo-vem-2019,
author = {Marcelo de Rezende Martins and Marco Aurélio Gerosa},
title = {Um estudo preliminar sobre o uso de uma arquitetura deep learning para seleção de respostas no problema de recuperação de código-fonte},
booktitle = {Anais do VII Workshop on Software Visualization, Evolution and Maintenance (VEM)},
location = {Salvador},
year = {2019},
pages = {94--101},
publisher = {SBC},
address = {Porto Alegre, RS, Brasil},
doi = {10.5753/vem.2019.7589},
url = {https://sol.sbc.org.br/index.php/vem/article/view/7589}
}
@article{Zhang:2019:deep-learning-recommender-survey,
author = {Zhang, Shuai and Yao, Lina and Sun, Aixin and Tay, Yi},
title = {Deep Learning Based Recommender System: A Survey and New Perspectives},
journal = {ACM Comput. Surv.},
issue_date = {February 2019},
volume = {52},
number = {1},
month = feb,
year = {2019},
issn = {0360-0300},
pages = {5:1--5:38},
articleno = {5},
numpages = {38},
url = {http://doi.acm.org/10.1145/3285029},
doi = {10.1145/3285029},
acmid = {3285029},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {Recommender system, deep learning, survey},
}
@online{colab-2019,
author = {Google},
title = {Colaboratory},
year = 2019,
url = {https://research.google.com/colaboratory/faq.html},
urldate = {2019-09-13}
}
@online{jupyter-2019,
author = {Jupyter},
title = {Jupyter Notebook},
year = 2019,
url = {https://jupyter-notebook.readthedocs.io/en/latest/},
urldate = {2019-09-13}
}
@online{joshua-kim-cnn-understanding-word-embeddings-2019,
author = {Joshua Kim},
title = {Understanding how Convolutional Neural Network (CNN) perform text classification with word embeddings},
year = 2019,
url = {http://www.joshuakim.io/understanding-how-convolutional-neural-network-cnn-perform-text-classification-with-word-embeddings/},
urldate = {2019-09-13}
}
@online{wikipedia-git-2019,
author = {Wikipedia},
title = {Git},
year = 2019,
url = {https://en.wikipedia.org/wiki/Git},
urldate = {2019-09-13}
}
@ARTICLE{tom-young:trends-deep-learning-nlp,
author={T. {Young} and D. {Hazarika} and S. {Poria} and E. {Cambria}},
journal={IEEE Computational Intelligence Magazine},
title={Recent Trends in Deep Learning Based Natural Language Processing [Review Article]},
year={2018},
volume={13},
number={3},
pages={55--75},
}
@inproceedings{zhang-guide-convolutional-cnn-embedding-ilustration:2015,
title = "A Sensitivity Analysis of (and Practitioners{'} Guide to) Convolutional Neural Networks for Sentence Classification",
author = "Zhang, Ye and
Wallace, Byron",
booktitle = "Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
month = nov,
year = "2017",
address = "Taipei, Taiwan",
publisher = "Asian Federation of Natural Language Processing",
url = "https://www.aclweb.org/anthology/I17-1026",
pages = "253--263",
abstract = "Convolutional Neural Networks (CNNs) have recently achieved remarkably strong performance on the practically important task of sentence classification (Kim, 2014; Kalchbrenner et al., 2014; Johnson and Zhang, 2014; Zhang et al., 2016). However, these models require practitioners to specify an exact model architecture and set accompanying hyperparameters, including the filter region size, regularization parameters, and so on. It is currently unknown how sensitive model performance is to changes in these configurations for the task of sentence classification. We thus conduct a sensitivity analysis of one-layer CNNs to explore the effect of architecture components on model performance; our aim is to distinguish between important and comparatively inconsequential design decisions for sentence classification. We focus on one-layer CNNs (to the exclusion of more complex models) due to their comparative simplicity and strong empirical performance, which makes it a modern standard baseline method akin to Support Vector Machine (SVMs) and logistic regression. We derive practical advice from our extensive empirical results for those interested in getting the most out of CNNs for sentence classification in real world settings.",
}
@inproceedings{masudur-developers-use-google-code-retrieval:2018,
author = {Rahman, Md Masudur and Barson, Jed and Paul, Sydney and Kayani, Joshua and Lois, Federico Andr\'{e}s and Quezada, Sebasti\'{a}n Fernandez and Parnin, Christopher and Stolee, Kathryn T. and Ray, Baishakhi},
title = {Evaluating How Developers Use General-Purpose Web-Search for Code Retrieval},
year = {2018},
isbn = {9781450357166},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3196398.3196425},
doi = {10.1145/3196398.3196425},
booktitle = {Proceedings of the 15th International Conference on Mining Software Repositories},
pages = {465--475},
numpages = {11},
location = {Gothenburg, Sweden},
series = {MSR '18}
}
@online{husain-github-semantic-search-code-2019,
author = {Hamel Husain and Ho-Hsiang Wu},
title = {How To Create Natural Language Semantic Search For Arbitrary Objects With Deep Learning},
year = 2018,
url = {https://towardsdatascience.com/semantic-code-search-3cd6d244a39c},
urldate = {2020-04-13}
}
@online{casi-newell-sentence-length-2018,
author = {Casi Newell},
title = {Editing Tip: Sentence Length},
year = 2018,
url = {https://www.aje.com/en/arc/editing-tip-sentence-length/},
urldate = {2020-04-13}
}
@INPROCEEDINGS{tang-hybrid-deep-representation-2018,
author={D. {Tang} and W. {Rong} and L. {Shi} and H. {Yang} and Z. {Xiong}},
booktitle={2018 International Conference on Cyber-Enabled Distributed Computing and Knowledge Discovery (CyberC)},
title={A Hybrid of Deep Sentence Representation and Local Feature Representation Model for Question Answer Selection},
year={2018},
pages={280-2803},
}
@article{wen-joint-modeling-question-answer-2019,
title = "Joint modeling of users, questions and answers for answer selection in CQA",
journal = "Expert Systems with Applications",
volume = "118",
pages = "563 - 572",
year = "2019",
issn = "0957-4174",
doi = "https://doi.org/10.1016/j.eswa.2018.10.038",
url = "http://www.sciencedirect.com/science/article/pii/S0957417418306961",
author = "Jiahui Wen and Hongkui Tu and Xiaohui Cheng and Renquan Xie and Wei Yin",
keywords = "Answer selection, User modelling, Attentive neural network",
abstract = "In this paper, we propose solutions to advance answer selection in Community Question Answering (CQA). Automatically selecting correct answers can significantly improve intelligence for CQA, as users are not required to browse the large quantity of texts and select the right answers manually. Also, automatic answers selection can minimize the time for satisfying users seeking the correct answers and maximize user engagement with the site. Unlike previous works, we propose a hybrid attention mechanism to model question-answer pairs. Specifically, for each word, we calculate the intra-sentence attention indicating its local importance and the inter-sentence attention implying its importance to the counterpart sentence. The inter-sentence attention is based on the interactions between question-answer pairs, and the combination of these two attention mechanisms enables us to align the most informative parts in question-answer pairs for sentence matching. Additionally, we exploit user information for answer selection due to the fact that users are more likely to provide correct answers in their areas of expertise. We model users from their written answers to alleviate data sparsity problem, and then learn user representations according to the informative parts in sentences that are useful for question-answer matching task. This mean of modelling users can bridge the semantic gap between different users, as similar users may have the same way of wording their answers. The representations of users, questions and answers are learnt in an end-to-end neural network in a mean that best explains the interrelation between question-answer pairs. We validate the proposed model on a public dataset, and demonstrate its advantages over the baselines with thorough experiments."
}
@inproceedings{sergey-batch-normalization-2015,
author = {Ioffe, Sergey and Szegedy, Christian},
title = {Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift},
year = {2015},
publisher = {JMLR.org},
booktitle = {Proceedings of the 32nd International Conference on International Conference on Machine Learning - Volume 37},
pages = {448--456},
numpages = {9},
location = {Lille, France},
series = {ICML '15}
}
@inproceedings{rajpurkar-etal-2016-squad,
title = "{SQ}u{AD}: 100,000+ Questions for Machine Comprehension of Text",
author = "Rajpurkar, Pranav and
Zhang, Jian and
Lopyrev, Konstantin and
Liang, Percy",
booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2016",
address = "Austin, Texas",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/D16-1264",
doi = "10.18653/v1/D16-1264",
pages = "2383--2392",
}
@article{theodora-introductory-programming-python-2015,
author = {Koulouri, Theodora and Lauria, Stanislao and Macredie, Robert D.},
title = {Teaching Introductory Programming: A Quantitative Evaluation of Different Approaches},
year = {2015},
issue_date = {February 2015},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {14},
number = {4},
url = {https://doi.org/10.1145/2662412},
doi = {10.1145/2662412},
journal = {ACM Trans. Comput. Educ.},
month = dec,
articleno = {26},
numpages = {28},
keywords = {teaching strategies, novice programmers, CS1, programming languages, learning programming, formative feedback, problem solving, Empirical studies}
}
@inproceedings{devlin-etal-2019-bert,
title = "{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding",
author = "Devlin, Jacob and
Chang, Ming-Wei and
Lee, Kenton and
Toutanova, Kristina",
booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/N19-1423",
doi = "10.18653/v1/N19-1423",
pages = "4171--4186",
abstract = "We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5 (7.7 point absolute improvement), MultiNLI accuracy to 86.7{\%} (4.6{\%} absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).",
}
@inproceedings{yang2019xlNet,
title = {XLNet: Generalized Autoregressive Pretraining for Language Understanding},
author = {Yang, Zhilin and Dai, Zihang and Yang, Yiming and Carbonell, Jaime and Salakhutdinov, Russ R and Le, Quoc V},
booktitle = {Advances in Neural Information Processing Systems 32},
pages = {5754--5764},
year = {2019},
publisher = {Curran Associates, Inc.},
url = {http://papers.nips.cc/paper/8812-xlnet-generalized-autoregressive-pretraining-for-language-understanding.pdf}
}
@inproceedings{park-regarding-margin-loss:2017,
author = {Kang, Myeong K. and Park, Kyo H. and Kim, Seong W. and Kim, Min J. and Lee, Sang C.},
title = {Classification Performance Analysis Regarding Margin of Energy-Based Model},
year = {2017},
isbn = {9781450348669},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3178264.3178280},
doi = {10.1145/3178264.3178280},
booktitle = {Proceedings of the 2017 International Conference on Industrial Design Engineering},
pages = {34--37},
numpages = {4},
keywords = {negative-log-likelihood loss, triplet loss, energy based models, hinge loss, margin},
location = {Dubai, United Arab Emirates},
series = {ICIDE 2017}
}
@online{wikipedia-tensorflow-2020,
author = {Wikipedia},
title = {TensorFlow},
year = 2020,
url = {https://pt.wikipedia.org/wiki/TensorFlow},
urldate = {2020-04-13}
}
@online{wikipedia-keras-2020,
author = {Wikipedia},
title = {Keras},
year = 2020,
url = {https://en.wikipedia.org/wiki/Keras},
urldate = {2020-04-13}
}
@article{bengio-hyper-parameter-optimization-2012,
author = {James Bergstra and Yoshua Bengio},
title = {Random Search for Hyper-Parameter Optimization},
journal = {Journal of Machine Learning Research},
year = {2012},
volume = {13},
number = {10},
pages = {281--305},
url = {http://jmlr.org/papers/v13/bergstra12a.html}
}
@InProceedings{kun-learning-to-rank:2018,
author = {He, Kun and Cakir, Fatih and Adel Bargal, Sarah and Sclaroff, Stan},
title = {Hashing as Tie-Aware Learning to Rank},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2018}
}
@article{hasson-direct-fit-to-nature-evolutionary-perspective-ann:2020,
title = "Direct Fit to Nature: An Evolutionary Perspective on Biological and Artificial Neural Networks",
journal = "Neuron",
volume = "105",
number = "3",
pages = "416 - 434",
year = "2020",
issn = "0896-6273",
doi = "https://doi.org/10.1016/j.neuron.2019.12.002",
url = "http://www.sciencedirect.com/science/article/pii/S089662731931044X",
author = "Uri Hasson and Samuel A. Nastase and Ariel Goldstein",
keywords = "evolution, experimental design, interpolation, learning, neural networks",
abstract = "Summary
Evolution is a blind fitting process by which organisms become adapted to their environment. Does the brain use similar brute-force fitting processes to learn how to perceive and act upon the world? Recent advances in artificial neural networks have exposed the power of optimizing millions of synaptic weights over millions of observations to operate robustly in real-world contexts. These models do not learn simple, human-interpretable rules or representations of the world; rather, they use local computations to interpolate over task-relevant manifolds in a high-dimensional parameter space. Counterintuitively, similar to evolutionary processes, over-parameterized models can be simple and parsimonious, as they provide a versatile, robust solution for learning a diverse set of functions. This new family of direct-fit models present a radical challenge to many of the theoretical assumptions in psychology and neuroscience. At the same time, this shift in perspective establishes unexpected links with developmental and ecological psychology."
}
@inproceedings{sadowski-how-developers-search-for-code-case-study:2015,
author = {Sadowski, Caitlin and Stolee, Kathryn T. and Elbaum, Sebastian},
title = {How Developers Search for Code: A Case Study},
year = {2015},
isbn = {9781450336758},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/2786805.2786855},
doi = {10.1145/2786805.2786855},
booktitle = {Proceedings of the 2015 10th Joint Meeting on Foundations of Software Engineering},
pages = {191--201},
numpages = {11},
keywords = {user evaluation, code search, developer tools},
location = {Bergamo, Italy},
series = {ESEC/FSE 2015}
}
@article{silver-mastering-the-game-go:2016,
added-at = {2016-03-11T14:36:05.000+0100},
author = {Silver, David and Huang, Aja and Maddison, Chris J. and Guez, Arthur and Sifre, Laurent and van den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and Dieleman, Sander and Grewe, Dominik and Nham, John and Kalchbrenner, Nal and Sutskever, Ilya and Lillicrap, Timothy and Leach, Madeleine and Kavukcuoglu, Koray and Graepel, Thore and Hassabis, Demis},
doi = {10.1038/nature16961},
journal = {Nature},
keywords = {baduk go google},
month = jan,
number = 7587,
pages = {484--489},
publisher = {Nature Publishing Group},
timestamp = {2016-03-11T14:37:40.000+0100},
title = {Mastering the Game of {Go} with Deep Neural Networks and Tree Search},
volume = 529,
year = 2016
}
@article{allahyari-text-summarization-2017,
author = {Mehdi Allahyari and
Seyed Amin Pouriyeh and
Mehdi Assefi and
Saeid Safaei and
Elizabeth D. Trippe and
Juan B. Gutierrez and
Krys Kochut},
title = {Text Summarization Techniques: {A} Brief Survey},
journal = {CoRR},
volume = {abs/1707.02268},
year = {2017}
}
@article{what-developers-search-for-on-the-web:xia:2017,
author = {Xia, Xin and Bao, Lingfeng and Lo, David and Kochhar, Pavneet Singh and Hassan, Ahmed E. and Xing, Zhenchang},
title = {What Do Developers Search for on the Web?},
year = {2017},
issue_date = {December 2017},
publisher = {Kluwer Academic Publishers},
address = {USA},
volume = {22},
number = {6},
issn = {1382-3256},
url = {https://doi.org/10.1007/s10664-017-9514-4},
doi = {10.1007/s10664-017-9514-4},
journal = {Empirical Softw. Engg.},
month = dec,
pages = {3149--3185},
numpages = {37},
keywords = {Empirical study, Understanding, Search task, Survey}
}
@inproceedings{towards-summarizing-source-code-search:marin:2020,
author = {Marin, Victor J. and Bansal, Iti and Rivero, Carlos R.},
title = {Towards Summarizing Program Statements in Source Code Search},
year = {2020},
isbn = {9781450368667},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3341105.3374055},
doi = {10.1145/3341105.3374055},
booktitle = {Proceedings of the 35th Annual ACM Symposium on Applied Computing},
pages = {118--120},
numpages = {3},
keywords = {program dependence graph, community detection, approximate graph alignment, source code search},
location = {Brno, Czech Republic},
series = {SAC '20}
}
@INPROCEEDINGS{yan-benchmark-code-search-information-retrieval-deep-learning:2020,
author={S. {Yan} and H. {Yu} and Y. {Chen} and B. {Shen} and L. {Jiang}},
booktitle={2020 IEEE 27th International Conference on Software Analysis, Evolution and Reengineering (SANER)},
title={Are the Code Snippets What We Are Searching for? A Benchmark and an Empirical Study on Code Search with Natural-Language Queries},
year={2020},
pages={344--354},
}
@article{guo-deep-look-into-neural-ranking-models:2019,
title = "A Deep Look into neural ranking models for information retrieval",
journal = "Information Processing & Management",
pages = "102067",
year = "2019",
issn = "0306-4573",
doi = "https://doi.org/10.1016/j.ipm.2019.102067",
url = "http://www.sciencedirect.com/science/article/pii/S0306457319302390",
author = "Jiafeng Guo and Yixing Fan and Liang Pang and Liu Yang and Qingyao Ai and Hamed Zamani and Chen Wu and W. Bruce Croft and Xueqi Cheng",
keywords = "Neural ranking model, Information retrieval, Survey",
abstract = "Ranking models lie at the heart of research on information retrieval (IR). During the past decades, different techniques have been proposed for constructing ranking models, from traditional heuristic methods, probabilistic methods, to modern machine learning methods. Recently, with the advance of deep learning technology, we have witnessed a growing body of work in applying shallow or deep neural networks to the ranking problem in IR, referred to as neural ranking models in this paper. The power of neural ranking models lies in the ability to learn from the raw text inputs for the ranking problem to avoid many limitations of hand-crafted features. Neural networks have sufficient capacity to model complicated tasks, which is needed to handle the complexity of relevance estimation in ranking. Since there have been a large variety of neural ranking models proposed, we believe it is the right time to summarize the current status, learn from existing methodologies, and gain some insights for future development. In contrast to existing reviews, in this survey, we will take a deep look into the neural ranking models from different dimensions to analyze their underlying assumptions, major design principles, and learning strategies. We compare these models through benchmark tasks to obtain a comprehensive empirical understanding of the existing techniques. We will also discuss what is missing in the current literature and what are the promising and desired future directions."
}
@InProceedings{wu-sql-rank-listwise-approach:2018,
title = {{SQL}-Rank: A Listwise Approach to Collaborative Ranking},
author = {Wu, Liwei and Hsieh, Cho-Jui and Sharpnack, James},
booktitle = {Proceedings of the 35th International Conference on Machine Learning},
pages = {5315--5324},
year = {2018},
editor = {Dy, Jennifer and Krause, Andreas},
volume = {80},
series = {Proceedings of Machine Learning Research},
address = {Stockholmsmässan, Stockholm, Sweden},
month = {10--15 Jul},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v80/wu18c/wu18c.pdf},
url = {http://proceedings.mlr.press/v80/wu18c.html},
abstract = {In this paper, we propose a listwise approach for constructing user-specific rankings in recommendation systems in a collaborative fashion. We contrast the listwise approach to previous pointwise and pairwise approaches, which are based on treating either each rating or each pairwise comparison as an independent instance respectively. By extending the work of ListNet (Cao et al., 2007), we cast listwise collaborative ranking as maximum likelihood under a permutation model which applies probability mass to permutations based on a low rank latent score matrix. We present a novel algorithm called SQL-Rank, which can accommodate ties and missing data and can run in linear time. We develop a theoretical framework for analyzing listwise ranking methods based on a novel representation theory for the permutation model. Applying this framework to collaborative ranking, we derive asymptotic statistical rates as the number of users and items grow together. We conclude by demonstrating that our SQL-Rank method often outperforms current state-of-the-art algorithms for implicit feedback such as Weighted-MF and BPR and achieve favorable results when compared to explicit feedback algorithms such as matrix factorization and collaborative ranking.}
}
@InProceedings{gutmann-nce-noise-contrastive-estimation-2010,
title = {Noise-contrastive estimation: A new estimation principle for unnormalized statistical models},
author = {Michael Gutmann and Aapo Hyvärinen},
booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics},
pages = {297--304},
year = {2010},
editor = {Yee Whye Teh and Mike Titterington},
volume = {9},
series = {Proceedings of Machine Learning Research},
address = {Chia Laguna Resort, Sardinia, Italy},
month = {13--15 May},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v9/gutmann10a/gutmann10a.pdf},
url = {http://proceedings.mlr.press/v9/gutmann10a.html},
abstract = {We present a new estimation principle for parameterized statistical models. The idea is to perform nonlinear logistic regression to discriminate between the observed data and some artificially generated noise, using the model log-density function in the regression nonlinearity. We show that this leads to a consistent (convergent) estimator of the parameters, and analyze the asymptotic variance. In particular, the method is shown to directly work for unnormalized models, i.e. models where the density function does not integrate to one. The normalization constant can be estimated just like any other parameter. For a tractable ICA model, we compare the method with other estimation methods that can be used to learn unnormalized models, including score matching, contrastive divergence, and maximum-likelihood where the normalization constant is estimated with importance sampling. Simulations show that noise-contrastive estimation offers the best trade-off between computational and statistical efficiency. The method is then applied to the modeling of natural images: We show that the method can successfully estimate a large-scale two-layer model and a Markov random field.}
}
@incollection{attention-is-all-you-need-2017,
title = {Attention is All you Need},
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia},
booktitle = {Advances in Neural Information Processing Systems 30},
editor = {I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
pages = {5998--6008},
year = {2017},
publisher = {Curran Associates, Inc.},
url = {http://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf}
}
@incollection{devise-deep-visual-semantic-embedding-frome-2013,
title = {DeViSE: A Deep Visual-Semantic Embedding Model},
author = {Frome, Andrea and Corrado, Greg S and Shlens, Jon and Bengio, Samy and Dean, Jeff and Ranzato, Marc\textquotesingle Aurelio and Mikolov, Tomas},
booktitle = {Advances in Neural Information Processing Systems 26},
editor = {C. J. C. Burges and L. Bottou and M. Welling and Z. Ghahramani and K. Q. Weinberger},
pages = {2121--2129},
year = {2013},
publisher = {Curran Associates, Inc.},
url = {http://papers.nips.cc/paper/5204-devise-a-deep-visual-semantic-embedding-model.pdf}
}
@misc{martins2020concra,
title={CoNCRA: A Convolutional Neural Network Code Retrieval Approach},
author={Marcelo de Rezende Martins and Marco A. Gerosa},
year={2020},
eprint={2009.01959},
howpublished={\url{https://arxiv.org/abs/2009.01959}},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@inproceedings{luong-etal-2015-effective,
title = "Effective Approaches to Attention-based Neural Machine Translation",
author = "Luong, Thang and
Pham, Hieu and
Manning, Christopher D.",
booktitle = "Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing",
month = sep,
year = "2015",
address = "Lisbon, Portugal",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/D15-1166",
doi = "10.18653/v1/D15-1166",
pages = "1412--1421",
}