-
Notifications
You must be signed in to change notification settings - Fork 0
/
iesl_publication.bib
4244 lines (3845 loc) · 230 KB
/
iesl_publication.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@inproceedings{DBLP:conf/icml/McCallumS90,
  author    = {Andrew McCallum and Kent A. Spackman},
  title     = {Using Genetic Algorithms to Learn Disjunctive Rules from Examples},
  booktitle = {Machine Learning, Proceedings of the Seventh International Conference on Machine Learning (ICML), Austin, Texas, USA, June 21-23, 1990},
  editor    = {Bruce W. Porter and Raymond J. Mooney},
  pages     = {149--152},
  publisher = {Morgan Kaufmann},
  year      = {1990},
  timestamp = {Fri, 23 Dec 2011 00:00:00 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/icml/McCallumS90},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/icml/McCallum92,
  author    = {Andrew McCallum},
  title     = {Using Transitional Proximity for Faster Reinforcement Learning},
  booktitle = {Proceedings of the Ninth International Workshop on Machine Learning ({ML} WS), Aberdeen, Scotland, UK, July 1-3, 1992},
  editor    = {Derek H. Sleeman and Peter Edwards},
  pages     = {316--321},
  publisher = {Morgan Kaufmann},
  year      = {1992},
  timestamp = {Wed, 04 Dec 2002 15:40:32 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/icml/McCallum92},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/icml/McCallum93,
  author    = {Andrew McCallum},
  title     = {Overcoming Incomplete Perception with Utile Distinction Memory},
  booktitle = {Machine Learning, Proceedings of the Tenth International Conference, University of Massachusetts, Amherst, MA, USA, June 27-29, 1993},
  pages     = {190--196},
  publisher = {Morgan Kaufmann},
  year      = {1993},
  timestamp = {Fri, 23 Dec 2011 00:00:00 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/icml/McCallum93},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/nips/McCallum94,
  author    = {Andrew McCallum},
  title     = {Instance-Based State Identification for Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 7, (NIPS), Denver, Colorado, USA},
  editor    = {Gerald Tesauro and David S. Touretzky and Todd K. Leen},
  pages     = {377--384},
  publisher = {{MIT} Press},
  year      = {1994},
  url       = {http://papers.nips.cc/paper/932-instance-based-state-identification-for-reinforcement-learning},
  timestamp = {Thu, 11 Dec 2014 17:34:08 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/nips/McCallum94},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/icml/McCallum95,
  author    = {Andrew McCallum},
  title     = {Instance-Based Utile Distinctions for Reinforcement Learning with Hidden State},
  booktitle = {Machine Learning, Proceedings of the Twelfth International Conference on Machine Learning, Tahoe City, California, USA, July 9-12, 1995},
  editor    = {Armand Prieditis and Stuart J. Russell},
  pages     = {387--395},
  publisher = {Morgan Kaufmann},
  year      = {1995},
  timestamp = {Fri, 23 Dec 2011 00:00:00 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/icml/McCallum95},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/icml/McCallumN98,
  author    = {Andrew McCallum and Kamal Nigam},
  title     = {Employing {EM} and Pool-Based Active Learning for Text Classification},
  booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning ({ICML}), Madison, Wisconsin, USA, July 24-27, 1998},
  editor    = {Jude W. Shavlik},
  pages     = {350--358},
  publisher = {Morgan Kaufmann},
  year      = {1998},
  timestamp = {Thu, 30 Jun 2011 10:34:12 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/icml/McCallumN98},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/icml/McCallumRMN98,
  author    = {Andrew McCallum and Ronald Rosenfeld and Tom M. Mitchell and Andrew Y. Ng},
  title     = {Improving Text Classification by Shrinkage in a Hierarchy of Classes},
  booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning ({ICML}), Madison, Wisconsin, USA, July 24-27, 1998},
  editor    = {Jude W. Shavlik},
  pages     = {359--367},
  publisher = {Morgan Kaufmann},
  year      = {1998},
  timestamp = {Thu, 30 Jun 2011 10:34:12 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/icml/McCallumRMN98},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/aaai/NigamMTM98,
  author    = {Kamal Nigam and Andrew McCallum and Sebastian Thrun and Tom M. Mitchell},
  title     = {Learning to Classify Text from Labeled and Unlabeled Documents},
  booktitle = {Proceedings of the Fifteenth National Conference on Artificial Intelligence and Tenth Innovative Applications of Artificial Intelligence Conference ({AAAI}), July 26-30, 1998, Madison, Wisconsin, {USA}},
  editor    = {Jack Mostow and Chuck Rich},
  pages     = {792--799},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {1998},
  url       = {http://www.aaai.org/Library/AAAI/1998/aaai98-112.php},
  timestamp = {Tue, 11 Dec 2012 00:00:00 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/aaai/NigamMTM98},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/sigir/BakerM98,
  author    = {L. Douglas Baker and Andrew McCallum},
  title     = {Distributional Clustering of Words for Text Classification},
  booktitle = {{SIGIR} '98: Proceedings of the 21st Annual International {ACM} Conference on Research and Development in Information Retrieval ({SIGIR}), August 24-28 1998, Melbourne, Australia},
  editor    = {W. Bruce Croft and Alistair Moffat and C. J. van Rijsbergen and Ross Wilkinson and Justin Zobel},
  pages     = {96--103},
  publisher = {ACM},
  year      = {1998},
  doi       = {10.1145/290941.290970},
  url       = {http://doi.acm.org/10.1145/290941.290970},
  timestamp = {Wed, 08 Feb 2017 13:37:23 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/sigir/BakerM98},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/aaai/CravenFMMNS98,
  author    = {Mark Craven and Dan DiPasquo and Dayne Freitag and Andrew McCallum and Tom M. Mitchell and Kamal Nigam and Se{\'{a}}n Slattery},
  title     = {Learning to Extract Symbolic Knowledge from the {World Wide Web}},
  booktitle = {Proceedings of the Fifteenth National Conference on Artificial Intelligence and Tenth Innovative Applications of Artificial Intelligence Conference ({AAAI}), July 26-30, 1998, Madison, Wisconsin, {USA}},
  editor    = {Jack Mostow and Chuck Rich},
  pages     = {509--516},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {1998},
  url       = {http://www.aaai.org/Library/AAAI/1998/aaai98-072.php},
  timestamp = {Tue, 11 Dec 2012 00:00:00 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/aaai/CravenFMMNS98},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/ijcai/McCallumNRS99,
  author    = {Andrew McCallum and Kamal Nigam and Jason Rennie and Kristie Seymore},
  title     = {A Machine Learning Approach to Building Domain-Specific Search Engines},
  booktitle = {Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence ({IJCAI}), Stockholm, Sweden, July 31 - August 6, 1999},
  editor    = {Thomas Dean},
  pages     = {662--667},
  publisher = {Morgan Kaufmann},
  year      = {1999},
  url       = {http://ijcai.org/Proceedings/99-2/Papers/001.pdf},
  timestamp = {Tue, 19 Jul 2016 16:00:04 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/ijcai/McCallumNRS99},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/icml/RennieM99,
  author    = {Jason Rennie and Andrew McCallum},
  title     = {Using Reinforcement Learning to Spider the {Web} Efficiently},
  booktitle = {Proceedings of the Sixteenth International Conference on Machine Learning ({ICML}), Bled, Slovenia, June 27 - 30, 1999},
  editor    = {Ivan Bratko and Saso Dzeroski},
  pages     = {335--343},
  publisher = {Morgan Kaufmann},
  year      = {1999},
  timestamp = {Tue, 03 Dec 2002 12:31:12 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/icml/RennieM99},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/icml/McCallumFP00,
  author    = {Andrew McCallum and Dayne Freitag and Fernando C. N. Pereira},
  title     = {Maximum Entropy {Markov} Models for Information Extraction and Segmentation},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning ({ICML}), Stanford University, Stanford, CA, USA, June 29 - July 2, 2000},
  editor    = {Pat Langley},
  pages     = {591--598},
  publisher = {Morgan Kaufmann},
  year      = {2000},
  timestamp = {Sun, 21 Feb 2010 20:54:50 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/icml/McCallumFP00},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@article{DBLP:journals/ir/McCallumNRS00,
  author    = {Andrew McCallum and Kamal Nigam and Jason Rennie and Kristie Seymore},
  title     = {Automating the Construction of Internet Portals with Machine Learning},
  journal   = {Inf. Retr.},
  volume    = {3},
  number    = {2},
  pages     = {127--163},
  year      = {2000},
  doi       = {10.1023/A:1009953814988},
  url       = {https://doi.org/10.1023/A:1009953814988},
  timestamp = {Sat, 27 May 2017 01:00:00 +0200},
  biburl    = {http://dblp.org/rec/bib/journals/ir/McCallumNRS00},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/kdd/McCallumNU00,
  author    = {Andrew McCallum and Kamal Nigam and Lyle H. Ungar},
  title     = {Efficient clustering of high-dimensional data sets with application to reference matching},
  booktitle = {Proceedings of the sixth {ACM} international conference on Knowledge discovery and data mining ({SIGKDD}), Boston, MA, USA, August 20-23, 2000},
  editor    = {Raghu Ramakrishnan and Salvatore J. Stolfo and Roberto J. Bayardo and Ismail Parsa},
  pages     = {169--178},
  publisher = {ACM},
  year      = {2000},
  doi       = {10.1145/347090.347123},
  url       = {http://doi.acm.org/10.1145/347090.347123},
  timestamp = {Wed, 12 Dec 2012 15:08:19 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/kdd/McCallumNU00},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/aaai/FreitagM00,
  author    = {Dayne Freitag and Andrew McCallum},
  title     = {Information Extraction with {HMM} Structures Learned by Stochastic Optimization},
  booktitle = {Proceedings of the Seventeenth National Conference on Artificial Intelligence and Twelfth Conference on Innovative Applications of Artificial Intelligence ({AAAI}), July 30 - August 3, 2000, Austin, Texas, {USA}},
  editor    = {Henry A. Kautz and Bruce W. Porter},
  pages     = {584--589},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {2000},
  url       = {http://www.aaai.org/Library/AAAI/2000/aaai00-089.php},
  timestamp = {Tue, 11 Dec 2012 00:00:00 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/aaai/FreitagM00},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/icml/ChangCM00,
  author    = {Huan Chang and David Cohn and Andrew McCallum},
  title     = {Learning to Create Customized Authority Lists},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning ({ICML}), Stanford University, Stanford, CA, USA, June 29 - July 2, 2000},
  editor    = {Pat Langley},
  pages     = {127--134},
  publisher = {Morgan Kaufmann},
  year      = {2000},
  timestamp = {Sun, 21 Feb 2010 20:54:50 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/icml/ChangCM00},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@article{DBLP:journals/ml/NigamMTM00,
  author    = {Kamal Nigam and Andrew McCallum and Sebastian Thrun and Tom M. Mitchell},
  title     = {Text Classification from Labeled and Unlabeled Documents using {EM}},
  journal   = {Machine Learning (ML)},
  volume    = {39},
  number    = {2/3},
  pages     = {103--134},
  year      = {2000},
  doi       = {10.1023/A:1007692713085},
  url       = {https://doi.org/10.1023/A:1007692713085},
  timestamp = {Sun, 28 May 2017 01:00:00 +0200},
  biburl    = {http://dblp.org/rec/bib/journals/ml/NigamMTM00},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@article{DBLP:journals/ai/CravenDFMMNS00,
  author    = {Mark Craven and Dan DiPasquo and Dayne Freitag and Andrew McCallum and Tom M. Mitchell and Kamal Nigam and Se{\'{a}}n Slattery},
  title     = {Learning to construct knowledge bases from the {World Wide Web}},
  journal   = {Artif. Intell.},
  volume    = {118},
  number    = {1-2},
  pages     = {69--113},
  year      = {2000},
  doi       = {10.1016/S0004-3702(00)00004-7},
  url       = {https://doi.org/10.1016/S0004-3702(00)00004-7},
  timestamp = {Sat, 27 May 2017 01:00:00 +0200},
  biburl    = {http://dblp.org/rec/bib/journals/ai/CravenDFMMNS00},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@article{DBLP:journals/debu/CohenMQ00,
  author    = {William W. Cohen and Andrew McCallum and Dallan Quass},
  title     = {Learning to Understand the {Web}},
  journal   = {{IEEE} Data Eng. Bull.},
  volume    = {23},
  number    = {3},
  pages     = {17--24},
  year      = {2000},
  url       = {http://sites.computer.org/debull/A00SEP-CD.pdf},
  timestamp = {Wed, 19 Dec 2007 00:00:00 +0100},
  biburl    = {http://dblp.org/rec/bib/journals/debu/CohenMQ00},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/icml/LaffertyMP01,
  author    = {John D. Lafferty and Andrew McCallum and Fernando C. N. Pereira},
  title     = {Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data},
  booktitle = {Proceedings of the Eighteenth International Conference on Machine Learning ({ICML}), Williams College, Williamstown, MA, USA, June 28 - July 1, 2001},
  editor    = {Carla E. Brodley and Andrea Pohoreckyj Danyluk},
  pages     = {282--289},
  publisher = {Morgan Kaufmann},
  year      = {2001},
  timestamp = {Wed, 27 Nov 2002 10:53:35 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/icml/LaffertyMP01},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {A conditionally-trained model for sequences and other structured data, with global normalization. The original CRF paper. Don't bother reading the section on parameter estimation---use BFGS instead of Iterative Scaling; e.g. see [McCallum UAI 2003].},
}
@inproceedings{DBLP:conf/icml/RoyM01,
  author    = {Nicholas Roy and Andrew McCallum},
  title     = {Toward Optimal Active Learning through Sampling Estimation of Error Reduction},
  booktitle = {Proceedings of the Eighteenth International Conference on Machine Learning ({ICML}), Williams College, Williamstown, MA, USA, June 28 - July 1, 2001},
  editor    = {Carla E. Brodley and Andrea Pohoreckyj Danyluk},
  pages     = {441--448},
  publisher = {Morgan Kaufmann},
  year      = {2001},
  timestamp = {Wed, 27 Nov 2002 10:53:35 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/icml/RoyM01},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {A leave-one-out approach to active learning.},
}
@inproceedings{DBLP:conf/uai/BleiBM02,
  author    = {David M. Blei and J. Andrew Bagnell and Andrew McCallum},
  title     = {Learning with Scope, with Application to Information Extraction and Classification},
  booktitle = {{UAI} '02, Proceedings of the 18th Conference in Uncertainty in Artificial Intelligence (UAI), University of Alberta, Edmonton, Alberta, Canada, August 1-4, 2002},
  editor    = {Adnan Darwiche and Nir Friedman},
  pages     = {53--60},
  publisher = {Morgan Kaufmann},
  year      = {2002},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=844&proceeding_id=18},
  timestamp = {Wed, 06 May 2015 01:00:00 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/uai/BleiBM02},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {Learn highly reliable formatting-based extractors on the fly at test time, using graphical models and variational inference. Describes both generative and conditional versions of the model.},
}
@inproceedings{DBLP:conf/uai/McCallum03,
  author    = {Andrew McCallum},
  title     = {Efficiently Inducing Features of Conditional Random Fields},
  booktitle = {{UAI} '03, Proceedings of the 19th Conference in Uncertainty in Artificial Intelligence (UAI), Acapulco, Mexico, August 7-10 2003},
  editor    = {Christopher Meek and Uffe Kj{\ae}rulff},
  pages     = {403--410},
  publisher = {Morgan Kaufmann},
  year      = {2003},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=955&proceeding_id=19},
  timestamp = {Wed, 06 May 2015 01:00:00 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/uai/McCallum03},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {CRFs give you the great power to include the kitchen sink worth of features. How do you decide which ones to include to avoid over-fitting and running out of memory? A formal, information-theoretic approach, with carefully-chosen approximations to make it efficient with millions of candidate features. This technique key to success in Hindi above, as well as work by Pereira's group at UPenn.},
}
@inproceedings{DBLP:conf/ijcai/McCallumW03,
  author    = {Andrew McCallum and Ben Wellner},
  title     = {Toward Conditional Models of Identity Uncertainty with Application to Proper Noun Coreference},
  booktitle = {Proceedings of {IJCAI-03} Workshop on Information Integration on the Web (IIWeb-03), August 9-10, 2003, Acapulco, Mexico},
  editor    = {Subbarao Kambhampati and Craig A. Knoblock},
  pages     = {79--84},
  year      = {2003},
  url       = {http://www.isi.edu/info-agents/workshops/ijcai03/papers/McCallum-ijcaiws.pdf},
  timestamp = {Wed, 21 Jul 2004 13:31:29 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/ijcai/McCallumW03},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {A conditionally-trained model of object consolidation, based on graph partitioning with learned edge weights.},
}
@inproceedings{DBLP:conf/conll/McCallum003,
  author    = {Andrew McCallum and Wei Li},
  title     = {Early results for Named Entity Recognition with Conditional Random Fields, Feature Induction and Web-Enhanced Lexicons},
  booktitle = {Proceedings of the Seventh Conference on Natural Language Learning (CoNLL 2003), Held in cooperation with {HLT-NAACL} 2003, Edmonton, Canada, May 31 - June 1, 2003},
  editor    = {Walter Daelemans and Miles Osborne},
  pages     = {188--191},
  publisher = {ACL},
  year      = {2003},
  url       = {http://aclweb.org/anthology/W/W03/W03-0430.pdf},
  timestamp = {Fri, 23 Jan 2015 14:28:25 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/conll/McCallum003},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {This is the first publication about named entity extraction with CRFs.},
}
@inproceedings{DBLP:conf/dgo/PintoMWC03,
  author    = {David Pinto and Andrew McCallum and Xing Wei and W. Bruce Croft},
  title     = {Table Extraction Using Conditional Random Fields},
  booktitle = {Proceedings of the 2003 Annual National Conference on Digital Government Research ({DG.O}), 2003},
  editor    = {Yigal Arens and Eduard H. Hovy and Peggy Agouris},
  series    = {{ACM} International Conference Proceeding Series},
  publisher = {Digital Government Research Center},
  year      = {2003},
  url       = {http://dl.acm.org/citation.cfm?id=1123294},
  timestamp = {Fri, 20 Nov 2015 13:56:20 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/dgo/PintoMWC03},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{DBLP:conf/sigir/PintoMWC03,
  author    = {David Pinto and Andrew McCallum and Xing Wei and W. Bruce Croft},
  title     = {Table extraction using conditional random fields},
  booktitle = {{SIGIR} 2003: Proceedings of the 26th Annual International {ACM} Conference on Research and Development in Information Retrieval ({SIGIR}), July 28 - August 1, 2003, Toronto, Canada},
  editor    = {Charles L. A. Clarke and Gordon V. Cormack and Jamie Callan and David Hawking and Alan F. Smeaton},
  pages     = {235--242},
  publisher = {ACM},
  year      = {2003},
  doi       = {10.1145/860435.860479},
  url       = {http://doi.acm.org/10.1145/860435.860479},
  timestamp = {Wed, 08 Feb 2017 13:37:23 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/sigir/PintoMWC03},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {Application of CRFs to finding tables in government reports. Uses both language and layout features.},
}
@article{DBLP:journals/sigir/AllanABBCCDFHHHHHKLLLLMMPPRRRRRSSSSTVWXZ03,
  author    = {James Allan and Jay Aslam and Nicholas J. Belkin and Chris Buckley and James P. Callan and W. Bruce Croft and Susan T. Dumais and Norbert Fuhr and Donna Harman and David J. Harper and Djoerd Hiemstra and Thomas Hofmann and Eduard H. Hovy and Wessel Kraaij and John D. Lafferty and Victor Lavrenko and David D. Lewis and Liz Liddy and R. Manmatha and Andrew McCallum and Jay M. Ponte and John M. Prager and Dragomir R. Radev and Philip Resnik and Stephen E. Robertson and Ronald Rosenfeld and Salim Roukos and Mark Sanderson and Richard M. Schwartz and Amit Singhal and Alan F. Smeaton and Howard R. Turtle and Ellen M. Voorhees and Ralph M. Weischedel and Jinxi Xu and ChengXiang Zhai},
  title     = {Challenges in information retrieval and language modeling: report of a workshop held at the {Center for Intelligent Information Retrieval}, {University of Massachusetts Amherst}, {September} 2002},
  journal   = {{SIGIR} Forum},
  volume    = {37},
  number    = {1},
  pages     = {31--47},
  year      = {2003},
  doi       = {10.1145/945546.945549},
  url       = {http://doi.acm.org/10.1145/945546.945549},
  timestamp = {Wed, 19 Sep 2012 01:00:00 +0200},
  biburl    = {http://dblp.org/rec/bib/journals/sigir/AllanABBCCDFHHHHHKLLLLMMPPRRRRRSSSSTVWXZ03},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {A report about fruitful areas for future work in IR over a five-year time scale.},
}
@inproceedings{DBLP:conf/nips/RainaSNM03,
  author    = {Rajat Raina and Yirong Shen and Andrew Y. Ng and Andrew McCallum},
  title     = {Classification with Hybrid Generative/Discriminative Models},
  booktitle = {Advances in Neural Information Processing Systems 16 [Neural Information Processing Systems ({NIPS}), December 8-13, 2003, Vancouver and Whistler, British Columbia, Canada]},
  editor    = {Sebastian Thrun and Lawrence K. Saul and Bernhard Sch{\"{o}}lkopf},
  pages     = {545--552},
  publisher = {{MIT} Press},
  year      = {2003},
  url       = {http://papers.nips.cc/paper/2405-classification-with-hybrid-generativediscriminative-models},
  timestamp = {Thu, 11 Dec 2014 17:34:07 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/nips/RainaSNM03},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {Train some parameters generatively, some parameters conditionally.},
}
@article{DBLP:journals/talip/LiM03,
  author    = {Wei Li and Andrew McCallum},
  title     = {Rapid development of {Hindi} named entity recognition using conditional random fields and feature induction},
  journal   = {{ACM} Trans. Asian Lang. Inf. Process.},
  volume    = {2},
  number    = {3},
  pages     = {290--294},
  year      = {2003},
  doi       = {10.1145/979872.979879},
  url       = {http://doi.acm.org/10.1145/979872.979879},
  timestamp = {Thu, 19 Aug 2010 01:00:00 +0200},
  biburl    = {http://dblp.org/rec/bib/journals/talip/LiM03},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {How we developed a named entity recognition system for Hindi in just a few weeks.},
}
@inproceedings{DBLP:conf/nips/McCallumW04,
  author    = {Andrew McCallum and Ben Wellner},
  title     = {Conditional Models of Identity Uncertainty with Application to Noun Coreference},
  booktitle = {Advances in Neural Information Processing Systems 17 [Neural Information Processing Systems ({NIPS}), December 13-18, 2004, Vancouver, British Columbia, Canada]},
  pages     = {905--912},
  year      = {2004},
  url       = {http://papers.nips.cc/paper/2557-conditional-models-of-identity-uncertainty-with-application-to-noun-coreference},
  timestamp = {Thu, 11 Dec 2014 17:34:07 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/nips/McCallumW04},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {A model of object consolidation, based on graph partitioning with learned edge weights. Conference paper version of 2003 work in KDD Workshop on Data Cleaning.},
}
@inproceedings{DBLP:conf/ceas/CulottaBM04,
  author    = {Aron Culotta and Ron Bekkerman and Andrew McCallum},
  title     = {Extracting social networks and contact information from email and the {Web}},
  booktitle = {{CEAS} 2004 - First Conference on Email and Anti-Spam (CEAS), July 30-31, 2004, Mountain View, California, {USA}},
  year      = {2004},
  url       = {http://www.ceas.cc/papers-2004/176.pdf},
  timestamp = {Fri, 02 Jun 2006 13:30:15 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/ceas/CulottaBM04},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {Describes an early version of an end-to-end system that automatically populates your email address book with a large social network, including "friends-of-friends," and information about people's expertise.},
}
@inproceedings{DBLP:conf/uai/WellnerMPH04,
  author    = {Ben Wellner and Andrew McCallum and Fuchun Peng and Michael Hay},
  title     = {An Integrated, Conditional Model of Information Extraction and Coreference with Application to Citation Matching},
  booktitle = {{UAI} '04, Proceedings of the 20th Conference in Uncertainty in Artificial Intelligence (UAI), Banff, Canada, July 7-11, 2004},
  editor    = {David Maxwell Chickering and Joseph Y. Halpern},
  pages     = {593--601},
  publisher = {{AUAI} Press},
  year      = {2004},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=1158&proceeding_id=20},
  timestamp = {Wed, 06 May 2015 01:00:00 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/uai/WellnerMPH04},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {A conditionally-trained graphical model for identity uncertainty in relational domains, representing mentions, entities and their attributes. Also a first example of joint inference for extraction and identity uncertainty--coreference decisions actually integrate out uncertainty about information extraction.},
}
@inproceedings{DBLP:conf/icml/SuttonRM04,
  author    = {Charles A. Sutton and Khashayar Rohanimanesh and Andrew McCallum},
  title     = {Dynamic conditional random fields: factorized probabilistic models for labeling and segmenting sequence data},
  booktitle = {Machine Learning, Proceedings of the Twenty-first International Conference ({ICML}), Banff, Alberta, Canada, July 4-8, 2004},
  editor    = {Carla E. Brodley},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {69},
  publisher = {ACM},
  year      = {2004},
  doi       = {10.1145/1015330.1015422},
  url       = {http://doi.acm.org/10.1145/1015330.1015422},
  timestamp = {Mon, 22 Oct 2007 13:54:01 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/icml/SuttonRM04},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {Joint inference over two traditionally-separate layers of NLP processing: POS-tagging and NP-chunking. Introduces the CRF analogue of Factorial HMMs. Compares several approximate inference procedures.},
}
@inproceedings{DBLP:conf/naacl/PengM04,
  author    = {Fuchun Peng and Andrew McCallum},
  title     = {Accurate Information Extraction from Research Papers using Conditional Random Fields},
  booktitle = {Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics ({HLT-NAACL}), Boston, Massachusetts, USA, May 2-7, 2004},
  editor    = {Julia Hirschberg and Susan T. Dumais and Daniel Marcu and Salim Roukos},
  pages     = {329--336},
  publisher = {The Association for Computational Linguistics},
  year      = {2004},
  url       = {http://aclweb.org/anthology/N/N04/N04-1042.pdf},
  timestamp = {Mon, 19 Dec 2016 00:00:00 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/naacl/PengM04},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {Applies CRFs to extraction from research paper headers and reference sections, to obtain current best-in-the-world accuracy. Also compares some simple regularization methods.},
}
@inproceedings{DBLP:conf/coling/PengFM04,
  author    = {Fuchun Peng and Fangfang Feng and Andrew McCallum},
  title     = {{Chinese} Segmentation and New Word Detection using Conditional Random Fields},
  booktitle = {{COLING} 2004, 20th International Conference on Computational Linguistics, Proceedings of the Conference (COLING), 23-27 August 2004, Geneva, Switzerland},
  year      = {2004},
  url       = {http://www.aclweb.org/anthology/C04-1081},
  timestamp = {Mon, 04 Mar 2013 20:57:23 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/coling/PengFM04},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  sum       = {State-of-the art Chinese word segmentation with CRFs, with rich features and many lexicons; also using confidence estimation to add new words to the lexicon.},
}
@inproceedings{DBLP:conf/aaai/KristjanssonCVM04,
  author    = {Trausti T. Kristjansson and Aron Culotta and Paul A. Viola and Andrew McCallum},
  editor    = {Deborah L. McGuinness and George Ferguson},
  title     = {Interactive Information Extraction with Constrained Conditional Random Fields},
  booktitle = {Proceedings of the Nineteenth National Conference on Artificial Intelligence, Sixteenth Conference on Innovative Applications of Artificial Intelligence (AAAI), July 25-29, 2004, San Jose, California, {USA}},
  pages     = {412--418},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {2004},
  url       = {http://www.aaai.org/Library/AAAI/2004/aaai04-066.php},
  biburl    = {http://dblp.org/rec/bib/conf/aaai/KristjanssonCVM04},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Tue, 11 Dec 2012 00:00:00 +0100},
  desc      = {Winner of Honorable Mention Award},
  sum       = {Help a user interactively correct the results of extraction by providing uncertainty cues in the UI, and by using constrained Viterbi to automatically make additional corrections after the first human correction.}
}
@article{DBLP:journals/queue/McCallum05,
  author    = {Andrew McCallum},
  title     = {Information extraction: distilling structured data from unstructured text},
  journal   = {{ACM} Queue},
  volume    = {3},
  number    = {9},
  pages     = {48--57},
  year      = {2005},
  doi       = {10.1145/1105664.1105679},
  url       = {http://doi.acm.org/10.1145/1105664.1105679},
  biburl    = {http://dblp.org/rec/bib/journals/queue/McCallum05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Thu, 16 Mar 2006 00:00:00 +0100},
  sum       = {An overview of information extraction by machine learning methods, written for people not familiar with machine learning, especially CTOs and other people in business.}
}
@inproceedings{DBLP:conf/ijcai/McCallumCW05,
  author    = {Andrew McCallum and Andr{\'{e}}s Corrada{-}Emmanuel and Xuerui Wang},
  editor    = {Leslie Pack Kaelbling and Alessandro Saffiotti},
  title     = {Topic and Role Discovery in Social Networks},
  booktitle = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference on Artificial Intelligence (IJCAI), Edinburgh, Scotland, UK, July 30 - August 5, 2005},
  pages     = {786--791},
  publisher = {Professional Book Center},
  year      = {2005},
  url       = {http://ijcai.org/Proceedings/05/Papers/1623.pdf},
  biburl    = {http://dblp.org/rec/bib/conf/ijcai/McCallumCW05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Wed, 20 Jul 2016 09:10:46 +0200},
  sum       = {Conference paper version of tech report by same authors in 2004 below. Also includes new results with Role-Author-Recipient-Topic model. Discover roles by social network analysis with a Bayesian network that models both links and text messages exchanged on those links. Experiments with Enron email and academic email.}
}
@inproceedings{DBLP:conf/uai/McCallumBP05,
  author    = {Andrew McCallum and Kedar Bellare and Fernando C. N. Pereira},
  title     = {A Conditional Random Field for Discriminatively-trained Finite-state String Edit Distance},
  booktitle = {{UAI} '05, Proceedings of the 21st Conference in Uncertainty in Artificial Intelligence (UAI), Edinburgh, Scotland, July 26-29, 2005},
  pages     = {388--395},
  publisher = {{AUAI} Press},
  year      = {2005},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=1176&proceeding_id=21},
  biburl    = {http://dblp.org/rec/bib/conf/uai/McCallumBP05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Wed, 06 May 2015 01:00:00 +0200},
  sum       = {Train a string edit distance function from both positive and negative examples of string pairs (matching and mismatching). Significantly, the model designer is free to use arbitrary, fancy features of both strings, and also very flexible edit operations. This model is an example of an increasingly popular interesting class---conditionally-trained models with latent variables. Positive results on citations, addresses and names.}
}
@inproceedings{DBLP:conf/cikm/CulottaM05,
  author    = {Aron Culotta and Andrew McCallum},
  editor    = {Otthein Herzog and Hans{-}J{\"{o}}rg Schek and Norbert Fuhr and Abdur Chowdhury and Wilfried Teiken},
  title     = {Joint deduplication of multiple record types in relational data},
  booktitle = {Proceedings of the 2005 {ACM} International Conference on Information and Knowledge Management ({CIKM}), Bremen, Germany, October 31 - November 5, 2005},
  pages     = {257--258},
  publisher = {ACM},
  year      = {2005},
  doi       = {10.1145/1099554.1099615},
  url       = {http://doi.acm.org/10.1145/1099554.1099615},
  biburl    = {http://dblp.org/rec/bib/conf/cikm/CulottaM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Fri, 10 Feb 2006 00:00:00 +0100},
  sum       = {Longer Tech Report version: A Conditional Model of Deduplication for Multi-type Relational Data. Technical Report IR-443, University of Massachusetts, September 2005.}
}
@inproceedings{DBLP:conf/aaai/CulottaM05,
  author    = {Aron Culotta and Andrew McCallum},
  editor    = {Manuela M. Veloso and Subbarao Kambhampati},
  title     = {Reducing Labeling Effort for Structured Prediction Tasks},
  booktitle = {Proceedings, The Twentieth National Conference on Artificial Intelligence and the Seventeenth Innovative Applications of Artificial Intelligence Conference (AAAI), July 9-13, 2005, Pittsburgh, Pennsylvania, {USA}},
  pages     = {746--751},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {2005},
  url       = {http://www.aaai.org/Library/AAAI/2005/aaai05-117.php},
  biburl    = {http://dblp.org/rec/bib/conf/aaai/CulottaM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Mon, 10 Dec 2012 00:00:00 +0100},
  sum       = {A step toward bringing trainable information extraction to the masses! Make it easier for end-users to train IE by providing multiple-choice labeling options, and propagating any constraints their labels provide on portions of the record-labeling task.}
}
@inproceedings{DBLP:conf/naacl/SuttonM05,
  author    = {Charles A. Sutton and Andrew McCallum},
  title     = {Composition of Conditional Random Fields for Transfer Learning},
  booktitle = {{HLT/EMNLP} 2005, Human Language Technology Conference and Conference on Empirical Methods in Natural Language Processing (EMNLP), Proceedings of the Conference, 6-8 October 2005, Vancouver, British Columbia, Canada},
  pages     = {748--754},
  publisher = {The Association for Computational Linguistics},
  year      = {2005},
  url       = {http://aclweb.org/anthology/H/H05/H05-1094.pdf},
  biburl    = {http://dblp.org/rec/bib/conf/naacl/SuttonM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Mon, 19 Dec 2016 00:00:00 +0100},
  sum       = {Improve information extraction from email data by using the output of another extractor that was trained on large quantities of newswire. Improve accuracy further by using joint inference between the two tasks---so that the final target task can actually affect the output of the intermediate task.}
}
@inproceedings{DBLP:conf/conll/SuttonM05,
  author    = {Charles A. Sutton and Andrew McCallum},
  editor    = {Ido Dagan and Daniel Gildea},
  title     = {Joint Parsing and Semantic Role Labeling},
  booktitle = {Proceedings of the Ninth Conference on Computational Natural Language Learning (CoNLL), Ann Arbor, Michigan, USA, June 29-30, 2005},
  pages     = {225--228},
  publisher = {ACL},
  year      = {2005},
  url       = {http://aclweb.org/anthology/W/W05/W05-0636.pdf},
  biburl    = {http://dblp.org/rec/bib/conf/conll/SuttonM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Fri, 23 Jan 2015 14:19:48 +0100},
  sum       = {Attempt to improve accuracy by performing joint inference over parsing and semantic role labeling---preserving uncertainty and multiple hypotheses in Dan Bikel's parser. Unfortunately the effort yielded negative results, most likely because the components needed to produce better calibrated probabilities.}
}
@inproceedings{DBLP:conf/uai/SuttonM05,
  author    = {Charles A. Sutton and Andrew McCallum},
  title     = {Piecewise Training for Undirected Models},
  booktitle = {{UAI} '05, Proceedings of the 21st Conference in Uncertainty in Artificial Intelligence (UAI), Edinburgh, Scotland, July 26-29, 2005},
  pages     = {568--575},
  publisher = {{AUAI} Press},
  year      = {2005},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=1182&proceeding_id=21},
  biburl    = {http://dblp.org/rec/bib/conf/uai/SuttonM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Wed, 06 May 2015 01:00:00 +0200},
  sum       = {Efficiently train a large graphical model in separately normalized pieces, and amazingly often obtain higher accuracy than without this approximation. This paper also shows that this piecewise objective is a lower bound on the exact likelihood, and gives results with three different graphical model structures.}
}
@inproceedings{DBLP:conf/cikm/GhamrawiM05,
  author    = {Nadia Ghamrawi and Andrew McCallum},
  editor    = {Otthein Herzog and Hans{-}J{\"{o}}rg Schek and Norbert Fuhr and Abdur Chowdhury and Wilfried Teiken},
  title     = {Collective multi-label classification},
  booktitle = {Proceedings of the 2005 {ACM} International Conference on Information and Knowledge Management ({CIKM}), Bremen, Germany, October 31 - November 5, 2005},
  pages     = {195--200},
  publisher = {ACM},
  year      = {2005},
  doi       = {10.1145/1099554.1099591},
  url       = {http://doi.acm.org/10.1145/1099554.1099591},
  biburl    = {http://dblp.org/rec/bib/conf/cikm/GhamrawiM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Fri, 10 Feb 2006 00:00:00 +0100},
  sum       = {Multi-label document classification with a conditional maximum entropy model that captures not only the traditional dependences between words and the class labels, but also the co-occurrence dependencies between the class labels. Performs joint inference among all class labels.}
}
@inproceedings{DBLP:conf/www/BekkermanM05,
  author    = {Ron Bekkerman and Andrew McCallum},
  editor    = {Allan Ellis and Tatsuya Hagino},
  title     = {Disambiguating {Web} appearances of people in a social network},
  booktitle = {Proceedings of the 14th international conference on World Wide Web ({WWW}), Chiba, Japan, May 10-14, 2005},
  pages     = {463--470},
  publisher = {ACM},
  year      = {2005},
  doi       = {10.1145/1060745.1060813},
  url       = {http://doi.acm.org/10.1145/1060745.1060813},
  biburl    = {http://dblp.org/rec/bib/conf/www/BekkermanM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Fri, 10 Feb 2006 00:00:00 +0100},
  sum       = {Find homepages and other Web pages mentioning particular people. Do a better job by leveraging a collection of related people.}
}
@inproceedings{DBLP:conf/icml/BekkermanEM05,
  author    = {Ron Bekkerman and Ran El{-}Yaniv and Andrew McCallum},
  editor    = {De Raedt, Luc and Wrobel, Stefan},
  title     = {Multi-way distributional clustering via pairwise interactions},
  booktitle = {Machine Learning, Proceedings of the Twenty-Second International Conference ({ICML}), Bonn, Germany, August 7-11, 2005},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {119},
  pages     = {41--48},
  publisher = {ACM},
  year      = {2005},
  doi       = {10.1145/1102351.1102357},
  url       = {http://doi.acm.org/10.1145/1102351.1102357},
  biburl    = {http://dblp.org/rec/bib/conf/icml/BekkermanEM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Mon, 22 Oct 2007 13:52:21 +0200},
  sum       = {Distributional clustering in multiple feature dimensions or modalities at once---made efficient by a factored representation as used in graphical models, and by a combination of top-down and bottom-up clustering. Results on email clustering, and new best results on 20 Newsgroups.}
}
@inproceedings{DBLP:conf/aaai/LiM05,
  author    = {Wei Li and Andrew McCallum},
  editor    = {Manuela M. Veloso and Subbarao Kambhampati},
  title     = {Semi-Supervised Sequence Modeling with Syntactic Topic Models},
  booktitle = {Proceedings, The Twentieth National Conference on Artificial Intelligence and the Seventeenth Innovative Applications of Artificial Intelligence Conference (AAAI), July 9-13, 2005, Pittsburgh, Pennsylvania, {USA}},
  pages     = {813--818},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {2005},
  url       = {http://www.aaai.org/Library/AAAI/2005/aaai05-128.php},
  biburl    = {http://dblp.org/rec/bib/conf/aaai/LiM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Mon, 10 Dec 2012 00:00:00 +0100},
  sum       = {Learn a low-dimensional manifold from large quantities of unlabeled text data, then use components of the manifold as additional features when training a linear-chain CRF with limited labeled data. The manifold is learned using HMM-LDA [Griffiths, Steyvers, Blei, Tenenbaum 2004], an unsupervised model with special structure suitable for sequences and topics. Experiments with English part-of-speech tagging and Chinese word segmentation.}
}
@inproceedings{DBLP:conf/kdd/WangMM05,
  author    = {Xuerui Wang and Natasha Mohanty and Andrew McCallum},
  editor    = {Jafar Adibi and Marko Grobelnik and Dunja Mladenic and Patrick Pantel},
  title     = {Group and topic discovery from relations and text},
  booktitle = {Proceedings of the 3rd international workshop on Link discovery (LinkKDD), Chicago, Illinois, USA, August 21-25, 2005},
  pages     = {28--35},
  publisher = {ACM},
  year      = {2005},
  doi       = {10.1145/1134271.1134276},
  url       = {http://doi.acm.org/10.1145/1134271.1134276},
  biburl    = {http://dblp.org/rec/bib/conf/kdd/WangMM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Mon, 07 Sep 2015 13:38:15 +0200},
  sum       = {Social network analysis that simultaneously discovers groups of entities and also clusters attributes of their relations, such that clustering in each dimension informs the other. Applied to the voting records and corresponding text of resolutions from the U.S. Senate and the U.N., showing that incorporating the votes results in more salient topic clusters, and that different groupings of legislators emerge from different topics.}
}
@inproceedings{DBLP:conf/nips/WangMM05,
  author    = {Xuerui Wang and Natasha Mohanty and Andrew McCallum},
  title     = {Group and Topic Discovery from Relations and Their Attributes},
  booktitle = {Advances in Neural Information Processing Systems 18 [Neural Information Processing Systems ({NIPS}), December 5-8, 2005, Vancouver, British Columbia, Canada]},
  pages     = {1449--1456},
  year      = {2005},
  url       = {http://papers.nips.cc/paper/2820-group-and-topic-discovery-from-relations-and-their-attributes},
  biburl    = {http://dblp.org/rec/bib/conf/nips/WangMM05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Thu, 11 Dec 2014 17:34:08 +0100},
  sum       = {Social network analysis that simultaneously discovers groups of entities and also clusters attributes of their relations, such that clustering in each dimension informs the other. Applied to the voting records and corresponding text of resolutions from the U.S. Senate and the U.N., showing that incorporating the votes results in more salient topic clusters, and that different groupings of legislators emerge from different topics.}
}
@inproceedings{DBLP:conf/imc/GuMT05,
  author    = {Yu Gu and Andrew McCallum and Donald F. Towsley},
  title     = {Detecting Anomalies in Network Traffic Using Maximum Entropy Estimation},
  booktitle = {Proceedings of the 5th Internet Measurement Conference ({IMC}), Berkeley, California, USA, October 19-21, 2005},
  pages     = {345--350},
  publisher = {{USENIX} Association},
  year      = {2005},
  url       = {http://www.usenix.org/events/imc05/tech/gu.html},
  biburl    = {http://dblp.org/rec/bib/conf/imc/GuMT05},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Sat, 19 Aug 2017 20:08:45 +0200},
  sum       = {Build a density model of normal Internet traffic with Maximum Entropy and feature induction. Detect network attacks by density threshold.}
}
@inproceedings{DBLP:conf/kdd/McCallum06,
  author    = {Andrew McCallum},
  editor    = {Tina Eliassi{-}Rad and Lyle H. Ungar and Mark Craven and Dimitrios Gunopulos},
  title     = {Information extraction, data mining and joint inference},
  booktitle = {Proceedings of the Twelfth {ACM} International Conference on Knowledge Discovery and Data Mining ({SIGKDD}), Philadelphia, PA, USA, August 20-23, 2006},
  pages     = {835},
  publisher = {ACM},
  year      = {2006},
  doi       = {10.1145/1150402.1150515},
  url       = {http://doi.acm.org/10.1145/1150402.1150515},
  biburl    = {http://dblp.org/rec/bib/conf/kdd/McCallum06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Wed, 20 Dec 2006 14:06:10 +0100}
}
@inproceedings{DBLP:conf/aaai/McCallumPDW06,
  author    = {Andrew McCallum and Chris Pal and Gregory Druck and Xuerui Wang},
  title     = {Multi-Conditional Learning: Generative/Discriminative Training for Clustering and Classification},
  booktitle = {Proceedings, The Twenty-First National Conference on Artificial Intelligence and the Eighteenth Innovative Applications of Artificial Intelligence Conference (AAAI), July 16-20, 2006, Boston, Massachusetts, {USA}},
  pages     = {433--439},
  publisher = {{AAAI} Press},
  year      = {2006},
  url       = {http://www.aaai.org/Library/AAAI/2006/aaai06-069.php},
  biburl    = {http://dblp.org/rec/bib/conf/aaai/McCallumPDW06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Mon, 19 Mar 2012 00:00:00 +0100},
  sum       = {Estimate parameters of an undirected graphical model not by joint likelihood, or conditional likelihood, but by a product of multiple conditional likelihoods. Can act as an improved regularizer. With latent variables, can cluster structured, relational data, like Latent Dirichlet Allocation and its successors, but with undirected graphical models and (cross-cutting) conditional-training. Improved results on document classification, Jebara-inspired synthetic data, and over the Harmonium as tested on an information retrieval task.}
}
@inproceedings{DBLP:conf/icml/McCallumWM06,
  author    = {Andrew McCallum and Xuerui Wang and Natasha Mohanty},
  editor    = {Edoardo M. Airoldi and David M. Blei and Stephen E. Fienberg and Anna Goldenberg and Eric P. Xing and Alice X. Zheng},
  title     = {Joint Group and Topic Discovery from Relations and Text},
  booktitle = {Statistical Network Analysis: Models, Issues, and New Directions - {ICML} 2006 Workshop on Statistical Network Analysis (ICML WS), Pittsburgh, PA, USA, June 29, 2006, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {4503},
  pages     = {28--44},
  publisher = {Springer},
  year      = {2006},
  doi       = {10.1007/978-3-540-73133-7_3},
  url       = {https://doi.org/10.1007/978-3-540-73133-7_3},
  biburl    = {http://dblp.org/rec/bib/conf/icml/McCallumWM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Sun, 04 Jun 2017 10:09:08 +0200}
}
@inproceedings{DBLP:conf/naacl/CulottaMB06,
  author    = {Aron Culotta and Andrew McCallum and Jonathan Betz},
  editor    = {Robert C. Moore and Jeff A. Bilmes and Jennifer Chu{-}Carroll and Mark Sanderson},
  title     = {Integrating Probabilistic Extraction Models and Data Mining to Discover Relations and Patterns in Text},
  booktitle = {Human Language Technology Conference of the North American Chapter of the Association of Computational Linguistics (HLT/NAACL), Proceedings, June 4-9, 2006, New York, New York, {USA}},
  publisher = {The Association for Computational Linguistics},
  year      = {2006},
  url       = {http://aclweb.org/anthology/N/N06/N06-1038.pdf},
  biburl    = {http://dblp.org/rec/bib/conf/naacl/CulottaMB06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Mon, 19 Dec 2016 00:00:00 +0100},
  sum       = {Extract relations from Wikipedia articles. Run data mining on the relational graph to obtain patterns that are predictive of relations---such as "opponent of my opponent is my ally" and "a person is likely to have the same religion as their parents." Then use features derived from these patterns in a second run of extraction that improves accuracy.}
}
@article{DBLP:journals/ai/CulottaKMV06,
  author    = {Aron Culotta and Trausti T. Kristjansson and Andrew McCallum and Paul A. Viola},
  title     = {Corrective feedback and persistent learning for information extraction},
  journal   = {Artificial Intelligence},
  volume    = {170},
  number    = {14-15},
  pages     = {1101--1122},
  year      = {2006},
  doi       = {10.1016/j.artint.2006.08.001},
  url       = {https://doi.org/10.1016/j.artint.2006.08.001},
  biburl    = {http://dblp.org/rec/bib/journals/ai/CulottaKMV06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Sat, 27 May 2017 01:00:00 +0200},
  sum       = {Help a user interactively correct the results of extraction by providing uncertainty cues in the UI, and by using constrained Viterbi to automatically make additional corrections after the first human correction. Journal paper version of AAAI paper by the same authors. Adds experiments with active learning.}
}
@inproceedings{DBLP:conf/icpr/KelmPM06,
  author    = {B. Michael Kelm and Chris Pal and Andrew McCallum},
  title     = {Combining Generative and Discriminative Methods for Pixel Classification with Multi-Conditional Learning},
  booktitle = {18th International Conference on Pattern Recognition ({ICPR}), 20-24 August 2006, Hong Kong, China},
  pages     = {828--832},
  publisher = {{IEEE} Computer Society},
  year      = {2006},
  doi       = {10.1109/ICPR.2006.384},
  url       = {https://doi.org/10.1109/ICPR.2006.384},
  biburl    = {http://dblp.org/rec/bib/conf/icpr/KelmPM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Wed, 24 May 2017 01:00:00 +0200},
  sum       = {Multi-conditional learning explored in the context of computer vision.}
}
@inproceedings{DBLP:conf/naacl/SuttonSM06,
  author    = {Charles A. Sutton and Michael Sindelar and Andrew McCallum},
  editor    = {Robert C. Moore and Jeff A. Bilmes and Jennifer Chu{-}Carroll and Mark Sanderson},
  title     = {Reducing Weight Undertraining in Structured Discriminative Learning},
  booktitle = {Human Language Technology Conference of the North American Chapter of the Association of Computational Linguistics (HLT/NAACL), Proceedings, June 4-9, 2006, New York, New York, {USA}},
  publisher = {The Association for Computational Linguistics},
  year      = {2006},
  url       = {http://aclweb.org/anthology/N/N06/N06-1012.pdf},
  biburl    = {http://dblp.org/rec/bib/conf/naacl/SuttonSM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Mon, 19 Dec 2016 00:00:00 +0100},
  sum       = {Train separately CRFs with different subsets of the features, then integrate them at test time---four different variations on the method. Especially make more reliable use of lexicon features and other highly-predictable but brittle features.}
}
@inproceedings{DBLP:conf/icassp/PalSM06,
  author    = {Chris Pal and Charles A. Sutton and Andrew McCallum},
  title     = {Sparse Forward-Backward Using Minimum Divergence Beams for Fast Training Of Conditional Random Fields},
  booktitle = {2006 {IEEE} International Conference on Acoustics Speech and Signal Processing ({ICASSP}), Toulouse, France, May 14-19, 2006},
  pages     = {581--584},
  publisher = {IEEE},
  year      = {2006},
  doi       = {10.1109/ICASSP.2006.1661342},
  url       = {https://doi.org/10.1109/ICASSP.2006.1661342},
  biburl    = {http://dblp.org/rec/bib/conf/icassp/PalSM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Fri, 19 May 2017 01:00:00 +0200},
  sum       = {An alternative method for beam-search based on variational principles. Enables not only faster test-time performance of large-state-space CRFs, but this method makes beam search robust enough to be used at training time, enabling dramatically faster learning of discriminative finite-state methods for speech, IE and other applications.}
}
@article{DBLP:journals/ipm/PengM06,
  author    = {Fuchun Peng and Andrew McCallum},
  title     = {Information extraction from research papers using conditional random fields},
  journal   = {Information Processing and Management},
  volume    = {42},
  number    = {4},
  pages     = {963--979},
  year      = {2006},
  doi       = {10.1016/j.ipm.2005.09.002},
  url       = {https://doi.org/10.1016/j.ipm.2005.09.002},
  biburl    = {http://dblp.org/rec/bib/journals/ipm/PengM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Thu, 18 May 2017 01:00:00 +0200}
}
@inproceedings{DBLP:conf/jcdl/MannMM06,
  author    = {Gideon S. Mann and David M. Mimno and Andrew McCallum},
  editor    = {Gary Marchionini and Michael L. Nelson and Catherine C. Marshall},
  title     = {Bibliometric impact measures leveraging topic analysis},
  booktitle = {{ACM/IEEE} Joint Conference on Digital Libraries ({JCDL}), Chapel Hill, NC, USA, June 11-15, 2006, Proceedings},
  pages     = {65--74},
  publisher = {ACM},
  year      = {2006},
  doi       = {10.1145/1141753.1141765},
  url       = {http://doi.acm.org/10.1145/1141753.1141765},
  biburl    = {http://dblp.org/rec/bib/conf/jcdl/MannMM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Sun, 04 Jun 2017 01:00:00 +0200},
  sum       = {Use a new topic model that leverages n-grams to discover interpretable, fine-grained topics in over a million research papers. Use these topic divisions as well as automated citation analysis to extend three existing bibliometric impact measures, and create three new ones: Topical Diversity, Topical Transfer, Topical Precedence.}
}
@inproceedings{DBLP:conf/emnlp/WickCM06,
  author    = {Michael L. Wick and Aron Culotta and Andrew McCallum},
  editor    = {Dan Jurafsky and {\'{E}}ric Gaussier},
  title     = {Learning Field Compatibilities to Extract Database Records from Unstructured Text},
  booktitle = {{EMNLP} 2006, Proceedings of the 2006 Conference on Empirical Methods in Natural Language Processing (EMNLP), 22-23 July 2006, Sydney, Australia},
  pages     = {603--611},
  publisher = {ACL},
  year      = {2006},
  url       = {http://www.aclweb.org/anthology/W06-1671},
  biburl    = {http://dblp.org/rec/bib/conf/emnlp/WickCM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Wed, 23 Jun 2010 10:51:58 +0200},
  sum       = {Record extraction, jointly accounting for multi-field compatibility by content and layout features.}
}
@inproceedings{DBLP:conf/dial/FengMM06,
  author    = {Shaolei Feng and R. Manmatha and Andrew McCallum},
  title     = {Exploring the Use of Conditional Random Field Models and {HMMs} for Historical Handwritten Document Recognition},
  booktitle = {Second International Workshop on Document Image Analysis for Libraries ({DIAL}), 27-28 April 2006, Lyon, France},
  pages     = {30--37},
  publisher = {{IEEE} Computer Society},
  year      = {2006},
  doi       = {10.1109/DIAL.2006.19},
  url       = {https://doi.org/10.1109/DIAL.2006.19},
  biburl    = {http://dblp.org/rec/bib/conf/dial/FengMM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Fri, 26 May 2017 01:00:00 +0200},
  sum       = {Mixed results on CRFs applied to handwritten word recognition.}
}
@inproceedings{DBLP:conf/icml/LiM06,
  author    = {Wei Li and Andrew McCallum},
  editor    = {William W. Cohen and Andrew Moore},
  title     = {Pachinko allocation: {DAG}-structured mixture models of topic correlations},
  booktitle = {Machine Learning, Proceedings of the Twenty-Third International Conference ({ICML}), Pittsburgh, Pennsylvania, USA, June 25-29, 2006},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {148},
  pages     = {577--584},
  publisher = {ACM},
  year      = {2006},
  doi       = {10.1145/1143844.1143917},
  url       = {http://doi.acm.org/10.1145/1143844.1143917},
  biburl    = {http://dblp.org/rec/bib/conf/icml/LiM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Thu, 19 Aug 2010 01:00:00 +0200},
  sum       = {An LDA-style topic model that captures correlations between topics, enabling discovery of finer-grained topics. Similar motivations to Blei and Lafferty's Correlated Topic Model (CTM), but uses a DAG to capture arbitrary, nested and possibly sparse correlations among topics. Interior nodes of the DAG have a Dirichlet distribution over their children; words are in the leaves. Provides improved interpretability and held-out data likelihood.}
}
@article{DBLP:journals/ir/WeiCM06,
  author    = {Xing Wei and W. Bruce Croft and Andrew McCallum},
  title     = {Table extraction for answer retrieval},
  journal   = {Information Retrieval},
  volume    = {9},
  number    = {5},
  pages     = {589--611},
  year      = {2006},
  doi       = {10.1007/s10791-006-9005-5},
  url       = {https://doi.org/10.1007/s10791-006-9005-5},
  biburl    = {http://dblp.org/rec/bib/journals/ir/WeiCM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Sat, 27 May 2017 01:00:00 +0200},
  sum       = {Information extraction from tables, using conditional random fields with language and layout features, with application to question answering. Journal paper version of our SIGIR 2003 paper.}
}
@inproceedings{DBLP:conf/kdd/WangM06,
  author    = {Xuerui Wang and Andrew McCallum},
  editor    = {Tina Eliassi{-}Rad and Lyle H. Ungar and Mark Craven and Dimitrios Gunopulos},
  title     = {Topics over time: a non-{Markov} continuous-time model of topical trends},
  booktitle = {Proceedings of the Twelfth {ACM} International Conference on Knowledge Discovery and Data Mining ({SIGKDD}), Philadelphia, PA, USA, August 20-23, 2006},
  pages     = {424--433},
  publisher = {ACM},
  year      = {2006},
  doi       = {10.1145/1150402.1150450},
  url       = {http://doi.acm.org/10.1145/1150402.1150450},
  biburl    = {http://dblp.org/rec/bib/conf/kdd/WangM06},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Wed, 20 Dec 2006 14:06:10 +0100},
  sum       = {A new LDA-style topic model that models trends over time. The meaning of a topic remains fixed and reliable, but its prevalence over time is captured, and topics may thus focus in on co-occurrence patterns that are time-sensitive. Unlike other work that relies on Markov assumptions or discretization of time, here each topic is associated with a continuous distribution over timestamps. Improvements in topic saliency and the ability to predict time given words.}
}
@article{DBLP:journals/jair/McCallumWC07,
author = {Andrew McCallum and Xuerui Wang and Andr{\'{e}}s Corrada{-}Emmanuel},