% references.bib -- BibTeX bibliography database.
% Artetxe and Schwenk (2018) -- arXiv preprint 1811.01136 (record exported from dblp).
@article{artetxe-schwenk-2018-margin,
  author     = {Artetxe, Mikel and Schwenk, Holger},
  title      = {Margin-based Parallel Corpus Mining with Multilingual Sentence Embeddings},
  journal    = {CoRR},
  volume     = {abs/1811.01136},
  year       = {2018},
  eprinttype = {arXiv},
  eprint     = {1811.01136},
  url        = {http://arxiv.org/abs/1811.01136},
  timestamp  = {Thu, 22 Nov 2018 17:58:30 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-01136.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Aulamo et al. (2020a) -- OpusTools paper, 12th LREC proceedings.
@inproceedings{aulamo-etal-2020-opustools,
  author    = {Aulamo, Mikko and Sulubacak, Umut and Virpioja, Sami and Tiedemann, J{\"o}rg},
  title     = {{O}pus{T}ools and Parallel Corpus Diagnostics},
  booktitle = {Proceedings of the 12th Language Resources and Evaluation Conference},
  year      = {2020},
  month     = may,
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {3782--3789},
  isbn      = {979-10-95546-34-4},
  language  = {English},
  url       = {https://aclanthology.org/2020.lrec-1.467},
}
% Aulamo et al. (2020b) -- OpusFilter paper, ACL 2020 system demonstrations.
@inproceedings{aulamo-etal-2020-opusfilter,
  author    = {Aulamo, Mikko and Virpioja, Sami and Tiedemann, J{\"o}rg},
  title     = {{O}pus{F}ilter: A Configurable Parallel Corpus Filtering Toolbox},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations},
  year      = {2020},
  month     = jul,
  publisher = {Association for Computational Linguistics},
  pages     = {150--156},
  doi       = {10.18653/v1/2020.acl-demos.20},
  url       = {https://aclanthology.org/2020.acl-demos.20},
}
% Aulamo et al. (2023) -- feature selection for parallel corpus filtering, EAMT 2023.
@inproceedings{aulamo-etal-2023-unsupervised,
  author    = {Aulamo, Mikko and de Gibert, Ona and Virpioja, Sami and Tiedemann, J{\"o}rg},
  title     = {Unsupervised Feature Selection for Effective Parallel Corpus Filtering},
  booktitle = {Proceedings of the 24th Annual Conference of the European Association for Machine Translation},
  year      = {2023},
  month     = jun,
  address   = {Tampere, Finland},
  publisher = {European Association for Machine Translation},
  pages     = {31--38},
  url       = {https://aclanthology.org/2023.eamt-1.4},
}
% Chaudhary et al. (2019) -- low-resource corpus filtering, WMT 2019 shared task papers.
@inproceedings{chaudhary-etal-2019-low,
  author    = {Chaudhary, Vishrav and Tang, Yuqing and Guzm{\'a}n, Francisco and Schwenk, Holger and Koehn, Philipp},
  title     = {Low-Resource Corpus Filtering Using Multilingual Sentence Embeddings},
  booktitle = {Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)},
  year      = {2019},
  month     = aug,
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  pages     = {261--266},
  doi       = {10.18653/v1/W19-5435},
  url       = {https://aclanthology.org/W19-5435}
}
% Joulin et al. (2016) -- FastText.zip, arXiv preprint 1612.03651 (record exported from dblp).
% "FastText.zip" is brace-protected so that sentence-casing styles keep its inner capital.
% The last author is spelled as in joulin-etal-2017-bag so both entries name the same person.
@article{joulin-etal-2016-fasttext,
  author        = {Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Douze, Matthijs and J{\'{e}}gou, Herv{\'{e}} and Mikolov, Tomas},
  title         = {{FastText.zip}: Compressing text classification models},
  journal       = {CoRR},
  volume        = {abs/1612.03651},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1612.03651},
  url           = {http://arxiv.org/abs/1612.03651},
  timestamp     = {Mon, 28 Dec 2020 11:31:02 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/JoulinGBDJM16.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% Joulin et al. (2017) -- "Bag of Tricks" text classification paper, EACL 2017 short papers.
@inproceedings{joulin-etal-2017-bag,
  author    = {Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Mikolov, Tomas},
  title     = {Bag of Tricks for Efficient Text Classification},
  booktitle = {Proceedings of the 15th Conference of the {E}uropean Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  year      = {2017},
  month     = apr,
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {427--431},
  url       = {https://aclanthology.org/E17-2068},
}
% Koehn (2005) -- Europarl corpus paper, MT Summit X.
@inproceedings{koehn-2005-europarl,
  author    = {Koehn, Philipp},
  title     = {{E}uroparl: A Parallel Corpus for Statistical Machine Translation},
  booktitle = {Proceedings of Machine Translation Summit X: Papers},
  year      = {2005},
  month     = sep,
  address   = {Phuket, Thailand},
  pages     = {79--86},
  url       = {https://aclanthology.org/2005.mtsummit-papers.11}
}
% Lui and Baldwin (2012) -- langid.py language identification tool, ACL 2012 demos.
@inproceedings{lui-baldwin-2012-langid,
  author    = {Lui, Marco and Baldwin, Timothy},
  title     = {langid.py: An Off-the-shelf Language Identification Tool},
  booktitle = {Proceedings of the {ACL} 2012 System Demonstrations},
  year      = {2012},
  month     = jul,
  address   = {Jeju Island, Korea},
  publisher = {Association for Computational Linguistics},
  pages     = {25--30},
  url       = {https://aclanthology.org/P12-3005},
}
% Moore and Lewis (2010) -- language model training data selection, ACL 2010 short papers.
@inproceedings{moore-lewis-2010-intelligent,
  author    = {Moore, Robert C. and Lewis, William},
  title     = {Intelligent Selection of Language Model Training Data},
  booktitle = {Proceedings of the {ACL} 2010 Conference Short Papers},
  year      = {2010},
  month     = jul,
  address   = {Uppsala, Sweden},
  publisher = {Association for Computational Linguistics},
  pages     = {220--224},
  url       = {https://aclanthology.org/P10-2041},
}
% Östling and Tiedemann (2016) -- MCMC word alignment, Prague Bulletin of Mathematical Linguistics 106.
% The month uses the predefined `oct` macro so styles can localize or abbreviate it.
% `owner` and `timestamp` are bookkeeping fields that standard styles ignore.
@article{ostling-tiedemann-2016-efficient,
  author    = {{\"O}stling, Robert and Tiedemann, J{\"o}rg},
  title     = {Efficient word alignment with {M}arkov {C}hain {M}onte {C}arlo},
  journal   = {Prague Bulletin of Mathematical Linguistics},
  volume    = {106},
  pages     = {125--146},
  year      = {2016},
  month     = oct,
  url       = {http://ufal.mff.cuni.cz/pbml/106/art-ostling-tiedemann.pdf},
  owner     = {robert},
  timestamp = {2016.08.26}
}
% Sennrich et al. (2016) -- subword units for NMT of rare words, ACL 2016 long papers.
@inproceedings{sennrich-etal-2016-neural,
  author    = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  title     = {Neural Machine Translation of Rare Words with Subword Units},
  booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year      = {2016},
  month     = aug,
  address   = {Berlin, Germany},
  publisher = {Association for Computational Linguistics},
  pages     = {1715--1725},
  doi       = {10.18653/v1/P16-1162},
  url       = {https://aclanthology.org/P16-1162},
}
% Siivola et al. (2007) -- growing and pruning Kneser-Ney smoothed n-gram models.
% The accent is written as a BibTeX special character ({\"a}) so sorting, labels and
% case changes treat it as one letter. The bare DOI is given next to the resolver URL.
@article{siivola-etal-2007-growing,
  author  = {Siivola, Vesa and Hirsim{\"a}ki, Teemu and Virpioja, Sami},
  title   = {On Growing and Pruning {K}neser-{N}ey Smoothed N-Gram Models},
  journal = {IEEE Transactions on Audio, Speech and Language Processing},
  volume  = {15},
  number  = {5},
  pages   = {1617--1624},
  year    = {2007},
  doi     = {10.1109/TASL.2007.896666},
  url     = {https://doi.org/10.1109/TASL.2007.896666}
}
% Tiedemann (2012) -- OPUS parallel data, tools and interfaces, LREC'12.
@inproceedings{tiedemann-2012-parallel,
  author    = {Tiedemann, J{\"o}rg},
  title     = {Parallel Data, Tools and Interfaces in {OPUS}},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
  year      = {2012},
  month     = may,
  address   = {Istanbul, Turkey},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {2214--2218},
  url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/463_Paper.pdf}
}
% Vatanen et al. (2010) -- language identification of short text segments, LREC'10.
% All names use the unambiguous "Last, First" form. The booktitle follows the same
% wording as the LREC'12 entry (tiedemann-2012-parallel), with the acronym protected.
% NOTE(review): no page range is recorded for this entry -- add it once confirmed.
@inproceedings{vatanen-etal-2010-language,
  author    = {Vatanen, Tommi and V{\"a}yrynen, Jaakko J. and Virpioja, Sami},
  title     = {Language Identification of Short Text Segments with N-gram Models},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Rosner, Mike and Tapias, Daniel},
  booktitle = {Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)},
  year      = {2010},
  month     = may,
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)}
}
% Vázquez et al. (2019) -- University of Helsinki submission, WMT19 parallel corpus filtering task.
@inproceedings{vazquez-etal-2019-university,
  author    = {V{\'a}zquez, Ra{\'u}l and Sulubacak, Umut and Tiedemann, J{\"o}rg},
  title     = {The {U}niversity of {H}elsinki Submission to the {WMT}19 Parallel Corpus Filtering Task},
  booktitle = {Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)},
  year      = {2019},
  month     = aug,
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  pages     = {294--300},
  doi       = {10.18653/v1/W19-5441},
  url       = {https://aclanthology.org/W19-5441}
}
% Virpioja et al. (2013) -- Morfessor 2.0 technical report, Aalto University.
% The accent is written as a BibTeX special character ({\"o}), and "Python" is
% brace-protected so sentence-casing styles cannot lowercase it.
% NOTE(review): `number` also repeats the series name, presumably so that styles which
% do not print `series` for tech reports still show it -- confirm before changing.
@techreport{virpioja-etal-2013-morfessor,
  author      = {Virpioja, Sami and Smit, Peter and Gr{\"o}nroos, Stig-Arne and Kurimo, Mikko},
  title       = {Morfessor 2.0: {Python} Implementation and Extensions for {M}orfessor {B}aseline},
  type        = {Report},
  number      = {25/2013 in Aalto University publication series SCIENCE + TECHNOLOGY},
  series      = {Aalto University publication series SCIENCE + TECHNOLOGY},
  institution = {Department of Signal Processing and Acoustics, Aalto University},
  address     = {Helsinki, Finland},
  year        = {2013},
  pages       = {38},
  language    = {eng},
}