Skip to content

Commit f3252e9

Browse files
committed
Merge pull request #11 from vegeziel/master
BugFix for bug "#880 PubMed Import broken"
2 parents 927cec4 + 224b486 commit f3252e9

File tree

3 files changed

+290
-4
lines changed

3 files changed

+290
-4
lines changed
Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
/* Copyright (C) 2003-2011 JabRef contributors.
2+
This program is free software; you can redistribute it and/or modify
3+
it under the terms of the GNU General Public License as published by
4+
the Free Software Foundation; either version 2 of the License, or
5+
(at your option) any later version.
6+
7+
This program is distributed in the hope that it will be useful,
8+
but WITHOUT ANY WARRANTY; without even the implied warranty of
9+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10+
GNU General Public License for more details.
11+
12+
You should have received a copy of the GNU General Public License along
13+
with this program; if not, write to the Free Software Foundation, Inc.,
14+
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
15+
16+
*/
17+
18+
package net.sf.jabref.imports;
19+
20+
import java.util.regex.Pattern;
21+
import java.io.InputStream;
22+
import java.io.BufferedReader;
23+
import java.io.IOException;
24+
import java.util.List;
25+
import java.util.ArrayList;
26+
import java.util.HashMap;
27+
import java.util.Iterator;
28+
29+
import net.sf.jabref.BibtexEntry;
30+
import net.sf.jabref.Globals;
31+
import net.sf.jabref.AuthorList;
32+
import net.sf.jabref.BibtexFields;
33+
import net.sf.jabref.OutputPrinter;
34+
35+
/**
36+
* Importer for the MEDLINE Plain format.
37+
*
38+
* check here for details on the format
39+
* http://www.nlm.nih.gov/bsd/mms/medlineelements.html
40+
*
41+
* @author vegeziel
42+
*/
43+
public class MedlinePlainImporter extends ImportFormat {
44+
45+
/**
46+
* Return the name of this import format.
47+
*/
48+
public String getFormatName() {
49+
return "MedlinePlain";
50+
}
51+
52+
/*
53+
* (non-Javadoc)
54+
* @see net.sf.jabref.imports.ImportFormat#getCLIId()
55+
*/
56+
public String getCLIId() {
57+
return "medlineplain";
58+
}
59+
60+
/**
61+
* Check whether the source is in the correct format for this importer.
62+
*/
63+
public boolean isRecognizedFormat(InputStream stream) throws IOException {
64+
65+
// Our strategy is to look for the "PMID - *", "PMC.*-.*", or "PMCR.*-.*" line
66+
// (i.e., PubMed Unique Identifier, PubMed Central Identifier, PubMed Central Release)
67+
BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));
68+
Pattern pat1 = Pattern.compile("PMID.*-.*"),
69+
pat2 = Pattern.compile("PMC.*-.*"),
70+
pat3 = Pattern.compile("PMCR.*-.*");
71+
72+
String str;
73+
while ((str = in.readLine()) != null){
74+
if (pat1.matcher(str).find() || pat2.matcher(str).find() || pat3.matcher(str).find())
75+
return true;
76+
}
77+
78+
return false;
79+
}
80+
81+
/**
82+
* Parse the entries in the source, and return a List of BibtexEntry
83+
* objects.
84+
*/
85+
public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status) throws IOException {
86+
ArrayList<BibtexEntry> bibitems = new ArrayList<BibtexEntry>();
87+
StringBuffer sb = new StringBuffer();
88+
BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));
89+
String str;
90+
while ((str = in.readLine()) != null){
91+
sb.append(str);
92+
sb.append("\n");
93+
}
94+
String[] entries = sb.toString().replaceAll("\u2013", "-").replaceAll("\u2014", "--").replaceAll("\u2015", "--").split("\\n\\n");
95+
96+
for (int i = 0; i < entries.length; i++){
97+
98+
if (entries[i].trim().length() == 0)
99+
continue;
100+
101+
String type = "", author = "", editor = "", comment = "";
102+
HashMap<String, String> hm = new HashMap<String, String>();
103+
104+
105+
String[] fields = entries[i].split("\n");
106+
107+
for (int j = 0; j < fields.length; j++){
108+
if(fields[j].equals(""))
109+
continue;
110+
111+
StringBuffer current = new StringBuffer(fields[j]);
112+
boolean done = false;
113+
114+
while (!done && (j < fields.length-1)) {
115+
if(fields[j+1].length() <=4 )
116+
System.out.println("aaa");
117+
if (fields[j+1].charAt(4) != '-') {
118+
if ((current.length() > 0)
119+
&& !Character.isWhitespace(current.charAt(current.length()-1)))
120+
current.append(' ');
121+
current.append(fields[j+1].trim());
122+
j++;
123+
} else
124+
done = true;
125+
}
126+
String entry = current.toString();
127+
128+
String lab = entry.substring(0, entry.indexOf('-')).trim();
129+
String val = entry.substring(entry.indexOf('-')+1).trim();
130+
if (lab.equals("PT")){
131+
val = val.toLowerCase();
132+
if (val.equals("BOOK")) type = "book";
133+
else if (val.equals("journal article")
134+
|| val.equals("classical article")
135+
|| val.equals("corrected and republished article")
136+
|| val.equals("historical article")
137+
|| val.equals("introductory journal article")
138+
|| val.equals("newspaper article")) type = "article";
139+
else if (val.equals("clinical conference")
140+
|| val.equals("consensus development conference")
141+
|| val.equals("consensus development conference, NIH")) type = "conference";
142+
else if (val.equals("technical report")) type = "techreport";
143+
else if (val.equals("editorial")) type = "inproceedings";//"incollection";"inbook";
144+
else if (val.equals("overall")) type = "proceedings";
145+
else if(type.equals("")) type = "other";
146+
147+
}else if (lab.equals("TI")) {
148+
String oldVal = hm.get("title");
149+
if (oldVal == null)
150+
hm.put("title", val);
151+
else {
152+
if (oldVal.endsWith(":") || oldVal.endsWith(".") || oldVal.endsWith("?"))
153+
hm.put("title", oldVal+" "+val);
154+
else
155+
hm.put("title", oldVal+": "+val);
156+
}
157+
}
158+
// =
159+
// val;
160+
else if (lab.equals("BTI") || lab.equals("CTI")) {
161+
hm.put("booktitle", val);
162+
}
163+
else if (lab.equals("FAU")) {
164+
if (author.equals("")) // don't add " and " for the first author
165+
author = val;
166+
else author += " and " + val;
167+
}
168+
else if (lab.equals("FED")){
169+
if (editor.equals("")) // don't add " and " for the first editor
170+
editor = val;
171+
else editor += " and " + val;
172+
}
173+
else if (lab.equals("JT")) {
174+
if (type.equals("inproceedings"))
175+
hm.put("booktitle", val);
176+
else
177+
hm.put("journal", val);
178+
}
179+
180+
else if (lab.equals("PG"))
181+
hm.put("pages", val);
182+
183+
// else if (lab.equals("STAT")) {
184+
// if (val.equals("MEDLINE"))
185+
// hm.put("publisher", "PubMed");
186+
// else
187+
// hm.put("publisher", val);
188+
// }
189+
else if (lab.equals("PL"))
190+
hm.put("address", val);
191+
else if (lab.equals("IS"))
192+
hm.put("issn", val);
193+
else if (lab.equals("VI"))
194+
hm.put("volume", val);
195+
// else if (lab.equals(""))
196+
// hm.put("number", val);
197+
else if (lab.equals("AB")) {
198+
String oldAb = hm.get("abstract");
199+
if (oldAb == null)
200+
hm.put("abstract", val);
201+
else
202+
hm.put("abstract", oldAb+"\n"+val);
203+
}
204+
else if ((lab.equals("DP"))) {
205+
String[] parts = val.split(" ");
206+
hm.put("year", parts[0]);
207+
if ((parts.length > 1) && (parts[1].length() > 0)) {
208+
hm.put("month", parts[1]);
209+
}
210+
}
211+
212+
else if (lab.equals("MH") || lab.equals("OT")){
213+
if (!hm.containsKey("keywords")) hm.put("keywords", val);
214+
else{
215+
String kw = hm.get("keywords");
216+
hm.put("keywords", kw + ", " + val);
217+
}
218+
}
219+
else if (lab.equals("CON") || lab.equals("CIN") || lab.equals("EIN")
220+
|| lab.equals("EFR") || lab.equals("CRI") || lab.equals("CRF")
221+
|| lab.equals("PRIN") || lab.equals("PROF") || lab.equals("RPI")
222+
|| lab.equals("RPF") || lab.equals("RIN") || lab.equals("ROF")
223+
|| lab.equals("UIN") || lab.equals("UOF") || lab.equals("SPIN")
224+
|| lab.equals("ORI")) {
225+
if (comment.length() > 0)
226+
comment = comment+"\n";
227+
comment = comment+val;
228+
}
229+
// // Added ID import 2005.12.01, Morten Alver:
230+
// else if (lab.equals("ID"))
231+
// hm.put("refid", val);
232+
// // Added doi import (sciencedirect.com) 2011.01.10, Alexander Hug <alexander@alexanderhug.info>
233+
else if (lab.equals("AID")){
234+
String doi = val;
235+
if (doi.startsWith("doi:")){
236+
doi = doi.replaceAll("(?i)doi:", "").trim();
237+
hm.put("doi", doi);
238+
}
239+
}
240+
}
241+
// fix authors
242+
if (author.length() > 0) {
243+
author = AuthorList.fixAuthor_lastNameFirst(author);
244+
hm.put("author", author);
245+
}
246+
if (editor.length() > 0) {
247+
editor = AuthorList.fixAuthor_lastNameFirst(editor);
248+
hm.put("editor", editor);
249+
}
250+
if (comment.length() > 0) {
251+
hm.put("comment", comment);
252+
}
253+
254+
BibtexEntry b = new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID, Globals
255+
.getEntryType(type)); // id assumes an existing database so don't
256+
257+
// Remove empty fields:
258+
ArrayList<Object> toRemove = new ArrayList<Object>();
259+
for (Iterator<String> it = hm.keySet().iterator(); it.hasNext();) {
260+
Object key = it.next();
261+
String content = hm.get(key);
262+
if ((content == null) || (content.trim().length() == 0))
263+
toRemove.add(key);
264+
}
265+
for (Iterator<Object> iterator = toRemove.iterator(); iterator.hasNext();) {
266+
hm.remove(iterator.next());
267+
268+
}
269+
270+
// create one here
271+
b.setField(hm);
272+
bibitems.add(b);
273+
274+
}
275+
276+
return bibitems;
277+
278+
}
279+
}

src/main/java/net/sf/jabref/imports/RisImporter.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,12 @@ public boolean isRecognizedFormat(InputStream stream) throws IOException {
6060

6161
// Our strategy is to look for the "TY - *" line.
6262
BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));
63-
Pattern pat1 = Pattern.compile("AU - .*"),
64-
pat2 = Pattern.compile("A1 - .*"),
65-
pat3 = Pattern.compile("A2 - .*");
63+
Pattern pat1 = Pattern.compile("TY - .*");
6664

6765

6866
String str;
6967
while ((str = in.readLine()) != null){
70-
if (pat1.matcher(str).find() || pat2.matcher(str).find() || pat3.matcher(str).find())
68+
if (pat1.matcher(str).find())
7169
return true;
7270
}
7371

src/main/resources/plugins/net.sf.jabref.core/plugin.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,15 @@
3232
<parameter id="description"
3333
value="Import format for Medline references." />
3434
</extension>
35+
36+
<extension id="MedlinePlainImporter" plugin-id="net.sf.jabref.core"
37+
point-id="ImportFormat">
38+
<parameter id="importFormat"
39+
value="net.sf.jabref.imports.MedlinePlainImporter" />
40+
<parameter id="name" value="Medline Plain Importer" />
41+
<parameter id="description"
42+
value="Import format for Medline references (not XML)." />
43+
</extension>
3544

3645
<!-- To extend JabRef with an Entry Fetcher use the following extension point -->
3746
<extension-point id="EntryFetcher">

0 commit comments

Comments
 (0)