Skip to content

Commit 84d6845

Browse files
committed
WIP: every single output is a valid XML document by itself
The opener thereby works in a "stream" fashion.
1 parent 767e8d0 commit 84d6845

File tree

1 file changed

+48
-57
lines changed

1 file changed

+48
-57
lines changed

metafacture-io/src/main/java/org/metafacture/io/SruOpener.java

Lines changed: 48 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,15 @@
2626

2727
/**
2828
* Opens an SRU (Search/Retrieve via URL) stream and passes a reader to the receiver.
29-
* The input should be the base URL of the SRU service to be retrieved from.
3029
*
3130
* @author Pascal Christoph (dr0i)
3231
*/
33-
@Description("Opens a SRU stream and passes a reader to the receiver. The input should be the base URL of the SRU service to be retrieved from. Mandatory argument is: QUERY.")
32+
@Description(
33+
"Opens an SRU stream and passes a reader to the receiver. The input is the base URL of the SRU service " +
34+
"to be retrieved from. Mandatory argument is: QUERY.\n" +
35+
"The output is an XML document holding the user defined \"maximumRecords\" as documents. If there are " +
36+
"more documents than defined by MAXIMUM_RECORDS and there are more documents wanted (defined by " +
37+
"\"totalRecords\") there will be consecutive XML documents output.")
3438
@In(String.class)
3539
@Out(java.io.Reader.class)
3640
@FluxCommand("open-sru")
@@ -58,7 +62,7 @@ public final class SruOpener extends DefaultObjectPipe<String, ObjectReceiver<Re
5862
private boolean stopRetrieving;
5963
private int recordsRetrieved;
6064

61-
private String xmlDeclarationTemplate ="<?xml version=\"%s\" encoding=\"%s\"?>";
65+
private String xmlDeclarationTemplate = "<?xml version=\"%s\" encoding=\"%s\"?>";
6266
private String xmlDeclaration;
6367

6468
/**
@@ -88,7 +92,8 @@ public void setQuery(final String query) {
8892
}
8993

9094
/**
91-
* Sets total number of records to be retrieved. <strong>Default value: indefinite (as in "all")</strong>.
95+
* Sets total number of records to be retrieved. <strong>Default value: indefinite (as in "all")
96+
* </strong>.
9297
*
9398
* @param totalRecords total number of records to be retrieved
9499
*/
@@ -112,7 +117,7 @@ public void setMaximumRecords(final String maximumRecords) {
112117
* @param startRecord where to start when retrieving records
113118
*/
114119
public void setStartRecord(final String startRecord) {
115-
this.startRecord = Integer.parseInt(startRecord);
120+
this.startRecord = Integer.parseInt(startRecord);
116121
}
117122

118123
/**
@@ -150,83 +155,69 @@ public void process(final String baseUrl) {
150155
srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=")
151156
.append(recordSchema).append("&version=").append(version);
152157
} else {
158+
stopRetrieving = true;
153159
throw new IllegalArgumentException("Missing mandatory parameter 'query'");
154160
}
155161

156-
try {
157-
//get first document and add a starting root tag
158-
Transformer t = TransformerFactory.newInstance().newTransformer();
159-
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(getXmlDocsViaSru(srUrl)));
160-
String line;
161-
StringBuilder stringBuilder = new StringBuilder(1024 * 1024);
162-
boolean rootTagAdded = false;
163-
while ((line = bufferedReader.readLine()) != null) {
164-
if(!rootTagAdded) {
165-
if (line.matches(".*searchRetrieveResponse.*")) {
166-
stringBuilder.append(xmlDeclaration+"\n");
167-
stringBuilder.append("<harvest>\n");
168-
rootTagAdded = true;
169-
}
170-
}
171-
stringBuilder.append(line+"\n");
172-
}
173-
getReceiver().process(new InputStreamReader(new ByteArrayInputStream(stringBuilder.toString().getBytes())));
174-
while (!stopRetrieving && recordsRetrieved < totalRecords && (startRecord < numberOfRecords)) {
175-
InputStream inputStream = getXmlDocsViaSru(srUrl);
176-
getReceiver().process(new InputStreamReader(inputStream));
177-
}
178-
//close root tag
179-
getReceiver().process(new InputStreamReader(new ByteArrayInputStream("</harvest>\n\n".getBytes())));
180-
}
181-
catch (TransformerConfigurationException | IOException e) {
182-
throw new MetafactureException(e);
162+
while (!stopRetrieving && recordsRetrieved < totalRecords && (startRecord < numberOfRecords)) {
163+
InputStream inputStream = getXmlDocsViaSru(srUrl);
164+
getReceiver().process(new InputStreamReader(inputStream));
183165
}
166+
184167
}
185168

186-
private InputStream getXmlDocsViaSru(final StringBuilder srUrl ){
187-
try {
169+
private InputStream getXmlDocsViaSru(final StringBuilder srUrl) {
170+
try {
188171
ByteArrayInputStream byteArrayInputStream = retrieve(srUrl, startRecord, maximumRecords);
189172
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
190173
DocumentBuilder docBuilder = factory.newDocumentBuilder();
191174
Document xmldoc = docBuilder.parse(byteArrayInputStream);
192175

193-
/* Element newRoot = xmldoc.createElement("harvest");
194-
newRoot.appendChild(xmldoc.getFirstChild());
195-
xmldoc.appendChild(newRoot);*/
196-
197-
numberOfRecords =
198-
Integer.parseInt(((Element) xmldoc.getElementsByTagName("numberOfRecords").item(0)).getTextContent());
199-
int recordPosition =
200-
Integer.parseInt(((Element) xmldoc.getElementsByTagName("recordPosition").item(0)).getTextContent());
201-
int nextRecordPosition =
202-
Integer.parseInt(((Element) xmldoc.getElementsByTagName("nextRecordPosition").item(0)).getTextContent());
203-
204-
String xmlEncoding = xmldoc.getXmlEncoding();
205-
String xmlVersion = xmldoc.getXmlVersion();
206-
//<?xml version="1.0" encoding="UTF-8"?>
207-
xmlDeclaration=String.format(xmlDeclarationTemplate,xmldoc.getXmlVersion(),xmldoc.getXmlEncoding());
208-
recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition;
176+
numberOfRecords =
177+
Integer.parseInt(
178+
((Element) xmldoc.getElementsByTagName("numberOfRecords").item(0)).getTextContent());
179+
int recordPosition =
180+
Integer.parseInt(
181+
((Element) xmldoc.getElementsByTagName("recordPosition").item(0)).getTextContent());
182+
int nextRecordPosition =
183+
Integer.parseInt(
184+
((Element) xmldoc.getElementsByTagName("nextRecordPosition").item(0)).getTextContent());
185+
186+
String xmlEncoding = xmldoc.getXmlEncoding();
187+
String xmlVersion = xmldoc.getXmlVersion();
188+
xmlDeclaration = String.format(xmlDeclarationTemplate, xmldoc.getXmlVersion(), xmldoc.getXmlEncoding());
189+
recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition;
209190

210191
ByteArrayOutputStream os = new ByteArrayOutputStream();
211192

212193
Result result = new StreamResult(os);
213194
Transformer t = TransformerFactory.newInstance().newTransformer();
214-
t.setOutputProperty("omit-xml-declaration", "yes");
195+
t.setOutputProperty("omit-xml-declaration", "yes");
215196
t.transform(new DOMSource(xmldoc), result);
216197

217198
ByteArrayInputStream inputStream = new ByteArrayInputStream(os.toByteArray());
218199
startRecord = startRecord + maximumRecords;
219-
return inputStream;
220200

221-
} catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) {
201+
//get searchRetrieveResponse and add XML declaration
202+
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
203+
String line;
204+
StringBuilder stringBuilder = new StringBuilder(1024 * 1024);
205+
stringBuilder.append(xmlDeclaration + "\n");
206+
while ((line = bufferedReader.readLine()) != null) {
207+
stringBuilder.append(line + "\n");
208+
}
209+
return new ByteArrayInputStream(stringBuilder.toString().getBytes());
210+
211+
}
212+
catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) {
213+
stopRetrieving = true;
222214
throw new MetafactureException(e);
223215
}
224-
225-
226216
}
227217

228218
private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int maximumRecords) throws IOException {
229-
final URL urlToOpen = new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords+"&startRecord=" + startRecord);
219+
final URL urlToOpen =
220+
new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord);
230221
final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection();
231222

232223
connection.setConnectTimeout(CONNECTION_TIMEOUT);
@@ -239,7 +230,7 @@ private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int
239230

240231
inputStream.transferTo(outputStream);
241232
return new ByteArrayInputStream(outputStream.toByteArray());
242-
}
233+
}
243234

244235
private InputStream getInputStream(final HttpURLConnection connection) {
245236
try {

0 commit comments

Comments
 (0)