Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for #191 #197

Merged
merged 40 commits into from
Jun 28, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
5398859
fix #191
nck-mlcnv Apr 8, 2023
58d71bb
simple fix for countLines method
nck-mlcnv Apr 8, 2023
23470d2
Merge branch 'develop' into fix/issue-191/readline-with-different-lin…
nck-mlcnv Apr 11, 2023
6798f8f
add IndexedLineReader with tests
nck-mlcnv Apr 12, 2023
6b8add0
change the IndexedLineReader to comply with current indexing implemen…
nck-mlcnv Apr 14, 2023
ae4bae6
update FileUtils
nck-mlcnv Apr 14, 2023
8bec05d
change getLineEnding method in FileUtils class
nck-mlcnv Apr 18, 2023
78ebd18
refactor IndexedLineReader
nck-mlcnv Apr 18, 2023
5848fe2
restructure FileUtilsTest
nck-mlcnv Apr 18, 2023
ebd623b
fix javadoc
nck-mlcnv Apr 18, 2023
343becc
fix javadoc again
nck-mlcnv Apr 21, 2023
5b52ab2
correct try-resource-blocks
nck-mlcnv Apr 21, 2023
b0ab294
add a test for IndexedLineReader
nck-mlcnv Apr 21, 2023
0077efb
correct try-resource-block for streams too
nck-mlcnv Apr 21, 2023
70a172e
make countLines method skip lines that only contain whitespace charac…
nck-mlcnv May 15, 2023
4398bdc
add docs for constructor of FileSeparatorQuerySource
nck-mlcnv May 15, 2023
41cf274
rename IndexedLineReader to IndexedQueryReader
nck-mlcnv May 17, 2023
1d8fbf6
change try-with-resource instructions
nck-mlcnv May 17, 2023
bd76573
refactor indexing
nck-mlcnv May 25, 2023
5ae9166
fix unit tests
nck-mlcnv May 25, 2023
23eb002
fix size method
nck-mlcnv May 25, 2023
7fed3c9
refactor default separator in FileSeparatorQuerySource
nck-mlcnv May 25, 2023
adc47b4
fix more tests
nck-mlcnv May 26, 2023
065e8fe
fix documentation
nck-mlcnv May 26, 2023
1157ce0
update constructor and readQuery
nck-mlcnv May 26, 2023
74cb9ff
remove unused methods and other minor changes
nck-mlcnv Jun 15, 2023
4de0a20
fix indexFile method and add more test cases
nck-mlcnv Jun 17, 2023
b7b6396
Fix/issue 191/rework parsing (#211)
bigerl Jun 21, 2023
8e1a35d
small change to test
nck-mlcnv Jun 23, 2023
1a86dcf
update documentation
nck-mlcnv Jun 23, 2023
5783fd5
fix QueryHandlerTest
nck-mlcnv Jun 23, 2023
90404f9
Merge branch 'develop' into fix/issue-191/readline-with-different-lin…
nck-mlcnv Jun 23, 2023
41002ac
adjust expected test results to new behaviour of the IndexedQueryReader
nck-mlcnv Jun 26, 2023
2191621
add tests for getLineEnding
nck-mlcnv Jun 26, 2023
cb57354
fix test
nck-mlcnv Jun 26, 2023
e82c165
fix test cases
nck-mlcnv Jun 26, 2023
eb88285
Refactor FileUtilsTest to use temporary files
bigerl Jun 28, 2023
18f7d33
Ensure temporary test files are deleted
bigerl Jun 28, 2023
ff7bbc0
Update FileUtilsTest to use temp file
bigerl Jun 28, 2023
b41fbd2
Refactor FileUtilsTest for safer and more dynamic test data
bigerl Jun 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
package org.aksw.iguana.cc.query.source.impl;

import org.aksw.iguana.cc.query.source.QuerySource;
import org.aksw.iguana.cc.utils.FileUtils;
import org.aksw.iguana.cc.utils.IndexedLineReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.List;

/**
Expand All @@ -18,32 +16,30 @@
public class FileLineQuerySource extends QuerySource {
private static final Logger LOGGER = LoggerFactory.getLogger(FileLineQuerySource.class);

protected File queryFile;

protected int size;
private IndexedLineReader ilr;

public FileLineQuerySource(String path) {
super(path);
this.queryFile = new File(this.path);

try {
this.size = FileUtils.countLines(this.queryFile);
ilr = IndexedLineReader.make(path);
} catch (IOException e) {
LOGGER.error("Could not read queries");
LOGGER.error("Failed to read this file for the queries: " + path + "\n" + e);
}
bigerl marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public int size() {
return this.size;
return ilr.size();
}

@Override
public String getQuery(int index) throws IOException {
return FileUtils.readLineAt(index, this.queryFile);
return ilr.readLine(index);
}

@Override
public List<String> getAllQueries() throws IOException {
return Files.readAllLines(this.queryFile.toPath());
return ilr.readLines();
}
}
Original file line number Diff line number Diff line change
@@ -1,19 +1,12 @@
package org.aksw.iguana.cc.query.source.impl;

import org.aksw.iguana.cc.query.source.QuerySource;
import org.aksw.iguana.cc.utils.FileUtils;
import org.aksw.iguana.cc.utils.IndexedLineReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Stream;

/**
* The FileSeparatorQuerySource reads queries from a file with
Expand All @@ -26,84 +19,39 @@ public class FileSeparatorQuerySource extends QuerySource {

private static final String DEFAULT_SEPARATOR = "###";

protected File queryFile;
protected String separator;
protected int size;

private List<Integer> separatorPositions;
private IndexedLineReader ilr;

public FileSeparatorQuerySource(String path) {
this(path, DEFAULT_SEPARATOR);
}

public FileSeparatorQuerySource(String path, String separator) {
nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
super(path);
this.queryFile = new File(this.path);
this.separator = separator;

indexFile();
}

private void indexFile() {
this.separatorPositions = new LinkedList<>();
int separatorCount = 0;
try (BufferedReader reader = FileUtils.getBufferedReader(this.queryFile)) {
int index = 0;
String line;
this.separatorPositions.add(-1);
while ((line = reader.readLine()) != null) {
if (line.equals(this.separator)) {
separatorCount++;
this.separatorPositions.add(index);
}
index++;
try {
if(separator.isBlank()) {
ilr = IndexedLineReader.makeWithBlankLines(path);
}
else {
ilr = IndexedLineReader.makeWithStringSeparator(path, separator);
}
this.separatorPositions.add(index);
} catch (IOException e) {
LOGGER.error("Could not read queries");
LOGGER.error("Failed to read this file for the queries: " + path + "\n" + e);
nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
}

this.size = separatorCount + 1;
}

@Override
public int size() {
return this.size;
return ilr.size();
}

@Override
public String getQuery(int index) throws IOException {
int start = this.separatorPositions.get(index) + 1;
int end = this.separatorPositions.get(index + 1);

try (Stream<String> lines = Files.lines(this.queryFile.toPath())) {
return lines.skip(start)
.limit(end - start)
.reduce((a, b) -> a + b)
.get();
} catch (FileNotFoundException e) {
LOGGER.error("Could not read queries");
}
return null;
return ilr.readLine(index);
}

@Override
public List<String> getAllQueries() throws IOException {
try (BufferedReader reader = FileUtils.getBufferedReader(this.queryFile)) {
List<String> queries = new ArrayList<>(this.size);
String line;
StringBuilder query = new StringBuilder();
while ((line = reader.readLine()) != null) {
if (line.equals(this.separator)) {
queries.add(query.toString());
query = new StringBuilder();
} else {
query.append(line);
}
}
queries.add(query.toString());
return queries;
}
return ilr.readLines();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -52,14 +50,7 @@ public int size() {

@Override
public String getQuery(int index) throws IOException {
try (BufferedReader reader = new BufferedReader(new FileReader(this.files[index]))) {
StringBuilder query = new StringBuilder();
String line;
while ((line = reader.readLine()) != null) {
query.append(line);
}
return query.toString();
}
return FileUtils.readFile(files[index].getAbsolutePath());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,74 +14,81 @@
public class FileUtils {

/**
* Counts the lines in a file efficiently. Props goes to:
* <a href="http://stackoverflow.com/a/453067/2917596">http://stackoverflow.com/a/453067/2917596</a>
* Counts the non-empty lines in a file. The fast byte-wise counting is only used if the file's line ending is "\n"; otherwise the file is read line by line. <br/>
* Source: <a href="http://stackoverflow.com/a/453067/2917596">http://stackoverflow.com/a/453067/2917596</a>
*
* @param filename File to count lines of
* @return No. of lines in File
* @param filename file to count lines of
* @return number of lines in the given file
* @throws IOException
*/
public static int countLines(File filename) throws IOException {
nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
try (InputStream is = new BufferedInputStream(new FileInputStream(filename))) {

byte[] c = new byte[1024];
int count = 0;
int readChars;
boolean empty = true;
byte lastChar = '\n';
while ((readChars = is.read(c)) != -1) {
for (int i = 0; i < readChars; ++i) {
if (c[i] == '\n') {
// Check if line was empty
if (lastChar != '\n') {
++count;
if(getLineEnding((filename.getAbsolutePath())).equals("\n")) {
final int BUFFER_SIZE = 8192;
try (FileInputStream fis = new FileInputStream(filename)) {
InputStream is = new BufferedInputStream(fis, BUFFER_SIZE);
byte[] c = new byte[BUFFER_SIZE];
int count = 0;
int readChars = 0;
boolean empty = true;
byte lastChar = '\n';
while ((readChars = is.read(c)) != -1) {
for (int i = 0; i < readChars; ++i) {
if (c[i] == '\n') {
// Check if line was empty
if (lastChar != '\n') {
++count;
}
} else {
empty = false;
}
} else {
empty = false;
lastChar = c[i];
}
lastChar = c[i];
}
if (lastChar != '\n') {
count++;
}
return (count == 0 && !empty) ? 1 : count;
}
if (lastChar != '\n') {
count++;
} else {
String line = "";
int count = 0;
try(FileReader fr = new FileReader(filename)) {
BufferedReader br = new BufferedReader(fr);
while ((line = br.readLine()) != null) {
if (!line.isEmpty()) {
count++;
}
}
}
return (count == 0 && !empty) ? 1 : count;
return count;
}
}

/**
* Returns a line at a given position of a File
*
* @param pos line which should be returned
* @param filename File in which the queries are stated
* @return line at pos
* Returns a line at a given position of a File. <br/>
* This method ignores every blank line, therefore the parameter <code>index</code> references the n-th non-blank line (counting from zero).
*
* @param index the zero-based position of the non-blank line which should be returned
* @param file the file to read from
* @return the line at the given position
* @throws IOException
*/
public static String readLineAt(int pos, File filename) throws IOException {
nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
try (InputStream is = new BufferedInputStream(new FileInputStream(filename))) {
StringBuilder line = new StringBuilder();
public static String readLineAt(int index, File file) throws IOException {
nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
String line = "";
int count = 0;

byte[] c = new byte[1024];
int count = 0;
int readChars;
byte lastChar = '\n';
while ((readChars = is.read(c)) != -1) {
for (int i = 0; i < readChars; ++i) {
if (c[i] == '\n') {
// Check if line was empty
if (lastChar != '\n') {
++count;
}
} else if (count == pos) {
// Now the line
line.append((char) c[i]);
try(FileReader fr = new FileReader(file)) {
BufferedReader br = new BufferedReader(fr);
nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
while ((line = br.readLine()) != null) {
if (!line.isBlank()) {
if (count == index) {
return line;
}
lastChar = c[i];
count++;
}
}

return line.toString();
}
}
return "";
}

public static int getHashcodeFromFileContent(String filepath) {
Expand All @@ -100,6 +107,40 @@ public static String readFile(String path) throws IOException {
return new String(encoded, StandardCharsets.UTF_8);
}

/**
* This method detects and returns the line-ending used in a file. <br/>
* It reads the file from the beginning until it encounters the first of the following line endings:
* <ul>
* <li>\r\n - Windows</li>
* <li>\n - Linux</li>
* <li>\r - old macOS</li>
* </ul>
*
* If the file doesn't contain a line ending, it defaults to <code>System.lineSeparator()</code>.
*
* @param filepath the path of the file to inspect
* @return the line ending used in the given file
* @throws IOException
*/
public static String getLineEnding(String filepath) throws IOException {
nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
try(FileReader fr = new FileReader(filepath)) {
BufferedReader br = new BufferedReader(fr);
nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
char c;
while ((c = (char) br.read()) != (char) -1) {
if (c == '\n')
return "\n";
else if (c == '\r') {
if ((char) br.read() == '\n')
return "\r\n";
return "\r";
}
}
}

// fall back if there is no line end in the file
return System.lineSeparator();
}

nck-mlcnv marked this conversation as resolved.
Show resolved Hide resolved
/**
* Opens the given file for buffered, character-based reading.
* The reader is returned open; closing it is left to the caller.
*
* @param queryFile the file to read from
* @return a new BufferedReader wrapping a FileReader on the given file
* @throws FileNotFoundException if the FileReader cannot be opened on the given file
*/
public static BufferedReader getBufferedReader(File queryFile) throws FileNotFoundException {
final FileReader source = new FileReader(queryFile);
return new BufferedReader(source);
}
Expand Down
Loading