Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Parquet data for HNSWDenseVector #2582

Merged
merged 8 commits into from
Sep 10, 2024
Merged
Prev Previous commit
Next Next commit
cleanup
  • Loading branch information
valamuri2020 committed Aug 29, 2024
commit 3a56a0a173db2a09b2fa76913d5422c778f8da5f
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public static class Segment extends FileSegment<ParquetDenseVectorCollection.Doc
*/
public Segment(java.nio.file.Path path) throws IOException {
super(path);
initializeParquetReader(path); // Initialize the Parquet reader and load data
initializeParquetReader(path);
}

/**
Expand Down Expand Up @@ -145,11 +145,11 @@ private void initializeParquetReader(java.nio.file.Path path) throws IOException
while ((record = reader.read()) != null) {
// Extract the docid (String) from the record
String docid = record.getString("docid", 0);
ids.add(docid); // Add to the list of IDs
ids.add(docid);

// Extract the contents (String) from the record
String content = record.getString("contents", 0);
contents.add(content); // Add to the list of contents
contents.add(content);

// Extract the vector (double[]) from the record
Group vectorGroup = record.getGroup("vector", 0); // Access the 'vector' field
Expand All @@ -159,11 +159,11 @@ private void initializeParquetReader(java.nio.file.Path path) throws IOException
Group listGroup = vectorGroup.getGroup(0, i); // Access the 'list' group
vector[i] = listGroup.getDouble("element", 0); // Get the double value from the 'element' field
}
vectors.add(vector); // Add to the list of vectors
vectors.add(vector);
}

reader.close(); // Close the reader
currentIndex = 0; // Start iterating from the beginning
reader.close();
currentIndex = 0;
}

/**
Expand All @@ -176,8 +176,8 @@ private void initializeParquetReader(java.nio.file.Path path) throws IOException
protected synchronized void readNext() throws IOException, NoSuchElementException {
// Check if we have reached the end of the list
if (currentIndex >= ids.size()) {
atEOF = true; // Set the end-of-file flag
throw new NoSuchElementException("End of file reached"); // Throw exception to signal end of data
atEOF = true;
throw new NoSuchElementException("End of file reached");
}

// Get the current document's ID, contents, and vector
Expand All @@ -188,7 +188,6 @@ protected synchronized void readNext() throws IOException, NoSuchElementExceptio
// Create a new Document object with the retrieved data
bufferedRecord = new ParquetDenseVectorCollection.Document(id, vector, content);

// Move to the next document
currentIndex++;
}
}
Expand All @@ -197,9 +196,9 @@ protected synchronized void readNext() throws IOException, NoSuchElementExceptio
* Inner class representing a document in the ParquetDenseVectorCollection.
*/
public static class Document implements SourceDocument {
private final String id; // Document ID
private final double[] vector; // Vector data
private final String raw; // Raw data
private final String id;
private final double[] vector;
private final String raw;

/**
* Constructor for the Document class.
Expand Down
Loading