Skip to content

Commit

Permalink
MRG: support check --upgrade to upgrade old versions of RocksDB/RevIndex (#581)
Browse files Browse the repository at this point in the history

* MRG: support multiple input files for singlesketch

* MRG: open rocksdb in rw mode so that old databases can be upgraded

* support check --upgrade; revert rw opening by default

* test 0.9.13 rocksdb too

* format

* cleanup

* fix

* add test to help debug

* add log?

* print out sourmash version

* print out sourmash version

* MRG: update to require sourmash v4.8.13

* upd
  • Loading branch information
ctb authored Jan 11, 2025
1 parent 609b2a1 commit a9a4934
Show file tree
Hide file tree
Showing 41 changed files with 4,548 additions and 11 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,7 @@ docs/_build/

# pixi
.pixi/

*.csv
*.zip
*.rocksdb
14 changes: 11 additions & 3 deletions src/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,21 @@ use anyhow::Result;

use sourmash::index::revindex::{RevIndex, RevIndexOps};

pub fn check(index: camino::Utf8PathBuf, quick: bool) -> Result<()> {
pub fn check(index: camino::Utf8PathBuf, quick: bool, rw: bool) -> Result<()> {
if !is_revindex_database(&index) {
bail!("'{}' is not a valid RevIndex database", index);
}

println!("Opening DB");
let db = RevIndex::open(index, true, None)?;
println!("Opening DB (rw mode? {})", rw);
let db = match RevIndex::open(index, !rw, None) {
Ok(db) => db,
Err(e) => {
return Err(anyhow::anyhow!(
"cannot open RocksDB database. Error is: {}",
e
))
}
};

println!("Starting check");
db.check(quick);
Expand Down
10 changes: 9 additions & 1 deletion src/fastmultigather_rocksdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,15 @@ pub fn fastmultigather_rocksdb(
bail!("'{}' is not a valid RevIndex database", index);
}
// Open database once
let db = RevIndex::open(index, true, None)?;
let db = match RevIndex::open(index, true, None) {
Ok(db) => db,
Err(e) => {
return Err(anyhow::anyhow!(
"cannot open RocksDB database. Error is: {}",
e
))
}
};
println!("Loaded DB");

// grab scaled from the database.
Expand Down
3 changes: 3 additions & 0 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ pub(crate) fn index_obj<P: AsRef<Path>>(
let mut index = RevIndex::create(output.as_ref(), collection, use_colors)?;

if use_internal_storage {
eprintln!("Internalizing storage.");
index.internalize_storage()?;
} else {
eprintln!("Using external storage - not copying sketches.");
}
Ok(())
}
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,9 @@ fn do_index(
}

#[pyfunction]
fn do_check(index: String, quick: bool) -> anyhow::Result<u8> {
fn do_check(index: String, quick: bool, rw: bool) -> anyhow::Result<u8> {
let idx: PathBuf = index.into();
match check::check(idx, quick) {
match check::check(idx, quick, rw) {
Ok(_) => Ok(0),
Err(e) => {
eprintln!("Error: {e}");
Expand Down
13 changes: 10 additions & 3 deletions src/manysearch_rocksdb.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/// manysearch_rocksdb: rocksdb-indexed version of manysearch.
use anyhow::Result;
use camino::Utf8PathBuf as PathBuf;
use log::debug;
use rayon::prelude::*;
use std::sync::atomic;
use std::sync::atomic::AtomicUsize;
Expand Down Expand Up @@ -30,9 +29,17 @@ pub fn manysearch_rocksdb(
if !is_revindex_database(&index) {
bail!("'{}' is not a valid RevIndex database", index);
}

// Open database once
debug!("Opened revindex: '{index}')");
let db = RevIndex::open(index, true, None)?;
let db = match RevIndex::open(index, true, None) {
Ok(db) => db,
Err(e) => {
return Err(anyhow::anyhow!(
"cannot open RocksDB database. Error is: {}",
e
))
}
};

println!("Loaded DB");

Expand Down
10 changes: 9 additions & 1 deletion src/python/sourmash_plugin_branchwater/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,11 +405,19 @@ def __init__(self, p):
super().__init__(p)
p.add_argument("index", help="RocksDB index file created with 'index'")
p.add_argument("--quick", action="store_true")
p.add_argument(
"--writable",
"--upgrade",
action="store_true",
help="open database in read-write mode to upgrade the internal format if needed",
)

def main(self, args):
notify(f"checking index '{args.index}'")
super().main(args)
status = sourmash_plugin_branchwater.do_check(args.index, args.quick)
status = sourmash_plugin_branchwater.do_check(
args.index, args.quick, args.writable
)
if status == 0:
notify(f"...index is ok!")
return status
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Created with:

sourmash scripts index podar-ref-subset.sig.zip -o podar-ref-subset.branch0_9_13.external.rocksdb -s 100_000 -k 31 --no-internal-storage

using sourmash_plugin_branchwater v0.9.13.

This command uses external storage for the sketches (--no-internal-storage), so
the zip file (podar-ref-subset.sig.zip) must be in the current working directory
in order to run fastmultigather against this database.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
MANIFEST-000005
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
79684d80-0690-40da-9060-a5f52f3e0bfc
Empty file.

Large diffs are not rendered by default.

Binary file not shown.
Loading

0 comments on commit a9a4934

Please sign in to comment.