From 670623cd2455b4c80b6784ee81d0543c5f8413f7 Mon Sep 17 00:00:00 2001 From: Jimmy Lin Date: Wed, 26 Jun 2024 17:01:23 -0400 Subject: [PATCH] Refactor tolerance settings for BEIR dense vector regressions (#2538) + tweak docs for flat indexes. + refactor tolerance values for HNSW indexes, calibrate wrt flat index scores. --- docs/experiments-msmarco-passage.md | 3 +- ...guana.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...arguana.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ....0-arguana.bge-base-en-v1.5.flat.cached.md | 2 +- ....0.0-arguana.bge-base-en-v1.5.flat.onnx.md | 4 +- ...guana.bge-base-en-v1.5.hnsw-int8.cached.md | 17 +- ...arguana.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ....0-arguana.bge-base-en-v1.5.hnsw.cached.md | 5 +- ....0.0-arguana.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...ioasq.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...-bioasq.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...0.0-bioasq.bge-base-en-v1.5.flat.cached.md | 2 +- ...1.0.0-bioasq.bge-base-en-v1.5.flat.onnx.md | 4 +- ...ioasq.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...0.0-bioasq.bge-base-en-v1.5.hnsw.cached.md | 11 +- ...1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...fever.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...e-fever.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...mate-fever.bge-base-en-v1.5.flat.cached.md | 2 +- ...limate-fever.bge-base-en-v1.5.flat.onnx.md | 4 +- ...fever.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...e-fever.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...mate-fever.bge-base-en-v1.5.hnsw.cached.md | 7 +- ...limate-fever.bge-base-en-v1.5.hnsw.onnx.md | 9 +- ...droid.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...android.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...ck-android.bge-base-en-v1.5.flat.cached.md | 2 +- ...tack-android.bge-base-en-v1.5.flat.onnx.md | 4 +- ...droid.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...android.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- 
...ck-android.bge-base-en-v1.5.hnsw.cached.md | 7 +- ...tack-android.bge-base-en-v1.5.hnsw.onnx.md | 9 +- ...glish.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...english.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...ck-english.bge-base-en-v1.5.flat.cached.md | 2 +- ...tack-english.bge-base-en-v1.5.flat.onnx.md | 4 +- ...glish.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...english.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...ck-english.bge-base-en-v1.5.hnsw.cached.md | 11 +- ...tack-english.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...aming.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...-gaming.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...ack-gaming.bge-base-en-v1.5.flat.cached.md | 2 +- ...stack-gaming.bge-base-en-v1.5.flat.onnx.md | 4 +- ...aming.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...-gaming.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...ack-gaming.bge-base-en-v1.5.hnsw.cached.md | 11 +- ...stack-gaming.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...k-gis.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...ack-gis.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...pstack-gis.bge-base-en-v1.5.flat.cached.md | 2 +- ...dupstack-gis.bge-base-en-v1.5.flat.onnx.md | 4 +- ...k-gis.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...ack-gis.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...pstack-gis.bge-base-en-v1.5.hnsw.cached.md | 11 +- ...dupstack-gis.bge-base-en-v1.5.hnsw.onnx.md | 9 +- ...atica.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...ematica.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...athematica.bge-base-en-v1.5.flat.cached.md | 2 +- ...-mathematica.bge-base-en-v1.5.flat.onnx.md | 4 +- ...atica.bge-base-en-v1.5.hnsw-int8.cached.md | 15 +- ...ematica.bge-base-en-v1.5.hnsw-int8.onnx.md | 15 +- ...athematica.bge-base-en-v1.5.hnsw.cached.md | 5 +- ...-mathematica.bge-base-en-v1.5.hnsw.onnx.md | 5 +- ...ysics.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...physics.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...ck-physics.bge-base-en-v1.5.flat.cached.md | 2 +- 
...tack-physics.bge-base-en-v1.5.flat.onnx.md | 4 +- ...ysics.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...physics.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...ck-physics.bge-base-en-v1.5.hnsw.cached.md | 5 +- ...tack-physics.bge-base-en-v1.5.hnsw.onnx.md | 5 +- ...mmers.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...rammers.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...rogrammers.bge-base-en-v1.5.flat.cached.md | 2 +- ...-programmers.bge-base-en-v1.5.flat.onnx.md | 4 +- ...mmers.bge-base-en-v1.5.hnsw-int8.cached.md | 17 +- ...rammers.bge-base-en-v1.5.hnsw-int8.onnx.md | 17 +- ...rogrammers.bge-base-en-v1.5.hnsw.cached.md | 5 +- ...-programmers.bge-base-en-v1.5.hnsw.onnx.md | 5 +- ...stats.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...k-stats.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...tack-stats.bge-base-en-v1.5.flat.cached.md | 2 +- ...pstack-stats.bge-base-en-v1.5.flat.onnx.md | 4 +- ...stats.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...k-stats.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...tack-stats.bge-base-en-v1.5.hnsw.cached.md | 7 +- ...pstack-stats.bge-base-en-v1.5.hnsw.onnx.md | 9 +- ...k-tex.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...ack-tex.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...pstack-tex.bge-base-en-v1.5.flat.cached.md | 2 +- ...dupstack-tex.bge-base-en-v1.5.flat.onnx.md | 4 +- ...k-tex.bge-base-en-v1.5.hnsw-int8.cached.md | 17 +- ...ack-tex.bge-base-en-v1.5.hnsw-int8.onnx.md | 17 +- ...pstack-tex.bge-base-en-v1.5.hnsw.cached.md | 11 +- ...dupstack-tex.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...-unix.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...ck-unix.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...stack-unix.bge-base-en-v1.5.flat.cached.md | 2 +- ...upstack-unix.bge-base-en-v1.5.flat.onnx.md | 4 +- ...-unix.bge-base-en-v1.5.hnsw-int8.cached.md | 17 +- ...ck-unix.bge-base-en-v1.5.hnsw-int8.onnx.md | 17 +- ...stack-unix.bge-base-en-v1.5.hnsw.cached.md | 7 +- ...upstack-unix.bge-base-en-v1.5.hnsw.onnx.md | 7 +- 
...sters.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...masters.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...webmasters.bge-base-en-v1.5.flat.cached.md | 2 +- ...k-webmasters.bge-base-en-v1.5.flat.onnx.md | 4 +- ...sters.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...masters.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...webmasters.bge-base-en-v1.5.hnsw.cached.md | 9 +- ...k-webmasters.bge-base-en-v1.5.hnsw.onnx.md | 7 +- ...press.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...rdpress.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...-wordpress.bge-base-en-v1.5.flat.cached.md | 2 +- ...ck-wordpress.bge-base-en-v1.5.flat.onnx.md | 4 +- ...press.bge-base-en-v1.5.hnsw-int8.cached.md | 15 +- ...rdpress.bge-base-en-v1.5.hnsw-int8.onnx.md | 17 +- ...-wordpress.bge-base-en-v1.5.hnsw.cached.md | 7 +- ...ck-wordpress.bge-base-en-v1.5.hnsw.onnx.md | 7 +- ...ntity.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...-entity.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...dia-entity.bge-base-en-v1.5.flat.cached.md | 2 +- ...pedia-entity.bge-base-en-v1.5.flat.onnx.md | 4 +- ...ntity.bge-base-en-v1.5.hnsw-int8.cached.md | 17 +- ...-entity.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...dia-entity.bge-base-en-v1.5.hnsw.cached.md | 9 +- ...pedia-entity.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...fever.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...0-fever.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ....0.0-fever.bge-base-en-v1.5.flat.cached.md | 2 +- ...v1.0.0-fever.bge-base-en-v1.5.flat.onnx.md | 4 +- ...fever.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...0-fever.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ....0.0-fever.bge-base-en-v1.5.hnsw.cached.md | 11 +- ...v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...-fiqa.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ....0-fiqa.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...1.0.0-fiqa.bge-base-en-v1.5.flat.cached.md | 2 +- ...-v1.0.0-fiqa.bge-base-en-v1.5.flat.onnx.md | 4 +- ...-fiqa.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- 
....0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.md | 11 +- ...-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...potqa.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...otpotqa.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...0-hotpotqa.bge-base-en-v1.5.flat.cached.md | 2 +- ...0.0-hotpotqa.bge-base-en-v1.5.flat.onnx.md | 4 +- ...potqa.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...otpotqa.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...0-hotpotqa.bge-base-en-v1.5.hnsw.cached.md | 11 +- ...0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...orpus.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...fcorpus.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...0-nfcorpus.bge-base-en-v1.5.flat.cached.md | 2 +- ...0.0-nfcorpus.bge-base-en-v1.5.flat.onnx.md | 4 +- ...orpus.bge-base-en-v1.5.hnsw-int8.cached.md | 17 +- ...fcorpus.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...0-nfcorpus.bge-base-en-v1.5.hnsw.cached.md | 9 +- ...0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.md | 9 +- ....0-nq.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ....0.0-nq.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...-v1.0.0-nq.bge-base-en-v1.5.flat.cached.md | 2 +- ...ir-v1.0.0-nq.bge-base-en-v1.5.flat.onnx.md | 4 +- ....0-nq.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ....0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.md | 9 +- ...ir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.md | 9 +- ...quora.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...0-quora.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ....0.0-quora.bge-base-en-v1.5.flat.cached.md | 2 +- ...v1.0.0-quora.bge-base-en-v1.5.flat.onnx.md | 4 +- ...quora.bge-base-en-v1.5.hnsw-int8.cached.md | 15 +- ...0-quora.bge-base-en-v1.5.hnsw-int8.onnx.md | 15 +- ....0.0-quora.bge-base-en-v1.5.hnsw.cached.md | 5 +- ...v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.md | 7 +- ...ust04.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...obust04.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- 
...0-robust04.bge-base-en-v1.5.flat.cached.md | 2 +- ...0.0-robust04.bge-base-en-v1.5.flat.onnx.md | 4 +- ...ust04.bge-base-en-v1.5.hnsw-int8.cached.md | 17 +- ...obust04.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...0-robust04.bge-base-en-v1.5.hnsw.cached.md | 9 +- ...0.0-robust04.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...idocs.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...scidocs.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ....0-scidocs.bge-base-en-v1.5.flat.cached.md | 2 +- ....0.0-scidocs.bge-base-en-v1.5.flat.onnx.md | 4 +- ...idocs.bge-base-en-v1.5.hnsw-int8.cached.md | 17 +- ...scidocs.bge-base-en-v1.5.hnsw-int8.onnx.md | 17 +- ....0-scidocs.bge-base-en-v1.5.hnsw.cached.md | 7 +- ....0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.md | 7 +- ...ifact.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...scifact.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ....0-scifact.bge-base-en-v1.5.flat.cached.md | 2 +- ....0.0-scifact.bge-base-en-v1.5.flat.onnx.md | 4 +- ...ifact.bge-base-en-v1.5.hnsw-int8.cached.md | 15 +- ...scifact.bge-base-en-v1.5.hnsw-int8.onnx.md | 17 +- ....0-scifact.bge-base-en-v1.5.hnsw.cached.md | 5 +- ....0.0-scifact.bge-base-en-v1.5.hnsw.onnx.md | 5 +- ...nal1m.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...ignal1m.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...0-signal1m.bge-base-en-v1.5.flat.cached.md | 2 +- ...0.0-signal1m.bge-base-en-v1.5.flat.onnx.md | 4 +- ...nal1m.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...ignal1m.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...0-signal1m.bge-base-en-v1.5.hnsw.cached.md | 11 +- ...0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...covid.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...c-covid.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...trec-covid.bge-base-en-v1.5.flat.cached.md | 2 +- ...0-trec-covid.bge-base-en-v1.5.flat.onnx.md | 4 +- ...covid.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...c-covid.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...trec-covid.bge-base-en-v1.5.hnsw.cached.md | 5 +- 
...0-trec-covid.bge-base-en-v1.5.hnsw.onnx.md | 7 +- ...-news.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...ec-news.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...-trec-news.bge-base-en-v1.5.flat.cached.md | 2 +- ....0-trec-news.bge-base-en-v1.5.flat.onnx.md | 4 +- ...-news.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...ec-news.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...-trec-news.bge-base-en-v1.5.hnsw.cached.md | 11 +- ....0-trec-news.bge-base-en-v1.5.hnsw.onnx.md | 11 +- ...e2020.bge-base-en-v1.5.flat-int8.cached.md | 5 +- ...che2020.bge-base-en-v1.5.flat-int8.onnx.md | 5 +- ...touche2020.bge-base-en-v1.5.flat.cached.md | 2 +- ...s-touche2020.bge-base-en-v1.5.flat.onnx.md | 4 +- ...e2020.bge-base-en-v1.5.hnsw-int8.cached.md | 19 +- ...che2020.bge-base-en-v1.5.hnsw-int8.onnx.md | 19 +- ...touche2020.bge-base-en-v1.5.hnsw.cached.md | 7 +- ...s-touche2020.bge-base-en-v1.5.hnsw.onnx.md | 9 +- docs/rest-api.md | 87 +- docs/start-here.md | 1 + src/main/frontend/package.json | 8 +- src/main/frontend/pages/_app.tsx | 20 +- .../frontend/pages/components/Dropdown.tsx | 171 ++- .../frontend/pages/components/SearchBar.tsx | 112 +- src/main/frontend/pages/index.tsx | 30 +- src/main/frontend/styles/globals.css | 14 - src/main/frontend/yarn.lock | 1179 ++++++++++++++++- .../io/anserini/index/AbstractIndexer.java | 647 ++++----- .../anserini/index/IndexHnswDenseVectors.java | 421 +++--- ...feTensorsDenseVectorDocumentGenerator.java | 28 +- .../io/anserini/server/ControllerV1_0.java | 6 + .../io/anserini/server/SearchService.java | 40 +- src/main/python/run_regression.py | 216 +-- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...a.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...uana.bge-base-en-v1.5.flat.cached.template | 2 +- ...rguana.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...a.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...uana.bge-base-en-v1.5.hnsw.cached.template | 5 +- 
...rguana.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...q.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...oasq.bge-base-en-v1.5.flat.cached.template | 2 +- ...bioasq.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...q.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...oasq.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...bioasq.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...r.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...ever.bge-base-en-v1.5.flat.cached.template | 2 +- ...-fever.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...r.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...ever.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...-fever.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...d.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...roid.bge-base-en-v1.5.flat.cached.template | 2 +- ...ndroid.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...d.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...roid.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...ndroid.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...h.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...lish.bge-base-en-v1.5.flat.cached.template | 2 +- ...nglish.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...h.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...lish.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...nglish.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...g.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...ming.bge-base-en-v1.5.flat.cached.template | 2 +- ...gaming.bge-base-en-v1.5.flat.onnx.template | 4 +- 
...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...g.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...ming.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...gaming.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...-gis.bge-base-en-v1.5.flat.cached.template | 2 +- ...ck-gis.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...-gis.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...ck-gis.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...a.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...tica.bge-base-en-v1.5.flat.cached.template | 2 +- ...matica.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...a.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...tica.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...matica.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...sics.bge-base-en-v1.5.flat.cached.template | 2 +- ...hysics.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...sics.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...hysics.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...mers.bge-base-en-v1.5.flat.cached.template | 2 +- ...ammers.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...mers.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...ammers.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- 
...s.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...tats.bge-base-en-v1.5.flat.cached.template | 2 +- ...-stats.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...tats.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...-stats.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...x.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...-tex.bge-base-en-v1.5.flat.cached.template | 2 +- ...ck-tex.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...x.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...-tex.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...ck-tex.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...x.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...unix.bge-base-en-v1.5.flat.cached.template | 2 +- ...k-unix.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...x.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...unix.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...k-unix.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...ters.bge-base-en-v1.5.flat.cached.template | 2 +- ...asters.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...ters.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...asters.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...ress.bge-base-en-v1.5.flat.cached.template | 2 +- ...dpress.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- 
...ress.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...dpress.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...y.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...tity.bge-base-en-v1.5.flat.cached.template | 2 +- ...entity.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...y.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...tity.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...entity.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...r.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...ever.bge-base-en-v1.5.flat.cached.template | 2 +- ...-fever.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...r.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...ever.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...-fever.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...a.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...fiqa.bge-base-en-v1.5.flat.cached.template | 2 +- ...0-fiqa.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...a.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...fiqa.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...0-fiqa.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...a.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...otqa.bge-base-en-v1.5.flat.cached.template | 2 +- ...tpotqa.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...a.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...otqa.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...tpotqa.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...rpus.bge-base-en-v1.5.flat.cached.template | 2 +- 
...corpus.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...rpus.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...corpus.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...q.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...0-nq.bge-base-en-v1.5.flat.cached.template | 2 +- ...0.0-nq.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...q.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...0-nq.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...0.0-nq.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...a.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...uora.bge-base-en-v1.5.flat.cached.template | 2 +- ...-quora.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...a.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...uora.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...-quora.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...4.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...st04.bge-base-en-v1.5.flat.cached.template | 2 +- ...bust04.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...4.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...st04.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...bust04.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...docs.bge-base-en-v1.5.flat.cached.template | 2 +- ...cidocs.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...docs.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...cidocs.bge-base-en-v1.5.hnsw.onnx.template | 5 +- 
...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...t.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...fact.bge-base-en-v1.5.flat.cached.template | 2 +- ...cifact.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...t.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...fact.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...cifact.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...m.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...al1m.bge-base-en-v1.5.flat.cached.template | 2 +- ...gnal1m.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...m.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...al1m.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...gnal1m.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...d.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...ovid.bge-base-en-v1.5.flat.cached.template | 2 +- ...-covid.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...d.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...ovid.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...-covid.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...news.bge-base-en-v1.5.flat.cached.template | 2 +- ...c-news.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- ...s.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...news.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...c-news.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...bge-base-en-v1.5.flat-int8.cached.template | 5 +- ...0.bge-base-en-v1.5.flat-int8.onnx.template | 5 +- ...2020.bge-base-en-v1.5.flat.cached.template | 2 +- ...he2020.bge-base-en-v1.5.flat.onnx.template | 4 +- ...bge-base-en-v1.5.hnsw-int8.cached.template | 5 +- 
...0.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 +- ...2020.bge-base-en-v1.5.hnsw.cached.template | 5 +- ...he2020.bge-base-en-v1.5.hnsw.onnx.template | 5 +- ...ana.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...guana.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...-arguana.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ....0-arguana.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...asq.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...ioasq.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...0-bioasq.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ver.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...te-fever.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...mate-fever.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...oid.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...droid.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...-android.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...ck-android.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ish.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...glish.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...-english.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...ck-english.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ing.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...aming.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...k-gaming.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...ack-gaming.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...gis.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...k-gis.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...tack-gis.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...pstack-gis.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ica.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...atica.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...hematica.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...athematica.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ics.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...ysics.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- 
...-physics.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...ck-physics.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ers.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...mmers.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...grammers.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...rogrammers.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ats.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...stats.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...ck-stats.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...tack-stats.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...tex.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...k-tex.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...tack-tex.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...pstack-tex.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...nix.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...-unix.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...ack-unix.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...stack-unix.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ers.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...sters.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...bmasters.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...webmasters.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ess.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...press.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...ordpress.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...-wordpress.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ity.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...ntity.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...a-entity.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...dia-entity.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ver.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ....0-fever.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ....0.0-fever.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...iqa.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...0.0-fiqa.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- 
...1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...tqa.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...potqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...hotpotqa.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...pus.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...orpus.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...nfcorpus.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...-nq.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ....0-nq.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...1.0.0-nq.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ora.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...quora.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ....0-quora.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ....0.0-quora.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...t04.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...ust04.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...robust04.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...0-robust04.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...ocs.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...idocs.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...-scidocs.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ....0-scidocs.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...act.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...ifact.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...-scifact.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ....0-scifact.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...l1m.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...nal1m.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...signal1m.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...0-signal1m.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...vid.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...covid.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...ec-covid.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...trec-covid.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- 
...ews.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...-news.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...rec-news.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...-trec-news.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- ...020.bge-base-en-v1.5.hnsw-int8.cached.yaml | 8 +- ...e2020.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 8 +- ...uche2020.bge-base-en-v1.5.hnsw.cached.yaml | 6 +- ...touche2020.bge-base-en-v1.5.hnsw.onnx.yaml | 6 +- 596 files changed, 4167 insertions(+), 2682 deletions(-) diff --git a/docs/experiments-msmarco-passage.md b/docs/experiments-msmarco-passage.md index f7eed13a64..0a4e7d6084 100644 --- a/docs/experiments-msmarco-passage.md +++ b/docs/experiments-msmarco-passage.md @@ -498,5 +498,4 @@ The BM25 run with default parameters `k1=0.9`, `b=0.4` roughly corresponds to th + Results reproduced by [@alireza-taban](https://github.com/alireza-taban) on 2024-06-10 (commit [`59330e3`](https://github.com/castorini/anserini/commit/59330e355b4aaf6754622cb3a136259dea0d8d05)) + Results reproduced by [@Feng-12138](https://github.com/Feng-12138) on 2024-06-16 (commit [`ad97377`](https://github.com/castorini/anserini/commit/ad97377e463e70ee8b2f501ac7c41134af53e976)) + Results reproduced by [@hosnahoseini](https://github.com/hosnahoseini) on 2024-06-18 (commit [`ad97377`](https://github.com/castorini/anserini/commit/ad97377e463e70ee8b2f501ac7c41134af53e976)) - - ++ Results reproduced by [@FaizanFaisal25](https://github.com/FaizanFaisal25) on 2024-06-29 (commit [`e92370a`](https://github.com/FaizanFaisal25/anserini/commit/e92370a06eaa3bbc5bacdba65cc9c3f125590071)) \ No newline at end of file diff --git a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached.md index e166240170..c2e4f7fb54 100644 --- a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached.md +++ 
b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): ArguAna | 0.9964 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.onnx.md index 2aabf635cc..2978b169d2 100644 --- a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): ArguAna | 0.9964 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached.md index 7fd2624b30..49b9ecbc53 100644 --- a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): ArguAna | 0.9964 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat.onnx.md index 3871294808..0fd17629c8 100644 --- a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): ArguAna | 0.9964 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). 
+The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.md index c29a9aeddc..6d4b10a4b6 100644 --- a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt 
runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): ArguAna | 0.635 | +| BEIR (v1.0.0): ArguAna | 0.636 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): ArguAna | 0.991 | +| BEIR (v1.0.0): ArguAna | 0.992 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): ArguAna | 0.996 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). 
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.md index a5ded15a64..1663c1726c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-arguana.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-arguana.test.txt \ + -output runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-arguana.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-arguana.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-arguana.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-arguana.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-arguana.test.txt +bin/trec_eval -c -m recall.100 
tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-arguana.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-arguana.test.txt runs/run.beir-v1.0.0-arguana.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-arguana.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): ArguAna | 0.621 | +| BEIR (v1.0.0): ArguAna | 0.636 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): ArguAna | 0.971 | +| BEIR (v1.0.0): ArguAna | 0.992 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): ArguAna | 0.994 | +| BEIR (v1.0.0): ArguAna | 0.996 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.md index 25eab0761a..dece957530 100644 --- a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): ArguAna | 0.996 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.md index c2823bb9ce..7abfde0167 100644 --- a/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): ArguAna | 0.623 | +| BEIR (v1.0.0): ArguAna | 0.636 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): ArguAna | 0.972 | +| BEIR (v1.0.0): ArguAna | 0.992 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): ArguAna | 0.993 | +| BEIR (v1.0.0): ArguAna | 0.996 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached.md index 6706e73e0c..eaa77f5efe 100644 --- a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): BioASQ | 0.8059 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.onnx.md index ac6975fe62..3aec45cc05 100644 --- a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): BioASQ | 0.8059 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached.md index f050409127..40d623cec5 100644 --- a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): BioASQ | 0.8059 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.onnx.md index 462cb399f4..45ce82ae41 100644 --- a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): BioASQ | 0.8059 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.md index 16ea532a5d..6108b48189 100644 --- a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-bioasq.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 5000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 
tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): BioASQ | 0.407 | +| BEIR (v1.0.0): BioASQ | 0.415 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): BioASQ | 0.624 | +| BEIR (v1.0.0): BioASQ | 0.632 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): BioASQ | 0.795 | +| BEIR (v1.0.0): BioASQ | 0.806 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.md index 75a8eee975..bf7cfed406 100644 --- a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-bioasq.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-bioasq.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-bioasq.test.txt \ + -output runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-bioasq.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 5000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-bioasq.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-bioasq.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-bioasq.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-bioasq.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-bioasq.test.txt +bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.beir-v1.0.0-bioasq.test.txt runs/run.beir-v1.0.0-bioasq.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-bioasq.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): BioASQ | 0.408 | +| BEIR (v1.0.0): BioASQ | 0.415 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): BioASQ | 0.624 | +| BEIR (v1.0.0): BioASQ | 0.632 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): BioASQ | 0.797 | +| BEIR (v1.0.0): BioASQ | 0.806 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.md index 18acd90df5..322bf90245 100644 --- a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): BioASQ | 0.410 | +| BEIR (v1.0.0): BioASQ | 0.415 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): BioASQ | 0.622 | +| BEIR (v1.0.0): BioASQ | 0.632 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): BioASQ | 0.794 | +| BEIR (v1.0.0): BioASQ | 0.806 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.md index 283bbe5fa2..8eb7c23ff9 100644 --- a/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): BioASQ | 0.414 | +| BEIR (v1.0.0): BioASQ | 0.415 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): BioASQ | 0.628 | +| BEIR (v1.0.0): BioASQ | 0.632 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): BioASQ | 0.802 | +| BEIR (v1.0.0): BioASQ | 0.806 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached.md index 8274abbb61..93631b396b 100644 --- a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Climate-FEVER | 0.8307 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.onnx.md index 3c2da6432c..6f1c6fc21e 100644 --- a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Climate-FEVER | 0.8307 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached.md index ae507fd89a..1fc3402b8b 100644 --- a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Climate-FEVER | 0.8307 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.onnx.md index e3accbdbba..215c009072 100644 --- a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Climate-FEVER | 0.8307 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.md index 9652919ae7..251facc2f0 100644 --- a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt 
runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Climate-FEVER | 0.309 | +| BEIR (v1.0.0): Climate-FEVER | 0.312 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Climate-FEVER | 0.633 | +| BEIR (v1.0.0): Climate-FEVER | 0.636 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Climate-FEVER | 0.829 | +| BEIR (v1.0.0): Climate-FEVER | 0.831 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.md index db21f75aa8..ab988c66ec 100644 --- a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-climate-fever.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-climate-fever.test.txt \ + -output runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-climate-fever.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-climate-fever.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-climate-fever.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-climate-fever.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-climate-fever.test.txt +bin/trec_eval -c -m recall.100 
tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-climate-fever.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-climate-fever.test.txt runs/run.beir-v1.0.0-climate-fever.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-climate-fever.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Climate-FEVER | 0.308 | +| BEIR (v1.0.0): Climate-FEVER | 0.312 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Climate-FEVER | 0.633 | +| BEIR (v1.0.0): Climate-FEVER | 0.636 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Climate-FEVER | 0.829 | +| BEIR (v1.0.0): Climate-FEVER | 0.831 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.md index 95b04f3b46..ce03461a3a 100644 --- a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.md @@ -78,7 +78,8 @@ With the above commands, you should be able to reproduce the following results: | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Climate-FEVER | 0.636 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Climate-FEVER | 0.829 | +| BEIR (v1.0.0): Climate-FEVER | 0.831 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.md index 7a01d7f1f8..606adcb013 100644 --- a/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.md @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): Climate-FEVER | 0.312 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Climate-FEVER | 0.635 | +| BEIR (v1.0.0): Climate-FEVER | 0.636 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Climate-FEVER | 0.830 | +| BEIR (v1.0.0): Climate-FEVER | 0.831 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached.md index aac73e5978..6c707bf2a7 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-android | 0.9611 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.onnx.md index f1515f1457..2c22129693 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-android | 0.9611 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached.md index 7eef5b532d..da51e55dac 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-android | 0.9611 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.onnx.md index ff71ec5911..2f83356077 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-android | 0.9611 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.md index 3ba9a7a67d..bde24eb548 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-android | 0.509 | +| BEIR (v1.0.0): CQADupStack-android | 0.507 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-android | 0.844 | +| BEIR (v1.0.0): CQADupStack-android | 0.845 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-android | 0.962 | +| BEIR (v1.0.0): CQADupStack-android | 0.961 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.md index 6c95de3abf..b45d5e9a0c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-android.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-android.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-android.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-android.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt 
runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-android.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-android.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-android.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-android.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-android.test.txt runs/run.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-android.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-android | 0.509 | +| BEIR (v1.0.0): CQADupStack-android | 0.507 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-android | 0.843 | +| BEIR (v1.0.0): CQADupStack-android | 0.845 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-android | 0.962 | +| BEIR (v1.0.0): CQADupStack-android | 0.961 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.md index aadc6b39fd..53721a588c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.md @@ -78,7 +78,8 @@ With the above commands, you should be able to reproduce the following results: | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-android | 0.845 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-android | 0.962 | +| BEIR (v1.0.0): CQADupStack-android | 0.961 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. 
+With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.md index bce0222006..bcb5303d67 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-android | 0.508 | +| BEIR (v1.0.0): CQADupStack-android | 0.507 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-android | 0.845 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-android | 0.962 | +| BEIR (v1.0.0): CQADupStack-android | 0.961 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). 
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached.md index 4e2ce73076..c4952d1ad4 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-english | 0.8839 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.onnx.md index 21a5e04431..9768beb643 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-english | 0.8839 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached.md index 18dd4c937f..7425edb5bf 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-english | 0.8839 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.onnx.md index a2f93c13c0..c09e638f4d 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-english | 0.8839 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.md index 51582a76cf..f7c0929b89 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-english | 0.484 | +| BEIR (v1.0.0): CQADupStack-english | 0.486 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-english | 0.756 | +| BEIR (v1.0.0): CQADupStack-english | 0.759 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-english | 0.883 | +| BEIR (v1.0.0): CQADupStack-english | 0.884 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.md index c8c6e55d29..1cc1a0c176 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-english.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-english.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-english.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-english.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt 
runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-english.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-english.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-english.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-english.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-english.test.txt runs/run.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-english.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-english | 0.484 | +| BEIR (v1.0.0): CQADupStack-english | 0.486 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-english | 0.756 | +| BEIR (v1.0.0): CQADupStack-english | 0.759 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-english | 0.882 | +| BEIR (v1.0.0): CQADupStack-english | 0.884 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.md index 35d3c27956..eacd2c4006 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-english | 0.485 | +| BEIR (v1.0.0): CQADupStack-english | 0.486 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-english | 0.757 | +| BEIR (v1.0.0): CQADupStack-english | 0.759 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-english | 0.882 | +| BEIR (v1.0.0): CQADupStack-english | 0.884 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.md index 8d2a0f0fe1..9fd90ccb49 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-english | 0.484 | +| BEIR (v1.0.0): CQADupStack-english | 0.486 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-english | 0.756 | +| BEIR (v1.0.0): CQADupStack-english | 0.759 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-english | 0.881 | +| BEIR (v1.0.0): CQADupStack-english | 0.884 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached.md index 8d9adfb23b..b433ebd7f8 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-gaming | 0.9719 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.onnx.md index 3782030df9..17b634f8c8 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-gaming | 0.9719 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached.md index b7544e1da9..63e6bb3c70 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-gaming | 0.9719 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.onnx.md index 0762c56b86..7e9e1df333 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-gaming | 0.9719 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.md index 79ffb45b38..3f0db2c052 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-gaming | 0.593 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.596 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gaming | 0.901 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.904 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gaming | 0.969 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.972 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.md index e41f91ea38..52524246a4 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-gaming.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-gaming.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-gaming.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-gaming.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt 
runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-gaming.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-gaming.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-gaming.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-gaming.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gaming.test.txt runs/run.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-gaming.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-gaming | 0.593 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.596 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gaming | 0.901 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.904 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gaming | 0.969 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.972 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.md index 25826a6cff..97e9181b0d 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-gaming | 0.595 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.596 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gaming | 0.901 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.904 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gaming | 0.970 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.972 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. 
+With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.md index 6de343f220..258b2db5b7 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-gaming | 0.595 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.596 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gaming | 0.901 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.904 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gaming | 0.970 | +| BEIR (v1.0.0): CQADupStack-gaming | 0.972 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. 
+With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached.md index 2ea092599f..ad2fafd987 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-gis | 0.9117 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.onnx.md index abd349b61c..2f407a171f 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-gis | 0.9117 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached.md index bde6f607bd..ccfecc3580 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-gis | 0.9117 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.onnx.md index 3c283e61a0..d108ce0b12 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-gis | 0.9117 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.md index f23483c924..ce2f65aa08 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt 
runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-gis | 0.415 | +| BEIR (v1.0.0): CQADupStack-gis | 0.413 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gis | 0.767 | +| BEIR (v1.0.0): CQADupStack-gis | 0.768 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gis | 0.909 | +| BEIR (v1.0.0): CQADupStack-gis | 0.912 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). 
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.md index 1aa850d351..b47f8d95bc 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-gis.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-gis.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-gis.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-gis.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-gis.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-gis.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt 
runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-gis.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-gis.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-gis.test.txt runs/run.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-gis.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-gis | 0.416 | +| BEIR (v1.0.0): CQADupStack-gis | 0.413 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gis | 0.767 | +| BEIR (v1.0.0): CQADupStack-gis | 0.768 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gis | 0.909 | +| BEIR (v1.0.0): CQADupStack-gis | 0.912 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.md index 4cca34020a..5419e6c0cd 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-gis | 0.412 | +| BEIR (v1.0.0): CQADupStack-gis | 0.413 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gis | 0.767 | +| BEIR (v1.0.0): CQADupStack-gis | 0.768 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gis | 0.911 | +| BEIR (v1.0.0): CQADupStack-gis | 0.912 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.md index 1af60f7ce1..19d2fe5676 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.md @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): CQADupStack-gis | 0.413 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gis | 0.767 | +| BEIR (v1.0.0): CQADupStack-gis | 0.768 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-gis | 0.911 | +| BEIR (v1.0.0): CQADupStack-gis | 0.912 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached.md index 63f26daaca..8125dae548 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-mathematica | 0.8810 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.onnx.md index cd1462391e..5ad5409157 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-mathematica | 0.8810 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached.md index d0d50a6c6a..dba91cce43 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-mathematica | 0.8810 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.onnx.md index a2215d6fdb..0c4467b015 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-mathematica | 0.8810 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.md index 937fd27ba1..e6f0eb1d10 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.txt 
-bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-mathematica | 0.315 | +| BEIR (v1.0.0): CQADupStack-mathematica | 0.316 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-mathematica | 0.692 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-mathematica | 0.881 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.md index a5da4b2407..0d30ed3ece 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-mathematica.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-mathematica.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-mathematica.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-mathematica.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt 
runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-mathematica.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-mathematica.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-mathematica.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-mathematica.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt runs/run.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-mathematica.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-mathematica | 0.315 | +| BEIR (v1.0.0): CQADupStack-mathematica | 0.316 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-mathematica | 0.692 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-mathematica | 0.881 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.md index 014a741328..9a43f31a01 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-mathematica | 0.881 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.md index acd19b6f81..1e98b5af80 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-mathematica | 0.881 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached.md index 36d0bbfcd7..cb3b675444 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-physics | 0.9406 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.onnx.md index b17a301263..2aaecc8d67 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-physics | 0.9406 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached.md index 06995d2910..17d063fb44 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-physics | 0.9406 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.onnx.md index 1c105c7cf6..c89b89e4ce 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-physics | 0.9406 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.md index 77b0555bbd..a4b92f7ad0 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-physics | 0.474 | +| BEIR (v1.0.0): CQADupStack-physics | 0.472 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-physics | 0.810 | +| BEIR (v1.0.0): CQADupStack-physics | 0.808 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-physics | 0.940 | +| BEIR (v1.0.0): CQADupStack-physics | 0.941 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.md index 33403a0c99..5163630aac 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-physics.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-physics.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-physics.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-physics.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt 
runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-physics.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-physics.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-physics.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-physics.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-physics.test.txt runs/run.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-physics.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-physics | 0.473 | +| BEIR (v1.0.0): CQADupStack-physics | 0.472 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-physics | 0.810 | +| BEIR (v1.0.0): CQADupStack-physics | 0.808 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-physics | 0.940 | +| BEIR (v1.0.0): CQADupStack-physics | 0.941 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.md index 195dc7a937..05191d7a43 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-physics | 0.941 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). 
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.md index cc709346ad..909e28d415 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-physics | 0.941 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached.md index 19bb05457c..caf215e037 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-programmers | 0.9348 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.onnx.md index bb538eac33..bed428305d 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-programmers | 0.9348 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached.md index 8d04c1a5f7..0de657a270 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-programmers | 0.9348 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.onnx.md index c149a3d680..ea63e9e298 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-programmers | 0.9348 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.md index 4b71483208..795fb7a3f1 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.txt 
-bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-programmers | 0.425 | +| BEIR (v1.0.0): CQADupStack-programmers | 0.424 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-programmers | 0.786 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-programmers | 0.934 | +| BEIR (v1.0.0): CQADupStack-programmers | 0.935 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.md index e6cbaa0dd2..826ad8736a 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-programmers.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-programmers.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-programmers.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt 
runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-programmers.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-programmers.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-programmers.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-programmers.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-programmers.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-programmers.test.txt runs/run.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-programmers.test.txt ``` ## Effectiveness @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): CQADupStack-programmers | 0.424 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-programmers | 0.787 | +| BEIR (v1.0.0): CQADupStack-programmers | 0.786 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-programmers | 0.934 | +| BEIR (v1.0.0): CQADupStack-programmers | 0.935 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each 
experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.md index 62a9e4e058..36ce49f8d2 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-programmers | 0.935 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). 
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.md index abfeb1f805..7ac078dafc 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-programmers | 0.935 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached.md index 2291be811a..2b1b0535f4 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-stats | 0.8445 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.onnx.md index 1f39819bd5..ac87163885 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-stats | 0.8445 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached.md index 5cb541c40c..14f6261466 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-stats | 0.8445 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.onnx.md index 4d4961cfbc..d2995104a4 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-stats | 0.8445 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.md index 524fe48df0..338acd2c21 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-stats | 0.371 | +| BEIR (v1.0.0): CQADupStack-stats | 0.373 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-stats | 0.672 | +| BEIR (v1.0.0): CQADupStack-stats | 0.673 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-stats | 0.852 | +| BEIR (v1.0.0): CQADupStack-stats | 0.845 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.md index 7faa92c5ee..38c0b96a3f 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-stats.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-stats.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-stats.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-stats.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt 
runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-stats.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-stats.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-stats.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-stats.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-stats.test.txt runs/run.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-stats.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-stats | 0.370 | +| BEIR (v1.0.0): CQADupStack-stats | 0.373 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-stats | 0.671 | +| BEIR (v1.0.0): CQADupStack-stats | 0.673 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-stats | 0.854 | +| BEIR (v1.0.0): CQADupStack-stats | 0.845 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.md index aab54a7ffc..d792e14c99 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.md @@ -78,7 +78,8 @@ With the above commands, you should be able to reproduce the following results: | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-stats | 0.673 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-stats | 0.849 | +| BEIR (v1.0.0): CQADupStack-stats | 0.845 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.md index e83db1b78f..47b734b025 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.md @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): CQADupStack-stats | 0.373 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-stats | 0.672 | +| BEIR (v1.0.0): CQADupStack-stats | 0.673 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-stats | 0.849 | +| BEIR (v1.0.0): CQADupStack-stats | 0.845 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached.md index 26234d6b41..5982ebd6fc 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-tex | 0.8537 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.onnx.md index bb397795e0..20c6ad78a9 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-tex | 0.8537 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached.md index 4e2e1aef72..bb15c436f5 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-tex | 0.8537 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.onnx.md index b52227dc6a..e4ee54e8bc 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-tex | 0.8537 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.md index 3e4c927f81..0bae3d346f 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt 
runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): CQADupStack-tex | 0.312 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-tex | 0.648 | +| BEIR (v1.0.0): CQADupStack-tex | 0.649 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-tex | 0.852 | +| BEIR (v1.0.0): CQADupStack-tex | 0.854 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.md index 587a2c29a3..f7ded54b04 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-tex.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-tex.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-tex.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-tex.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-tex.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-tex.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-tex.test.txt +bin/trec_eval -c -m 
recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-tex.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-tex.test.txt runs/run.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-tex.test.txt ``` ## Effectiveness @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): CQADupStack-tex | 0.312 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-tex | 0.648 | +| BEIR (v1.0.0): CQADupStack-tex | 0.649 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-tex | 0.853 | +| BEIR (v1.0.0): CQADupStack-tex | 0.854 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.md index a0adb70670..7c17f93854 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-tex | 0.311 | +| BEIR (v1.0.0): CQADupStack-tex | 0.312 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-tex | 0.647 | +| BEIR (v1.0.0): CQADupStack-tex | 0.649 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-tex | 0.852 | +| BEIR (v1.0.0): CQADupStack-tex | 0.854 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.md index 3dc99a070b..04864dc391 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-tex | 0.311 | +| BEIR (v1.0.0): CQADupStack-tex | 0.312 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-tex | 0.647 | +| BEIR (v1.0.0): CQADupStack-tex | 0.649 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-tex | 0.853 | +| BEIR (v1.0.0): CQADupStack-tex | 0.854 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached.md index b9896c542c..eda2180b64 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-unix | 0.9237 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.onnx.md index 8e4915c517..399f796629 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-unix | 0.9237 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached.md index 89278fa400..098e5a10d0 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-unix | 0.9237 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.onnx.md index 75c5e20bb9..46008603d8 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-unix | 0.9237 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.md index 7e3d6f7fb9..e57f3c0f5b 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt 
runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-unix | 0.420 | +| BEIR (v1.0.0): CQADupStack-unix | 0.422 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-unix | 0.777 | +| BEIR (v1.0.0): CQADupStack-unix | 0.780 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-unix | 0.924 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. 
+With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.md index 43d95cd055..88cb8043af 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-unix.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-unix.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-unix.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-unix.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-unix.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt 
runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-unix.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-unix.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-unix.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-unix.test.txt runs/run.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-unix.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-unix | 0.421 | +| BEIR (v1.0.0): CQADupStack-unix | 0.422 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-unix | 0.777 | +| BEIR (v1.0.0): CQADupStack-unix | 0.780 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-unix | 0.924 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. 
+With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.md index c01bf9997e..39ee03cdbc 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.md @@ -78,7 +78,8 @@ With the above commands, you should be able to reproduce the following results: | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-unix | 0.780 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-unix | 0.925 | +| BEIR (v1.0.0): CQADupStack-unix | 0.924 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.md index 1e88f14cf1..3cc048a3b3 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.md @@ -78,7 +78,8 @@ With the above commands, you should be able to reproduce the following results: | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-unix | 0.780 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-unix | 0.925 | +| BEIR (v1.0.0): CQADupStack-unix | 0.924 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached.md index a6c181c4d1..72079d81b1 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-webmasters | 0.9380 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.onnx.md index ed363f7a0a..0b0071e332 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-webmasters | 0.9380 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached.md index fbdf9e9e48..e392d769aa 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-webmasters | 0.9380 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.onnx.md index 7fb3aea44f..e279357102 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-webmasters | 0.9380 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.md index b96083f060..442d36ba2d 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c 
-m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-webmasters | 0.411 | +| BEIR (v1.0.0): CQADupStack-webmasters | 0.407 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-webmasters | 0.780 | +| BEIR (v1.0.0): CQADupStack-webmasters | 0.777 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-webmasters | 0.937 | +| BEIR (v1.0.0): CQADupStack-webmasters | 0.938 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.md index 7313033cdf..e6513d90dc 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-webmasters.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-webmasters.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-webmasters.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt 
runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-webmasters.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-webmasters.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-webmasters.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-webmasters.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-webmasters.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt runs/run.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-webmasters.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-webmasters | 0.409 | +| BEIR (v1.0.0): CQADupStack-webmasters | 0.407 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-webmasters | 0.780 | +| BEIR (v1.0.0): CQADupStack-webmasters | 0.777 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-webmasters | 0.937 | +| BEIR (v1.0.0): CQADupStack-webmasters | 0.938 | -Note that due to the 
non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.md index 193718f224..9d39999cf1 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-webmasters | 0.406 | +| BEIR (v1.0.0): CQADupStack-webmasters | 0.407 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-webmasters | 0.777 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-webmasters | 0.937 | +| BEIR (v1.0.0): CQADupStack-webmasters | 0.938 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.md index 89af80a407..e31dfe377c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.md @@ -78,7 +78,8 @@ With the above commands, you should be able to reproduce the following results: | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-webmasters | 0.777 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-webmasters | 0.937 | +| BEIR (v1.0.0): CQADupStack-webmasters | 0.938 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. 
+With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached.md index 5f65909bee..44cb1cfc59 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-wordpress | 0.8861 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.onnx.md index d25a7c4889..45280d1bbd 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-wordpress | 0.8861 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached.md index 45296c6320..a544772483 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-wordpress | 0.8861 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.onnx.md index 0fd92e4981..0db22b07a5 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-wordpress | 0.8861 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.md index a281dfa217..1af7a99b80 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-wordpress | 0.354 | +| BEIR (v1.0.0): CQADupStack-wordpress | 0.355 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-wordpress | 0.706 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-wordpress | 0.886 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.md index 7daf736e20..2b3e3b2c4b 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-cqadupstack-wordpress.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-wordpress.test.txt \ + -output runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-wordpress.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-wordpress.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt 
runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-wordpress.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-cqadupstack-wordpress.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-wordpress.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-wordpress.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt runs/run.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-cqadupstack-wordpress.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): CQADupStack-wordpress | 0.354 | +| BEIR (v1.0.0): CQADupStack-wordpress | 0.355 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-wordpress | 0.707 | +| BEIR (v1.0.0): CQADupStack-wordpress | 0.706 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-wordpress | 0.886 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.md index 7063ab9ae1..e5eece510b 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.md @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): CQADupStack-wordpress | 0.355 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-wordpress | 0.705 | +| BEIR (v1.0.0): CQADupStack-wordpress | 0.706 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-wordpress | 0.886 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.yaml). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.md index 1ddc0195d7..a5963f568e 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.md @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): CQADupStack-wordpress | 0.355 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): CQADupStack-wordpress | 0.703 | +| BEIR (v1.0.0): CQADupStack-wordpress | 0.706 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): CQADupStack-wordpress | 0.886 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. 
+With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached.md index 6721cef10b..047e1751d5 100644 --- a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): DBPedia | 0.7833 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials).
diff --git a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.onnx.md index c7947a339d..e1006cd3fb 100644 --- a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): DBPedia | 0.7833 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials).
diff --git a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached.md index eb06a91846..df65ddf765 100644 --- a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): DBPedia | 0.7833 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.onnx.md index f54320a908..69036de015 100644 --- a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): DBPedia | 0.7833 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.001 of the results reported above (with some outliers).
diff --git a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.md index cef19a56c5..131f336b09 100644 --- a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt 
runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): DBPedia | 0.407 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): DBPedia | 0.527 | +| BEIR (v1.0.0): DBPedia | 0.530 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): DBPedia | 0.776 | +| BEIR (v1.0.0): DBPedia | 0.783 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.md index 1931be926e..98dc1a201f 100644 --- a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-dbpedia-entity.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-dbpedia-entity.test.txt \ + -output runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-dbpedia-entity.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-dbpedia-entity.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-dbpedia-entity.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-dbpedia-entity.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-dbpedia-entity.test.txt +bin/trec_eval -c -m recall.100 
tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-dbpedia-entity.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-dbpedia-entity.test.txt runs/run.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-dbpedia-entity.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): DBPedia | 0.408 | +| BEIR (v1.0.0): DBPedia | 0.407 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): DBPedia | 0.528 | +| BEIR (v1.0.0): DBPedia | 0.530 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): DBPedia | 0.778 | +| BEIR (v1.0.0): DBPedia | 0.783 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.md index 7d0dd92b15..c1d415fae3 100644 --- a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.md @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): DBPedia | 0.407 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): DBPedia | 0.528 | +| BEIR (v1.0.0): DBPedia | 0.530 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): DBPedia | 0.778 | +| BEIR (v1.0.0): DBPedia | 0.783 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.md index 1638550938..d27fd25c49 100644 --- a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): DBPedia | 0.408 | +| BEIR (v1.0.0): DBPedia | 0.407 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): DBPedia | 0.529 | +| BEIR (v1.0.0): DBPedia | 0.530 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): DBPedia | 0.778 | +| BEIR (v1.0.0): DBPedia | 0.783 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached.md index b8ab714b03..a882f65a78 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): FEVER | 0.9855 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.onnx.md index 6231e6267b..c54d0ff0a2 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): FEVER | 0.9855 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached.md index 729bf42e1b..5b8496e249 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): FEVER | 0.9855 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat.onnx.md index 910cc2b472..4e25c013ee 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): FEVER | 0.9855 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.md index a70cee7e07..a58c26d946 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-fever.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 
tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): FEVER | 0.860 | +| BEIR (v1.0.0): FEVER | 0.863 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FEVER | 0.967 | +| BEIR (v1.0.0): FEVER | 0.972 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FEVER | 0.980 | +| BEIR (v1.0.0): FEVER | 0.985 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.md index 77719ac3aa..2665dc1e2e 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-fever.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-fever.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-fever.test.txt \ + -output runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-fever.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-fever.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-fever.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-fever.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-fever.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-fever.test.txt +bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.beir-v1.0.0-fever.test.txt runs/run.beir-v1.0.0-fever.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-fever.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): FEVER | 0.860 | +| BEIR (v1.0.0): FEVER | 0.863 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FEVER | 0.967 | +| BEIR (v1.0.0): FEVER | 0.972 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FEVER | 0.980 | +| BEIR (v1.0.0): FEVER | 0.985 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.md index 672e576260..7a5d68961e 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): FEVER | 0.861 | +| BEIR (v1.0.0): FEVER | 0.863 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FEVER | 0.967 | +| BEIR (v1.0.0): FEVER | 0.972 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FEVER | 0.980 | +| BEIR (v1.0.0): FEVER | 0.985 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.md index 1e64d53e8a..bd8ea81482 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): FEVER | 0.860 | +| BEIR (v1.0.0): FEVER | 0.863 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FEVER | 0.967 | +| BEIR (v1.0.0): FEVER | 0.972 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FEVER | 0.980 | +| BEIR (v1.0.0): FEVER | 0.985 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached.md index 0225a3dafc..b0b6772b5c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): FiQA-2018 | 0.9083 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.onnx.md index 283a45bf40..052ae2b55c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): FiQA-2018 | 0.9083 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached.md index c861fad768..3b47fa58cf 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): FiQA-2018 | 0.9083 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.onnx.md index 4461b5c89b..dde5a87098 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): FiQA-2018 | 0.9083 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.md index 2c9997d6a9..7269078f20 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-fiqa.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt 
runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): FiQA-2018 | 0.401 | +| BEIR (v1.0.0): FiQA-2018 | 0.407 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FiQA-2018 | 0.737 | +| BEIR (v1.0.0): FiQA-2018 | 0.742 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FiQA-2018 | 0.902 | +| BEIR (v1.0.0): FiQA-2018 | 0.908 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.md index 8a545bc8fb..66beaab2c5 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-fiqa.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-fiqa.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-fiqa.test.txt \ + -output runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-fiqa.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-fiqa.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-fiqa.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-fiqa.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-fiqa.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-fiqa.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-fiqa.test.txt 
runs/run.beir-v1.0.0-fiqa.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-fiqa.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): FiQA-2018 | 0.401 | +| BEIR (v1.0.0): FiQA-2018 | 0.407 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FiQA-2018 | 0.737 | +| BEIR (v1.0.0): FiQA-2018 | 0.742 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FiQA-2018 | 0.902 | +| BEIR (v1.0.0): FiQA-2018 | 0.908 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.md index cdffceddee..b28b745817 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): FiQA-2018 | 0.405 | +| BEIR (v1.0.0): FiQA-2018 | 0.407 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FiQA-2018 | 0.739 | +| BEIR (v1.0.0): FiQA-2018 | 0.742 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FiQA-2018 | 0.902 | +| BEIR (v1.0.0): FiQA-2018 | 0.908 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.md index b68c124ecd..c5186bfee7 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): FiQA-2018 | 0.405 | +| BEIR (v1.0.0): FiQA-2018 | 0.407 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FiQA-2018 | 0.739 | +| BEIR (v1.0.0): FiQA-2018 | 0.742 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): FiQA-2018 | 0.902 | +| BEIR (v1.0.0): FiQA-2018 | 0.908 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached.md index 0b57a5f34e..2bbd7740b0 100644 --- a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): HotpotQA | 0.9424 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.onnx.md index dd4886048d..b04928c633 100644 --- a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): HotpotQA | 0.9424 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached.md index 19b405b4ea..96d710c8f3 100644 --- a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): HotpotQA | 0.9424 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.onnx.md index 8f8411880f..77a976cf61 100644 --- a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): HotpotQA | 0.9424 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.md index 6322c0b3d1..2cddb490a9 100644 --- a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c 
-m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): HotpotQA | 0.722 | +| BEIR (v1.0.0): HotpotQA | 0.726 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): HotpotQA | 0.866 | +| BEIR (v1.0.0): HotpotQA | 0.873 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): HotpotQA | 0.935 | +| BEIR (v1.0.0): HotpotQA | 0.942 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.md index 02513efc3e..e735ab04ac 100644 --- a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-hotpotqa.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-hotpotqa.test.txt \ + -output runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-hotpotqa.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-hotpotqa.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-hotpotqa.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-hotpotqa.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-hotpotqa.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt 
runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-hotpotqa.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-hotpotqa.test.txt runs/run.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-hotpotqa.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): HotpotQA | 0.722 | +| BEIR (v1.0.0): HotpotQA | 0.726 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): HotpotQA | 0.867 | +| BEIR (v1.0.0): HotpotQA | 0.873 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): HotpotQA | 0.936 | +| BEIR (v1.0.0): HotpotQA | 0.942 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.md index 1d23908ed1..bbb3ecc095 100644 --- a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): HotpotQA | 0.722 | +| BEIR (v1.0.0): HotpotQA | 0.726 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): HotpotQA | 0.866 | +| BEIR (v1.0.0): HotpotQA | 0.873 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): HotpotQA | 0.936 | +| BEIR (v1.0.0): HotpotQA | 0.942 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.md index fba179824d..0b8c916dde 100644 --- a/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): HotpotQA | 0.722 | +| BEIR (v1.0.0): HotpotQA | 0.726 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): HotpotQA | 0.866 | +| BEIR (v1.0.0): HotpotQA | 0.873 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): HotpotQA | 0.935 | +| BEIR (v1.0.0): HotpotQA | 0.942 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached.md index 545a37b12e..f764583e72 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): NFCorpus | 0.6622 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.onnx.md index b671085e36..3a7a5df5d5 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): NFCorpus | 0.6622 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached.md index bd0411dddc..6a83b60414 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): NFCorpus | 0.6622 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.onnx.md index 69a14f369d..d65e0f0dd3 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): NFCorpus | 0.6622 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.md index 1ff612e7a4..ed20ccbf5f 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c 
-m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): NFCorpus | 0.373 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NFCorpus | 0.338 | +| BEIR (v1.0.0): NFCorpus | 0.337 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NFCorpus | 0.657 | +| BEIR (v1.0.0): NFCorpus | 0.662 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.md index a95d4ae29f..7072dbe634 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-nfcorpus.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-nfcorpus.test.txt \ + -output runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-nfcorpus.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-nfcorpus.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-nfcorpus.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-nfcorpus.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-nfcorpus.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt 
runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-nfcorpus.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-nfcorpus.test.txt runs/run.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-nfcorpus.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): NFCorpus | 0.374 | +| BEIR (v1.0.0): NFCorpus | 0.373 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NFCorpus | 0.339 | +| BEIR (v1.0.0): NFCorpus | 0.337 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NFCorpus | 0.657 | +| BEIR (v1.0.0): NFCorpus | 0.662 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.md index 556e39abf0..5d544b5b6e 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): NFCorpus | 0.374 | +| BEIR (v1.0.0): NFCorpus | 0.373 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): NFCorpus | 0.337 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NFCorpus | 0.661 | +| BEIR (v1.0.0): NFCorpus | 0.662 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.md index 172fc9d55e..f5551fe7f4 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): NFCorpus | 0.374 | +| BEIR (v1.0.0): NFCorpus | 0.373 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): NFCorpus | 0.337 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NFCorpus | 0.661 | +| BEIR (v1.0.0): NFCorpus | 0.662 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached.md index c2af8ab9a1..2bf6e83b36 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): NQ | 0.9859 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.onnx.md index a68c9eee0b..7d1a2a1bb8 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): NQ | 0.9859 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached.md index 40032d5bf0..752711c834 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): NQ | 0.9859 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat.onnx.md index 7f962a5252..b1680c9112 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): NQ | 0.9859 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.md index 6fa7a0100a..f31050d504 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-nq.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt 
runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): NQ | 0.538 | +| BEIR (v1.0.0): NQ | 0.541 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NQ | 0.940 | +| BEIR (v1.0.0): NQ | 0.942 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NQ | 0.984 | +| BEIR (v1.0.0): NQ | 0.986 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.md index 94e115322a..dcd4fdf843 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-nq.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-nq.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-nq.test.txt \ + -output runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-nq.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-nq.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-nq.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-nq.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-nq.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-nq.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-nq.test.txt 
runs/run.beir-v1.0.0-nq.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-nq.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): NQ | 0.538 | +| BEIR (v1.0.0): NQ | 0.541 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NQ | 0.939 | +| BEIR (v1.0.0): NQ | 0.942 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NQ | 0.984 | +| BEIR (v1.0.0): NQ | 0.986 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.md index 57c84cd6fe..b43566d149 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.md @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): NQ | 0.541 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NQ | 0.940 | +| BEIR (v1.0.0): NQ | 0.942 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NQ | 0.984 | +| BEIR (v1.0.0): NQ | 0.986 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.md index 8b31c59c72..5e1828d5e5 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.md @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): NQ | 0.541 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NQ | 0.940 | +| BEIR (v1.0.0): NQ | 0.942 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): NQ | 0.984 | +| BEIR (v1.0.0): NQ | 0.986 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached.md index 33b01aa0e8..fc233c3964 100644 --- a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 0.9998 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.onnx.md index 00487e6a4e..f9a9c1b1c9 100644 --- a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 0.9998 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached.md index 7403003afa..b4c534de2c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 0.9998 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat.onnx.md index 453e702929..b6f90d9bea 100644 --- a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 0.9998 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.md index d76616bc0e..ad01e8834d 100644 --- a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-quora.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 
tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Quora | 0.888 | +| BEIR (v1.0.0): Quora | 0.889 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 0.997 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 1.000 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.md index 600bd623b4..329442a062 100644 --- a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-quora.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-quora.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-quora.test.txt \ + -output runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-quora.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-quora.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-quora.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-quora.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-quora.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-quora.test.txt +bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.beir-v1.0.0-quora.test.txt runs/run.beir-v1.0.0-quora.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-quora.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Quora | 0.887 | +| BEIR (v1.0.0): Quora | 0.889 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 0.997 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 1.000 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.md index 733d2fee7b..6e95030d3f 100644 --- a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 1.000 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.md index c1b54b4ae2..a5e6103fe6 100644 --- a/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Quora | 0.888 | +| BEIR (v1.0.0): Quora | 0.889 | | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 0.997 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Quora | 1.000 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached.md index 98cde36987..b18044d854 100644 --- a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Robust04 | 0.5981 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.onnx.md index 20f77e166b..512bbad85b 100644 --- a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Robust04 | 0.5981 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached.md index 385a486d47..6d8f2c8149 100644 --- a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Robust04 | 0.5981 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat.onnx.md index 37c6265359..1b9b0de01a 100644 --- a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Robust04 | 0.5981 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.md index 08b14f3ef3..6669df8518 100644 --- a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-robust04.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c 
-m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): Robust04 | 0.447 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Robust04 | 0.347 | +| BEIR (v1.0.0): Robust04 | 0.351 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Robust04 | 0.596 | +| BEIR (v1.0.0): Robust04 | 0.598 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.md index c977870767..595eb83ed2 100644 --- a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-robust04.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-robust04.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-robust04.test.txt \ + -output runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-robust04.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-robust04.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-robust04.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-robust04.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-robust04.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt 
runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-robust04.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-robust04.test.txt runs/run.beir-v1.0.0-robust04.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-robust04.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Robust04 | 0.445 | +| BEIR (v1.0.0): Robust04 | 0.447 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Robust04 | 0.347 | +| BEIR (v1.0.0): Robust04 | 0.351 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Robust04 | 0.592 | +| BEIR (v1.0.0): Robust04 | 0.598 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.md index 8c09c2f98e..4e05b956c8 100644 --- a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.md @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): Robust04 | 0.447 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Robust04 | 0.350 | +| BEIR (v1.0.0): Robust04 | 0.351 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Robust04 | 0.596 | +| BEIR (v1.0.0): Robust04 | 0.598 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.md index 2f16e65b86..a30e4fd36c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Robust04 | 0.444 | +| BEIR (v1.0.0): Robust04 | 0.447 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Robust04 | 0.350 | +| BEIR (v1.0.0): Robust04 | 0.351 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Robust04 | 0.595 | +| BEIR (v1.0.0): Robust04 | 0.598 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached.md index 3cf313dce6..314f593b33 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SCIDOCS | 0.7824 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.onnx.md index 32c1918fe3..056badc8eb 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SCIDOCS | 0.7824 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached.md index 49da9f4208..1039872930 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SCIDOCS | 0.7824 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.onnx.md index 56ec9aca76..1269c55430 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SCIDOCS | 0.7824 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.md index b219aa7dac..dfa815b6b9 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-scidocs.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 
tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): SCIDOCS | 0.217 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): SCIDOCS | 0.494 | +| BEIR (v1.0.0): SCIDOCS | 0.496 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): SCIDOCS | 0.785 | +| BEIR (v1.0.0): SCIDOCS | 0.782 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.md index 84b9e1313e..9ef37100fa 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-scidocs.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-scidocs.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-scidocs.test.txt \ + -output runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-scidocs.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-scidocs.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-scidocs.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-scidocs.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-scidocs.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-scidocs.test.txt 
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-scidocs.test.txt runs/run.beir-v1.0.0-scidocs.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-scidocs.test.txt ``` ## Effectiveness @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): SCIDOCS | 0.217 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): SCIDOCS | 0.493 | +| BEIR (v1.0.0): SCIDOCS | 0.496 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): SCIDOCS | 0.784 | +| BEIR (v1.0.0): SCIDOCS | 0.782 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.md index d224629a0a..91da10c20c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.md @@ -78,7 +78,8 @@ With the above commands, you should be able to reproduce the following results: | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SCIDOCS | 0.496 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): SCIDOCS | 0.783 | +| BEIR (v1.0.0): SCIDOCS | 0.782 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.md index b4127385bf..5f772cd0e7 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.md @@ -78,7 +78,8 @@ With the above commands, you should be able to reproduce the following results: | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SCIDOCS | 0.496 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): SCIDOCS | 0.783 | +| BEIR (v1.0.0): SCIDOCS | 0.782 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached.md index 6557b46218..782e6e5b80 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SciFact | 0.9967 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.onnx.md index 61ce04bafe..f116196882 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SciFact | 0.9967 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached.md index fb2f4eaafc..d1fdebb4b6 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SciFact | 0.9967 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat.onnx.md index f4257ffe84..afefda1447 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SciFact | 0.9967 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.md index 61580d9df2..d56358df54 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-scifact.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m 
recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): SciFact | 0.741 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): SciFact | 0.969 | +| BEIR (v1.0.0): SciFact | 0.967 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SciFact | 0.997 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.md index d382d3dce7..e8f8b2211c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-scifact.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-scifact.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-scifact.test.txt \ + -output runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-scifact.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-scifact.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-scifact.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-scifact.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-scifact.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-scifact.test.txt 
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-scifact.test.txt runs/run.beir-v1.0.0-scifact.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-scifact.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): SciFact | 0.740 | +| BEIR (v1.0.0): SciFact | 0.741 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): SciFact | 0.969 | +| BEIR (v1.0.0): SciFact | 0.967 | | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SciFact | 0.997 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.md index 33c26b6323..de8e5494ab 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SciFact | 0.997 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.md index 66910d4aaf..167a35df17 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): SciFact | 0.997 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached.md index 311cda7661..c9ce948832 100644 --- a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Signal-1M | 0.5331 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.onnx.md index 073d62748e..173f369242 100644 --- a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Signal-1M | 0.5331 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached.md index 73fe9101a7..3fa194fa80 100644 --- a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Signal-1M | 0.5331 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.onnx.md index be976b8d2a..7fd8afda9d 100644 --- a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Signal-1M | 0.5331 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.md index d66e108b40..1a69349319 100644 --- a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-signal1m.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c 
-m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Signal-1M | 0.278 | +| BEIR (v1.0.0): Signal-1M | 0.289 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Signal-1M | 0.291 | +| BEIR (v1.0.0): Signal-1M | 0.311 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Signal-1M | 0.490 | +| BEIR (v1.0.0): Signal-1M | 0.533 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.md index 54cce98694..c34465dd2f 100644 --- a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-signal1m.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-signal1m.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-signal1m.test.txt \ + -output runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-signal1m.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-signal1m.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-signal1m.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-signal1m.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-signal1m.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt 
runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-signal1m.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-signal1m.test.txt runs/run.beir-v1.0.0-signal1m.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-signal1m.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Signal-1M | 0.277 | +| BEIR (v1.0.0): Signal-1M | 0.289 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Signal-1M | 0.292 | +| BEIR (v1.0.0): Signal-1M | 0.311 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Signal-1M | 0.498 | +| BEIR (v1.0.0): Signal-1M | 0.533 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.md index 1d57eb7b49..17dce2f5a7 100644 --- a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Signal-1M | 0.282 | +| BEIR (v1.0.0): Signal-1M | 0.289 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Signal-1M | 0.298 | +| BEIR (v1.0.0): Signal-1M | 0.311 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Signal-1M | 0.500 | +| BEIR (v1.0.0): Signal-1M | 0.533 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.md index 2c0967e627..1f39aedb11 100644 --- a/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Signal-1M | 0.280 | +| BEIR (v1.0.0): Signal-1M | 0.289 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Signal-1M | 0.294 | +| BEIR (v1.0.0): Signal-1M | 0.311 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Signal-1M | 0.495 | +| BEIR (v1.0.0): Signal-1M | 0.533 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached.md index 7604279108..2f83181216 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): TREC-COVID | 0.4768 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.onnx.md index 2e85dcdde7..97f6d3f042 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): TREC-COVID | 0.4768 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached.md index a8d8fde488..20853d0def 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): TREC-COVID | 0.4768 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.onnx.md index 29a28b5aee..36e8ddfa39 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): TREC-COVID | 0.4768 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.md index 8cbfbb63c8..449840789f 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt 
runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): TREC-COVID | 0.784 | +| BEIR (v1.0.0): TREC-COVID | 0.781 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-COVID | 0.140 | +| BEIR (v1.0.0): TREC-COVID | 0.141 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-COVID | 0.475 | +| BEIR (v1.0.0): TREC-COVID | 0.477 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.md index 0d9f70009d..279a7bcb5d 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-trec-covid.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-trec-covid.test.txt \ + -output runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-trec-covid.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-trec-covid.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-trec-covid.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-trec-covid.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-trec-covid.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt 
runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-trec-covid.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-covid.test.txt runs/run.beir-v1.0.0-trec-covid.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-trec-covid.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): TREC-COVID | 0.784 | +| BEIR (v1.0.0): TREC-COVID | 0.781 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-COVID | 0.140 | +| BEIR (v1.0.0): TREC-COVID | 0.141 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-COVID | 0.475 | +| BEIR (v1.0.0): TREC-COVID | 0.477 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.md index 45b5656446..a738b61ac5 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.md @@ -80,5 +80,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): TREC-COVID | 0.477 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.md index cd3bbc270b..ece8849d8a 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.md @@ -78,7 +78,8 @@ With the above commands, you should be able to reproduce the following results: | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): TREC-COVID | 0.141 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-COVID | 0.476 | +| BEIR (v1.0.0): TREC-COVID | 0.477 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached.md index 06a3ec6ed3..cf1d4066ea 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): TREC-NEWS | 0.7875 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.onnx.md index 54de129a73..78ba8d4bcf 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): TREC-NEWS | 0.7875 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached.md index 71d6a49c1c..575337a962 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): TREC-NEWS | 0.7875 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.onnx.md index 6426eb70b8..fb19a7da7b 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): TREC-NEWS | 0.7875 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.md index e90b4ea56a..9907534ef0 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-trec-news.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt 
runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): TREC-NEWS | 0.432 | +| BEIR (v1.0.0): TREC-NEWS | 0.442 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-NEWS | 0.490 | +| BEIR (v1.0.0): TREC-NEWS | 0.499 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-NEWS | 0.777 | +| BEIR (v1.0.0): TREC-NEWS | 0.788 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.md index 62d41cd0e6..b93441b28b 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-trec-news.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-trec-news.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-trec-news.test.txt \ + -output runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-trec-news.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-trec-news.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-trec-news.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-trec-news.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-trec-news.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt 
runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-trec-news.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-trec-news.test.txt runs/run.beir-v1.0.0-trec-news.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-trec-news.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): TREC-NEWS | 0.434 | +| BEIR (v1.0.0): TREC-NEWS | 0.442 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-NEWS | 0.488 | +| BEIR (v1.0.0): TREC-NEWS | 0.499 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-NEWS | 0.774 | +| BEIR (v1.0.0): TREC-NEWS | 0.788 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.md index ec253d22c7..3dbd88aefb 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): TREC-NEWS | 0.441 | +| BEIR (v1.0.0): TREC-NEWS | 0.442 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-NEWS | 0.488 | +| BEIR (v1.0.0): TREC-NEWS | 0.499 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-NEWS | 0.770 | +| BEIR (v1.0.0): TREC-NEWS | 0.788 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.md index d343066190..22aecc3fa7 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.md @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): TREC-NEWS | 0.439 | +| BEIR (v1.0.0): TREC-NEWS | 0.442 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-NEWS | 0.492 | +| BEIR (v1.0.0): TREC-NEWS | 0.499 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): TREC-NEWS | 0.780 | +| BEIR (v1.0.0): TREC-NEWS | 0.788 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached.md index 0d9aacd57d..292fd48bba 100644 --- a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Webis-Touche2020 | 0.8298 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.onnx.md index 92b06bf230..ae6eba7020 100644 --- a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.onnx.md @@ -78,5 +78,6 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Webis-Touche2020 | 0.8298 | -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached.md index e2e1a6d183..25e9cb9c0c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached.md @@ -78,4 +78,4 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Webis-Touche2020 | 0.8298 | -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.onnx.md index b2515d8c58..589ffa1240 100644 --- a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.onnx.md @@ -78,5 +78,5 @@ With the above commands, you should be able to reproduce the following results: | **R@1000** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Webis-Touche2020 | 0.8298 | -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.md index 9974f8f7de..d17ed039ff 100644 --- a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonStringVector \ - -output runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt 
runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-cached.topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-int8-cached.topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Webis-Touche2020 | 0.252 | +| BEIR (v1.0.0): Webis-Touche2020 | 0.257 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Webis-Touche2020 | 0.488 | +| BEIR (v1.0.0): Webis-Touche2020 | 0.486 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Webis-Touche2020 | 0.831 | +| BEIR (v1.0.0): Webis-Touche2020 | 0.830 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. 
+With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.md index cb1eb59445..251c1419ff 100644 --- a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -56,16 +56,16 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.beir-v1.0.0-webis-touche2020.test.tsv.gz \ -topicReader TsvString \ - -output runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-webis-touche2020.test.txt \ + -output runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-webis-touche2020.test.txt \ -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-webis-touche2020.test.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-webis-touche2020.test.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt 
runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-onnx.topics.beir-v1.0.0-webis-touche2020.test.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-webis-touche2020.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-webis-touche2020.test.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.beir-v1.0.0-webis-touche2020.test.txt runs/run.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.beir-v1.0.0-webis-touche2020.test.txt ``` ## Effectiveness @@ -74,11 +74,12 @@ With the above commands, you should be able to reproduce the following results: | **nDCG@10** | **BGE-base-en-v1.5**| |:-------------------------------------------------------------------------------------------------------------|-----------| -| BEIR (v1.0.0): Webis-Touche2020 | 0.251 | +| BEIR (v1.0.0): Webis-Touche2020 | 0.257 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Webis-Touche2020 | 0.487 | +| BEIR (v1.0.0): Webis-Touche2020 | 0.486 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Webis-Touche2020 | 0.833 | +| BEIR (v1.0.0): Webis-Touche2020 | 0.830 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. 
+With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.md index b6a9f8c962..876df7b433 100644 --- a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.md @@ -78,7 +78,8 @@ With the above commands, you should be able to reproduce the following results: | **R@100** | **BGE-base-en-v1.5**| | BEIR (v1.0.0): Webis-Touche2020 | 0.486 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Webis-Touche2020 | 0.831 | +| BEIR (v1.0.0): Webis-Touche2020 | 0.830 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.md index 2c8f9d11e0..8bbea38b66 100644 --- a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.md @@ -76,9 +76,10 @@ With the above commands, you should be able to reproduce the following results: |:-------------------------------------------------------------------------------------------------------------|-----------| | BEIR (v1.0.0): Webis-Touche2020 | 0.257 | | **R@100** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Webis-Touche2020 | 0.487 | +| BEIR (v1.0.0): Webis-Touche2020 | 0.486 | | **R@1000** | **BGE-base-en-v1.5**| -| BEIR (v1.0.0): Webis-Touche2020 | 0.831 | +| BEIR (v1.0.0): Webis-Touche2020 | 0.830 | -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.yaml). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/docs/rest-api.md b/docs/rest-api.md index a0c23e18ea..dd5aacc004 100644 --- a/docs/rest-api.md +++ b/docs/rest-api.md @@ -12,13 +12,55 @@ java -cp $ANSERINI_JAR io.anserini.server.Application --server.port=8081 And then navigate to [`http://localhost:8081/`](http://localhost:8081/) in your browser. +## List Indexes + +To list all the index information, the endpoint is `api/v1.0/indexes/` + +Run + +```bash +curl -X GET "http://localhost:8081/api/v1.0/indexes" +``` + +Output is a mapping from index name to `IndexInfo` enum + +```json +{ + "cacm": { + "urls": [ + "https://github.com/castorini/anserini-data/raw/master/CACM/lucene-index.cacm.20221005.252b5e.tar.gz" + ], + "cached": false, + "md5": "cfe14d543c6a27f4d742fb2d0099b8e0", + "indexName": "cacm", + "description": "Lucene index of the CACM corpus.", + "model": "BM25", + "corpus": "CACM", + "filename": "lucene-index.cacm.20221005.252b5e.tar.gz" + }, + "msmarco-v1-passage": { + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz" + ], + "cached": true, + "md5": "678876e8c99a89933d553609a0fd8793", + "indexName": "msmarco-v1-passage", + "description": "Lucene index of the MS MARCO V1 passage corpus.", + "model": "BM25", + "corpus": "MS MARCO V1 Passage", + "filename": "lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz" + }, + ... +} +``` + ## Search Queries The search query endpoint is `api/v1.0/indexes/{index_name}/search?query={query}&hits={hits}&qid={qid}` Path variables: -- `index`: The index name to query. Default is "msmarco-v1-passage" +- `index_name`: The index name to query. 
Default is "msmarco-v1-passage" Query parameters: @@ -66,45 +108,26 @@ The json results are the same as the output of the `-outputRerankerRequests` opt } ``` -## List Indexes +## Get Document Content by DocId -To list all the index information, the endpoint is `api/v1.0/indexes/` +To access the content of a document in an index, the endpoint is `api/v1.0/indexes/{index_name}/documents/{docid}` -Run +Here's an example of getting the document of the top candidate from the above example: ```bash -curl -X GET "http://localhost:8081/api/v1.0/indexes" +curl -X GET "http://localhost:8081/api/v1.0/indexes/msmarco-v2.1-doc/documents/msmarco_v2.1_doc_15_390497775" ``` -Output is a mapping from index name to `IndexInfo` enum +Output is an object of the same format as a candidate from search ```json { - "cacm": { - "urls": [ - "https://github.com/castorini/anserini-data/raw/master/CACM/lucene-index.cacm.20221005.252b5e.tar.gz" - ], - "cached": false, - "md5": "cfe14d543c6a27f4d742fb2d0099b8e0", - "indexName": "cacm", - "description": "Lucene index of the CACM corpus.", - "model": "BM25", - "corpus": "CACM", - "filename": "lucene-index.cacm.20221005.252b5e.tar.gz" - }, - "msmarco-v1-passage": { - "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz" - ], - "cached": true, - "md5": "678876e8c99a89933d553609a0fd8793", - "indexName": "msmarco-v1-passage", - "description": "Lucene index of the MS MARCO V1 passage corpus.", - "model": "BM25", - "corpus": "MS MARCO V1 Passage", - "filename": "lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz" - }, - ... + "doc": { + "url": "https://diabetestalk.net/blood-sugar/conversion-of-carbohydrates-to-glucose", + "title": "Conversion Of Carbohydrates To Glucose | DiabetesTalk.Net", + "headings": "...", + "body": "..." 
+ } } ``` @@ -124,4 +147,4 @@ Output is an object containing the 'cached' property { "cached": true } -``` \ No newline at end of file +``` diff --git a/docs/start-here.md b/docs/start-here.md index 108a776b69..8acd1a06e6 100644 --- a/docs/start-here.md +++ b/docs/start-here.md @@ -392,3 +392,4 @@ If you think this guide can be improved in any way (e.g., you caught a typo or t + Results reproduced by [@alireza-taban](https://github.com/alireza-taban) on 2024-06-09 (commit [`59330e3`](https://github.com/castorini/anserini/commit/59330e355b4aaf6754622cb3a136259dea0d8d05)) + Results reproduced by [@Feng-12138](https://github.com/Feng-12138) on 2024-06-15 (commit [`ad97377`](https://github.com/castorini/anserini/commit/ad97377e463e70ee8b2f501ac7c41134af53e976)) + Results reproduced by [@hosnahoseini](https://github.com/hosnahoseini) on 2024-06-15 (commit [`ad97377`](https://github.com/castorini/anserini/commit/ad97377e463e70ee8b2f501ac7c41134af53e976)) ++ Results reproduced by [@FaizanFaisal25](https://github.com/FaizanFaisal25) on 2024-06-25 (commit [`e92370a`](https://github.com/FaizanFaisal25/anserini/commit/e92370a06eaa3bbc5bacdba65cc9c3f125590071)) \ No newline at end of file diff --git a/src/main/frontend/package.json b/src/main/frontend/package.json index 638a730122..36bd9cbf4b 100644 --- a/src/main/frontend/package.json +++ b/src/main/frontend/package.json @@ -10,13 +10,15 @@ "export": "next build && next export" }, "dependencies": { - "@emotion/react": "^11.10.4", - "@emotion/styled": "^11.10.4", + "@chakra-ui/react": "^2.8.2", + "@emotion/react": "^11.11.4", + "@emotion/styled": "^11.11.5", "@mui/material": "^5.10.10", + "framer-motion": "^11.2.11", "next": "^14.1.1", + "postcss": "^8.4.31", "react": "18.2.0", "react-dom": "18.2.0", - "postcss": "^8.4.31", "zod": "^3.22.3" }, "devDependencies": { diff --git a/src/main/frontend/pages/_app.tsx b/src/main/frontend/pages/_app.tsx index 4c4e42b26c..51c7fbb01b 100644 --- a/src/main/frontend/pages/_app.tsx +++ 
b/src/main/frontend/pages/_app.tsx @@ -13,18 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +import '../styles/globals.css'; +import type { AppProps } from 'next/app'; +import { ChakraProvider } from '@chakra-ui/react'; -import '../styles/globals.css' -import SearchBar from './components/SearchBar' - -export default function Home() { +function MyApp({ Component, pageProps } : AppProps) { return ( -
-
-

Anserini Search Interface

-

A Lucene toolkit for reproducible information retrieval research

-
- -
+ + + ); } + +export default MyApp; \ No newline at end of file diff --git a/src/main/frontend/pages/components/Dropdown.tsx b/src/main/frontend/pages/components/Dropdown.tsx index 23dbb17ba1..ec7ae67b20 100644 --- a/src/main/frontend/pages/components/Dropdown.tsx +++ b/src/main/frontend/pages/components/Dropdown.tsx @@ -1,20 +1,5 @@ -/* - * Anserini: A Lucene toolkit for reproducible information retrieval research - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import React, { useState, useRef, useEffect } from 'react'; +import React, { useState, useEffect } from 'react'; +import { Select, HStack, Box } from '@chakra-ui/react'; interface Props { onSelect: (selectedValue: string) => void; @@ -31,97 +16,99 @@ interface IndexInfo { cached: boolean; } +interface IndexInfo { + indexName: string; + description: string; + filename: string; + corpus: string; + model: string; + urls: string[]; + md5: string; + cached: boolean; +} + const Dropdown: React.FC = ({ onSelect }) => { const [selectedCollection, setSelectedCollection] = useState(null); const [selectedCorpus, setSelectedCorpus] = useState(null); const [selectedIndex, setSelectedIndex] = useState(null); - const [indexInfoList, setIndexInfoList] = useState<{ [key: string]: IndexInfo }>({}); const [collections, setCollections] = useState<{ [key: string]: string[] | { [key: string]: string[] } }>({}); - // Fetch indexes from api - useEffect( - () => { - const fetchIndexes = async () => { - const 
response = await fetch('/api/v1.0/indexes'); - const indexList = await response.json(); - setIndexInfoList(indexList); + useEffect(() => { + const fetchIndexes = async () => { + const response = await fetch('/api/v1.0/indexes'); + const indexList = await response.json(); + setIndexInfoList(indexList); - const dropdownList : { [key: string]: string[] | { [key: string]: string[] } } = {}; - for (const value of Object.values(indexList)) { - const index = value as IndexInfo; + const dropdownList: { [key: string]: string[] | { [key: string]: string[] } } = {}; + for (const value of Object.values(indexList)) { + const index = value as IndexInfo; - if (index['corpus'].includes('MS MARCO V1')) { - if (!dropdownList['MS MARCO V1']) dropdownList['MS MARCO V1'] = []; - (dropdownList['MS MARCO V1'] as string[]).push(index.indexName); - } else if (index['corpus'].includes('MS MARCO V2')) { - if (!dropdownList['MS MARCO V2']) dropdownList['MS MARCO V2'] = []; - (dropdownList['MS MARCO V2'] as string[]).push(index.indexName); - } else if (index['corpus'].includes('BEIR')) { - if (!dropdownList['BEIR']) dropdownList['BEIR'] = {}; - const beir = dropdownList['BEIR'] as { [key: string]: string[] }; - const corpus = (index['corpus'] as string); - if (beir[corpus]) { - (beir[corpus] as string[]).push(index.indexName); - } else { - beir[corpus] = [index.indexName]; - } + if (index.corpus.includes('MS MARCO')) { + if (!dropdownList['MS MARCO']) dropdownList['MS MARCO'] = {}; + const msmarco = dropdownList['MS MARCO'] as { [key: string]: string[] }; + const corpus = index.corpus as string; + if (msmarco[corpus]) { + (msmarco[corpus] as string[]).push(index.indexName); + } else { + msmarco[corpus] = [index.indexName]; + } + } else if (index.corpus.includes('BEIR')) { + if (!dropdownList['BEIR']) dropdownList['BEIR'] = {}; + const beir = dropdownList['BEIR'] as { [key: string]: string[] }; + const corpus = index.corpus as string; + if (beir[corpus]) { + (beir[corpus] as 
string[]).push(index.indexName); + } else { + beir[corpus] = [index.indexName]; } } - setCollections(dropdownList); } + setCollections(dropdownList); + }; - fetchIndexes(); - } - , []); + fetchIndexes(); + }, []); return ( -
- - - {selectedCollection && selectedCollection.includes("MS MARCO") && <> - { + setSelectedCollection(e.target.value); + setSelectedCorpus(null); + setSelectedIndex(null); + }}> + {Object.keys(collections).map((collection) => ( + ))} - - } - - {selectedCollection=='BEIR' && <> - - } -
+ + {selectedCollection !== null && ( + <> + + {selectedCorpus && ( + + )} + + )} + + ); }; diff --git a/src/main/frontend/pages/components/SearchBar.tsx b/src/main/frontend/pages/components/SearchBar.tsx index 2f8b5e00eb..f754bd6d0f 100644 --- a/src/main/frontend/pages/components/SearchBar.tsx +++ b/src/main/frontend/pages/components/SearchBar.tsx @@ -14,31 +14,40 @@ * limitations under the License. */ -import React, { useEffect, useState } from 'react'; +import React, { useState } from 'react'; import Dropdown from './Dropdown'; +import { Input, Button, Box, Spinner, Text, VStack, HStack, Container, Heading, Divider, Flex, FormControl, Center, Select } from '@chakra-ui/react'; const SearchBar: React.FC = () => { const [loading, setLoading] = useState(false); const [results, setResults] = useState>([]); const [query, setQuery] = useState(''); + const [queryType, setQueryType] = useState('search query'); const [index, setIndex] = useState(''); const fetchResults = async (query: string, index: string) => { setLoading(true); try { let endpoint = '/api/v1.0'; - if (index != '') endpoint += `/indexes/${index}`; - endpoint += `/search?query=${query}`; - - const response = await fetch(endpoint); - const data = await response.json(); - console.log(data); - setResults(data.candidates); + if (index !== '') endpoint += `/indexes/${index}`; + if (queryType === 'search query') { + endpoint += `/search?query=${query}`; + + const response = await fetch(endpoint); + const data = await response.json(); + console.log(data); + setResults(data.candidates); + } else { + endpoint += `/documents/${query}`; + const response = await fetch(endpoint); + const data = await response.json(); + setResults([data]); + } } catch (error) { console.error("Failed to fetch data: ", error); setResults([]); } finally { - setLoading(false); + setLoading(false); } }; @@ -48,37 +57,60 @@ const SearchBar: React.FC = () => { }; return ( - <> -
-
- setIndex(selectedValue)} /> -
- setQuery(e.target.value)} - /> - -
- -
- {loading &&

Loading...

} -
    - {results.map((result, index) => ( -
    -

    Document ID: {result.docid} Score: {result.score}

    - {Object.entries(result.doc).map(([key, value]) => ( -

    - {key}: {JSON.stringify(value) as React.ReactNode} -

    - ))} -
    - ))} -
- + + Anserini Search + + +
+ + setIndex(selectedValue)} /> + + + setQuery(e.target.value)} + bg="gray.100" + border="none" + width="100%" + _focus={{ bg: 'white', boxShadow: 'outline' }} + /> + + + + {loading && } + + {results.map((result, index) => ( + + + {result.docid && + Document ID: {result.docid} + } + {result.score && Score: {result.score}} + + {result.doc && Object.entries(result.doc).map(([key, value]) => ( + + {key}: {JSON.stringify(value)} + + ))} + + ))} + + + +
+
+
); }; -export default SearchBar; \ No newline at end of file +export default SearchBar; diff --git a/src/main/frontend/pages/index.tsx b/src/main/frontend/pages/index.tsx index a47cbf17ad..cad56ea706 100644 --- a/src/main/frontend/pages/index.tsx +++ b/src/main/frontend/pages/index.tsx @@ -14,29 +14,11 @@ * limitations under the License. */ -import type { NextPage } from 'next' -import Head from 'next/head' -import SearchBar from './components/SearchBar' +import { Container } from '@chakra-ui/react'; +import SearchBar from './components/SearchBar'; -const Home: NextPage = () => { +export default function Home() { return ( -
- - TEST APP - - - - -
-

- Welcome to Anserini! -

- - - -
-
- ) -} - -export default Home + + ); +} \ No newline at end of file diff --git a/src/main/frontend/styles/globals.css b/src/main/frontend/styles/globals.css index e716706928..ef53a3bb05 100644 --- a/src/main/frontend/styles/globals.css +++ b/src/main/frontend/styles/globals.css @@ -1,17 +1,3 @@ -/* globals.css */ - -/* Container for interface title and description */ -.header h1 { - font-size: 2.5rem; - margin-bottom: 0.5rem; - color: #333; -} - -.header p { - font-size: 1rem; - color: #666; -} - body { margin: 0; padding: 0; diff --git a/src/main/frontend/yarn.lock b/src/main/frontend/yarn.lock index 34f138a9f0..a3f4621b3d 100644 --- a/src/main/frontend/yarn.lock +++ b/src/main/frontend/yarn.lock @@ -16,11 +16,6 @@ dependencies: "@babel/types" "^7.18.6" -"@babel/helper-plugin-utils@^7.18.6": - version "7.19.0" - resolved "https://registry.yarnpkg.com/@babel/helper-plugin-utils/-/helper-plugin-utils-7.19.0.tgz#4796bb14961521f0f8715990bee2fb6e51ce21bf" - integrity sha512-40Ryx7I8mT+0gaNxm8JGTZFUITNqdLAgdg0hXzeVZxVD6nFsdhQvip6v8dqkRHzsz1VFpFAaOCHNn0vKBL7Czw== - "@babel/helper-string-parser@^7.19.4": version "7.19.4" resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.19.4.tgz#38d3acb654b4701a9b77fb0615a96f775c3a9e63" @@ -40,13 +35,6 @@ chalk "^2.0.0" js-tokens "^4.0.0" -"@babel/plugin-syntax-jsx@^7.17.12": - version "7.18.6" - resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.18.6.tgz#a8feef63b010150abd97f1649ec296e849943ca0" - integrity sha512-6mmljtAedFGTWu2p/8WIORGwy+61PLgOMPOdazc7YoJ9ZCWUyFy3A6CpPkRKLKD1ToAesxX8KGEViAiLo9N+7Q== - dependencies: - "@babel/helper-plugin-utils" "^7.18.6" - "@babel/runtime-corejs3@^7.10.2": version "7.19.6" resolved "https://registry.yarnpkg.com/@babel/runtime-corejs3/-/runtime-corejs3-7.19.6.tgz#778471a71d915cf3b955a9201bebabfe924f872a" @@ -55,6 +43,13 @@ core-js-pure "^3.25.1" regenerator-runtime "^0.13.4" +"@babel/runtime@^7.0.0", "@babel/runtime@^7.12.13": 
+ version "7.24.7" + resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.24.7.tgz#f4f0d5530e8dbdf59b3451b9b3e594b6ba082e12" + integrity sha512-UwgBRMjJP+xv857DCngvqXI3Iq6J4v0wXmwc6sapg+zyhbwmQX67LUEFrkK5tbyJ30jGuG3ZvWpBiB9LCy1kWw== + dependencies: + regenerator-runtime "^0.14.0" + "@babel/runtime@^7.10.2", "@babel/runtime@^7.12.5", "@babel/runtime@^7.18.3", "@babel/runtime@^7.18.9", "@babel/runtime@^7.19.0", "@babel/runtime@^7.5.5", "@babel/runtime@^7.8.7": version "7.19.4" resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.19.4.tgz#a42f814502ee467d55b38dd1c256f53a7b885c78" @@ -71,25 +66,830 @@ "@babel/helper-validator-identifier" "^7.19.1" to-fast-properties "^2.0.0" -"@emotion/babel-plugin@^11.10.0": - version "11.10.2" - resolved "https://registry.yarnpkg.com/@emotion/babel-plugin/-/babel-plugin-11.10.2.tgz#879db80ba622b3f6076917a1e6f648b1c7d008c7" - integrity sha512-xNQ57njWTFVfPAc3cjfuaPdsgLp5QOSuRsj9MA6ndEhH/AzuZM86qIQzt6rq+aGBwj3n5/TkLmU5lhAfdRmogA== +"@chakra-ui/accordion@2.3.1": + version "2.3.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/accordion/-/accordion-2.3.1.tgz#a326509e286a5c4e8478de9bc2b4b05017039e6b" + integrity sha512-FSXRm8iClFyU+gVaXisOSEw0/4Q+qZbFRiuhIAkVU6Boj0FxAMrlo9a8AV5TuF77rgaHytCdHk0Ng+cyUijrag== + dependencies: + "@chakra-ui/descendant" "3.1.0" + "@chakra-ui/icon" "3.2.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-use-controllable-state" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + "@chakra-ui/transition" "2.1.0" + +"@chakra-ui/alert@2.2.2": + version "2.2.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/alert/-/alert-2.2.2.tgz#aeba951d120c7c6e69d5f515a695ad6e4db43ffe" + integrity sha512-jHg4LYMRNOJH830ViLuicjb3F+v6iriE/2G5T+Sd0Hna04nukNJ1MxUmBPE+vI22me2dIflfelu2v9wdB6Pojw== + dependencies: + "@chakra-ui/icon" "3.2.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + "@chakra-ui/spinner" "2.1.0" + 
+"@chakra-ui/anatomy@2.2.2": + version "2.2.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/anatomy/-/anatomy-2.2.2.tgz#2d0e14cba2534d92077ca28abf8c183b6e27897b" + integrity sha512-MV6D4VLRIHr4PkW4zMyqfrNS1mPlCTiCXwvYGtDFQYr+xHFfonhAuf9WjsSc0nyp2m0OdkSLnzmVKkZFLo25Tg== + +"@chakra-ui/avatar@2.3.0": + version "2.3.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/avatar/-/avatar-2.3.0.tgz#f018a2714d1e3ba5970bcf66558887925fdfccf4" + integrity sha512-8gKSyLfygnaotbJbDMHDiJoF38OHXUYVme4gGxZ1fLnQEdPVEaIWfH+NndIjOM0z8S+YEFnT9KyGMUtvPrBk3g== + dependencies: + "@chakra-ui/image" "2.1.0" + "@chakra-ui/react-children-utils" "2.0.6" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/breadcrumb@2.2.0": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/breadcrumb/-/breadcrumb-2.2.0.tgz#751bc48498f3c403f97b5d9aae528ebfd405ef48" + integrity sha512-4cWCG24flYBxjruRi4RJREWTGF74L/KzI2CognAW/d/zWR0CjiScuJhf37Am3LFbCySP6WSoyBOtTIoTA4yLEA== + dependencies: + "@chakra-ui/react-children-utils" "2.0.6" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/breakpoint-utils@2.0.8": + version "2.0.8" + resolved "https://registry.yarnpkg.com/@chakra-ui/breakpoint-utils/-/breakpoint-utils-2.0.8.tgz#750d3712668b69f6e8917b45915cee0e08688eed" + integrity sha512-Pq32MlEX9fwb5j5xx8s18zJMARNHlQZH2VH1RZgfgRDpp7DcEgtRW5AInfN5CfqdHLO1dGxA7I3MqEuL5JnIsA== + dependencies: + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/button@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/button/-/button-2.1.0.tgz#623ed32cc92fc8e52492923e9924791fc6f25447" + integrity sha512-95CplwlRKmmUXkdEp/21VkEWgnwcx2TOBG6NfYlsuLBDHSLlo5FKIiE2oSi4zXc4TLcopGcWPNcm/NDaSC5pvA== + dependencies: + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + "@chakra-ui/spinner" "2.1.0" + +"@chakra-ui/card@2.2.0": + version "2.2.0" + resolved 
"https://registry.yarnpkg.com/@chakra-ui/card/-/card-2.2.0.tgz#b5e59dc51c171fced76ea76bf26088803b8bc184" + integrity sha512-xUB/k5MURj4CtPAhdSoXZidUbm8j3hci9vnc+eZJVDqhDOShNlD6QeniQNRPRys4lWAQLCbFcrwL29C8naDi6g== + dependencies: + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/checkbox@2.3.2": + version "2.3.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/checkbox/-/checkbox-2.3.2.tgz#4ecb14a2f57b7470d1a58542ca4691c3b105bfa1" + integrity sha512-85g38JIXMEv6M+AcyIGLh7igNtfpAN6KGQFYxY9tBj0eWvWk4NKQxvqqyVta0bSAyIl1rixNIIezNpNWk2iO4g== + dependencies: + "@chakra-ui/form-control" "2.2.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-types" "2.0.7" + "@chakra-ui/react-use-callback-ref" "2.1.0" + "@chakra-ui/react-use-controllable-state" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/react-use-safe-layout-effect" "2.1.0" + "@chakra-ui/react-use-update-effect" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + "@chakra-ui/visually-hidden" "2.2.0" + "@zag-js/focus-visible" "0.16.0" + +"@chakra-ui/clickable@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/clickable/-/clickable-2.1.0.tgz#800fa8d10cf45a41fc50a3df32c679a3ce1921c3" + integrity sha512-flRA/ClPUGPYabu+/GLREZVZr9j2uyyazCAUHAdrTUEdDYCr31SVGhgh7dgKdtq23bOvAQJpIJjw/0Bs0WvbXw== + dependencies: + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/close-button@2.1.1": + version "2.1.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/close-button/-/close-button-2.1.1.tgz#995b245c56eb41465a71d8667840c238618a7b66" + integrity sha512-gnpENKOanKexswSVpVz7ojZEALl2x5qjLYNqSQGbxz+aP9sOXPfUS56ebyBrre7T7exuWGiFeRwnM0oVeGPaiw== + dependencies: + "@chakra-ui/icon" "3.2.0" + +"@chakra-ui/color-mode@2.2.0": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/color-mode/-/color-mode-2.2.0.tgz#828d47234c74ba2fb4c5dd63a63331aead20b9f6" + integrity 
sha512-niTEA8PALtMWRI9wJ4LL0CSBDo8NBfLNp4GD6/0hstcm3IlbBHTVKxN6HwSaoNYfphDQLxCjT4yG+0BJA5tFpg== + dependencies: + "@chakra-ui/react-use-safe-layout-effect" "2.1.0" + +"@chakra-ui/control-box@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/control-box/-/control-box-2.1.0.tgz#0f4586797b3154c02463bc5c106782e70c88f04f" + integrity sha512-gVrRDyXFdMd8E7rulL0SKeoljkLQiPITFnsyMO8EFHNZ+AHt5wK4LIguYVEq88APqAGZGfHFWXr79RYrNiE3Mg== + +"@chakra-ui/counter@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/counter/-/counter-2.1.0.tgz#e413a2f1093a18f847bb7aa240117fde788a59e6" + integrity sha512-s6hZAEcWT5zzjNz2JIWUBzRubo9la/oof1W7EKZVVfPYHERnl5e16FmBC79Yfq8p09LQ+aqFKm/etYoJMMgghw== + dependencies: + "@chakra-ui/number-utils" "2.0.7" + "@chakra-ui/react-use-callback-ref" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/css-reset@2.3.0": + version "2.3.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/css-reset/-/css-reset-2.3.0.tgz#83e3160a9c2a12431cad0ee27ebfbf3aedc5c9c7" + integrity sha512-cQwwBy5O0jzvl0K7PLTLgp8ijqLPKyuEMiDXwYzl95seD3AoeuoCLyzZcJtVqaUZ573PiBdAbY/IlZcwDOItWg== + +"@chakra-ui/descendant@3.1.0": + version "3.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/descendant/-/descendant-3.1.0.tgz#f3b80ed13ffc4bf1d615b3ed5541bd0905375cca" + integrity sha512-VxCIAir08g5w27klLyi7PVo8BxhW4tgU/lxQyujkmi4zx7hT9ZdrcQLAted/dAa+aSIZ14S1oV0Q9lGjsAdxUQ== + dependencies: + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + +"@chakra-ui/dom-utils@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/dom-utils/-/dom-utils-2.1.0.tgz#d15df89e458ef19756db04c7cfd084eb552454f0" + integrity sha512-ZmF2qRa1QZ0CMLU8M1zCfmw29DmPNtfjR9iTo74U5FPr3i1aoAh7fbJ4qAlZ197Xw9eAW28tvzQuoVWeL5C7fQ== + +"@chakra-ui/editable@3.1.0": + version "3.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/editable/-/editable-3.1.0.tgz#065783c2e3389c3bb9ab0582cb50d38e1dc00fa1" + 
integrity sha512-j2JLrUL9wgg4YA6jLlbU88370eCRyor7DZQD9lzpY95tSOXpTljeg3uF9eOmDnCs6fxp3zDWIfkgMm/ExhcGTg== + dependencies: + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-types" "2.0.7" + "@chakra-ui/react-use-callback-ref" "2.1.0" + "@chakra-ui/react-use-controllable-state" "2.1.0" + "@chakra-ui/react-use-focus-on-pointer-down" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/react-use-safe-layout-effect" "2.1.0" + "@chakra-ui/react-use-update-effect" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/event-utils@2.0.8": + version "2.0.8" + resolved "https://registry.yarnpkg.com/@chakra-ui/event-utils/-/event-utils-2.0.8.tgz#e6439ba200825a2f15d8f1973d267d1c00a6d1b4" + integrity sha512-IGM/yGUHS+8TOQrZGpAKOJl/xGBrmRYJrmbHfUE7zrG3PpQyXvbLDP1M+RggkCFVgHlJi2wpYIf0QtQlU0XZfw== + +"@chakra-ui/focus-lock@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/focus-lock/-/focus-lock-2.1.0.tgz#580e5450fe85356987b9a246abaff8333369c667" + integrity sha512-EmGx4PhWGjm4dpjRqM4Aa+rCWBxP+Rq8Uc/nAVnD4YVqkEhBkrPTpui2lnjsuxqNaZ24fIAZ10cF1hlpemte/w== + dependencies: + "@chakra-ui/dom-utils" "2.1.0" + react-focus-lock "^2.9.4" + +"@chakra-ui/form-control@2.2.0": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/form-control/-/form-control-2.2.0.tgz#69c771d6406ddac8ab357ae88446cc11827656a4" + integrity sha512-wehLC1t4fafCVJ2RvJQT2jyqsAwX7KymmiGqBu7nQoQz8ApTkGABWpo/QwDh3F/dBLrouHDoOvGmYTqft3Mirw== + dependencies: + "@chakra-ui/icon" "3.2.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-types" "2.0.7" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/hooks@2.2.1": + version "2.2.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/hooks/-/hooks-2.2.1.tgz#b86ce5eeaaab877ddcb11a50842d1227306ace28" + integrity sha512-RQbTnzl6b1tBjbDPf9zGRo9rf/pQMholsOudTxjy4i9GfTfz6kgp5ValGjQm2z7ng6Z31N1cnjZ1AlSzQ//ZfQ== + dependencies: + "@chakra-ui/react-utils" "2.0.12" + 
"@chakra-ui/utils" "2.0.15" + compute-scroll-into-view "3.0.3" + copy-to-clipboard "3.3.3" + +"@chakra-ui/icon@3.2.0": + version "3.2.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/icon/-/icon-3.2.0.tgz#92b9454aa0d561b4994bcd6a1b3bb1fdd5c67bef" + integrity sha512-xxjGLvlX2Ys4H0iHrI16t74rG9EBcpFvJ3Y3B7KMQTrnW34Kf7Da/UC8J67Gtx85mTHW020ml85SVPKORWNNKQ== + dependencies: + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/image@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/image/-/image-2.1.0.tgz#6c205f1ca148e3bf58345b0b5d4eb3d959eb9f87" + integrity sha512-bskumBYKLiLMySIWDGcz0+D9Th0jPvmX6xnRMs4o92tT3Od/bW26lahmV2a2Op2ItXeCmRMY+XxJH5Gy1i46VA== + dependencies: + "@chakra-ui/react-use-safe-layout-effect" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/input@2.1.2": + version "2.1.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/input/-/input-2.1.2.tgz#0cad49ec372f8f21f2f4f1db365f34b9a708ff9d" + integrity sha512-GiBbb3EqAA8Ph43yGa6Mc+kUPjh4Spmxp1Pkelr8qtudpc3p2PJOOebLpd90mcqw8UePPa+l6YhhPtp6o0irhw== + dependencies: + "@chakra-ui/form-control" "2.2.0" + "@chakra-ui/object-utils" "2.1.0" + "@chakra-ui/react-children-utils" "2.0.6" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/layout@2.3.1": + version "2.3.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/layout/-/layout-2.3.1.tgz#0601c5eb91555d24a7015a7c9d4e01fed2698557" + integrity sha512-nXuZ6WRbq0WdgnRgLw+QuxWAHuhDtVX8ElWqcTK+cSMFg/52eVP47czYBE5F35YhnoW2XBwfNoNgZ7+e8Z01Rg== + dependencies: + "@chakra-ui/breakpoint-utils" "2.0.8" + "@chakra-ui/icon" "3.2.0" + "@chakra-ui/object-utils" "2.1.0" + "@chakra-ui/react-children-utils" "2.0.6" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/lazy-utils@2.0.5": + version "2.0.5" + resolved "https://registry.yarnpkg.com/@chakra-ui/lazy-utils/-/lazy-utils-2.0.5.tgz#363c3fa1d421362790b416ffa595acb835e1ae5b" + integrity 
sha512-UULqw7FBvcckQk2n3iPO56TMJvDsNv0FKZI6PlUNJVaGsPbsYxK/8IQ60vZgaTVPtVcjY6BE+y6zg8u9HOqpyg== + +"@chakra-ui/live-region@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/live-region/-/live-region-2.1.0.tgz#02b4b1d997075f19a7a9a87187e08c72e82ef0dd" + integrity sha512-ZOxFXwtaLIsXjqnszYYrVuswBhnIHHP+XIgK1vC6DePKtyK590Wg+0J0slDwThUAd4MSSIUa/nNX84x1GMphWw== + +"@chakra-ui/media-query@3.3.0": + version "3.3.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/media-query/-/media-query-3.3.0.tgz#40f9151dedb6a7af9df3be0474b59a799c92c619" + integrity sha512-IsTGgFLoICVoPRp9ykOgqmdMotJG0CnPsKvGQeSFOB/dZfIujdVb14TYxDU4+MURXry1MhJ7LzZhv+Ml7cr8/g== + dependencies: + "@chakra-ui/breakpoint-utils" "2.0.8" + "@chakra-ui/react-env" "3.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/menu@2.2.1": + version "2.2.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/menu/-/menu-2.2.1.tgz#7d9810d435f6b40fa72ed867a33b88a1ef75073f" + integrity sha512-lJS7XEObzJxsOwWQh7yfG4H8FzFPRP5hVPN/CL+JzytEINCSBvsCDHrYPQGp7jzpCi8vnTqQQGQe0f8dwnXd2g== + dependencies: + "@chakra-ui/clickable" "2.1.0" + "@chakra-ui/descendant" "3.1.0" + "@chakra-ui/lazy-utils" "2.0.5" + "@chakra-ui/popper" "3.1.0" + "@chakra-ui/react-children-utils" "2.0.6" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-use-animation-state" "2.1.0" + "@chakra-ui/react-use-controllable-state" "2.1.0" + "@chakra-ui/react-use-disclosure" "2.1.0" + "@chakra-ui/react-use-focus-effect" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/react-use-outside-click" "2.2.0" + "@chakra-ui/react-use-update-effect" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + "@chakra-ui/transition" "2.1.0" + +"@chakra-ui/modal@2.3.1": + version "2.3.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/modal/-/modal-2.3.1.tgz#524dc32b6b4f545b54ae531dbf6c74e1052ee794" + integrity sha512-TQv1ZaiJMZN+rR9DK0snx/OPwmtaGH1HbZtlYt4W4s6CzyK541fxLRTjIXfEzIGpvNW+b6VFuFjbcR78p4DEoQ== + dependencies: + 
"@chakra-ui/close-button" "2.1.1" + "@chakra-ui/focus-lock" "2.1.0" + "@chakra-ui/portal" "2.1.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-types" "2.0.7" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + "@chakra-ui/transition" "2.1.0" + aria-hidden "^1.2.3" + react-remove-scroll "^2.5.6" + +"@chakra-ui/number-input@2.1.2": + version "2.1.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/number-input/-/number-input-2.1.2.tgz#dda9095fba6a4b89212332db02831b94120da163" + integrity sha512-pfOdX02sqUN0qC2ysuvgVDiws7xZ20XDIlcNhva55Jgm095xjm8eVdIBfNm3SFbSUNxyXvLTW/YQanX74tKmuA== + dependencies: + "@chakra-ui/counter" "2.1.0" + "@chakra-ui/form-control" "2.2.0" + "@chakra-ui/icon" "3.2.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-types" "2.0.7" + "@chakra-ui/react-use-callback-ref" "2.1.0" + "@chakra-ui/react-use-event-listener" "2.1.0" + "@chakra-ui/react-use-interval" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/react-use-safe-layout-effect" "2.1.0" + "@chakra-ui/react-use-update-effect" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/number-utils@2.0.7": + version "2.0.7" + resolved "https://registry.yarnpkg.com/@chakra-ui/number-utils/-/number-utils-2.0.7.tgz#aaee979ca2fb1923a0373a91619473811315db11" + integrity sha512-yOGxBjXNvLTBvQyhMDqGU0Oj26s91mbAlqKHiuw737AXHt0aPllOthVUqQMeaYLwLCjGMg0jtI7JReRzyi94Dg== + +"@chakra-ui/object-utils@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/object-utils/-/object-utils-2.1.0.tgz#a4ecf9cea92f1de09f5531f53ffdc41e0b19b6c3" + integrity sha512-tgIZOgLHaoti5PYGPTwK3t/cqtcycW0owaiOXoZOcpwwX/vlVb+H1jFsQyWiiwQVPt9RkoSLtxzXamx+aHH+bQ== + +"@chakra-ui/pin-input@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/pin-input/-/pin-input-2.1.0.tgz#61e6bbf909ec510634307b2861c4f1891a9f8d81" + integrity 
sha512-x4vBqLStDxJFMt+jdAHHS8jbh294O53CPQJoL4g228P513rHylV/uPscYUHrVJXRxsHfRztQO9k45jjTYaPRMw== + dependencies: + "@chakra-ui/descendant" "3.1.0" + "@chakra-ui/react-children-utils" "2.0.6" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-use-controllable-state" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/popover@2.2.1": + version "2.2.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/popover/-/popover-2.2.1.tgz#89cfd29817abcd204da570073c0f2b4d8072c3a3" + integrity sha512-K+2ai2dD0ljvJnlrzesCDT9mNzLifE3noGKZ3QwLqd/K34Ym1W/0aL1ERSynrcG78NKoXS54SdEzkhCZ4Gn/Zg== + dependencies: + "@chakra-ui/close-button" "2.1.1" + "@chakra-ui/lazy-utils" "2.0.5" + "@chakra-ui/popper" "3.1.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-types" "2.0.7" + "@chakra-ui/react-use-animation-state" "2.1.0" + "@chakra-ui/react-use-disclosure" "2.1.0" + "@chakra-ui/react-use-focus-effect" "2.1.0" + "@chakra-ui/react-use-focus-on-pointer-down" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/popper@3.1.0": + version "3.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/popper/-/popper-3.1.0.tgz#92a9180c6894763af3b22a6003f9a9d958fe2659" + integrity sha512-ciDdpdYbeFG7og6/6J8lkTFxsSvwTdMLFkpVylAF6VNC22jssiWfquj2eyD4rJnzkRFPvIWJq8hvbfhsm+AjSg== + dependencies: + "@chakra-ui/react-types" "2.0.7" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@popperjs/core" "^2.9.3" + +"@chakra-ui/portal@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/portal/-/portal-2.1.0.tgz#9e7f57424d7041738b6563cac80134561080bd27" + integrity sha512-9q9KWf6SArEcIq1gGofNcFPSWEyl+MfJjEUg/un1SMlQjaROOh3zYr+6JAwvcORiX7tyHosnmWC3d3wI2aPSQg== + dependencies: + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-use-safe-layout-effect" "2.1.0" + +"@chakra-ui/progress@2.2.0": + version "2.2.0" + resolved 
"https://registry.yarnpkg.com/@chakra-ui/progress/-/progress-2.2.0.tgz#67444ea9779631d7c8395b2c9c78e5634f994999" + integrity sha512-qUXuKbuhN60EzDD9mHR7B67D7p/ZqNS2Aze4Pbl1qGGZfulPW0PY8Rof32qDtttDQBkzQIzFGE8d9QpAemToIQ== + dependencies: + "@chakra-ui/react-context" "2.1.0" + +"@chakra-ui/provider@2.4.2": + version "2.4.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/provider/-/provider-2.4.2.tgz#92cb10b6a7df0720e3fa62716dc7cd872ae3ea3d" + integrity sha512-w0Tef5ZCJK1mlJorcSjItCSbyvVuqpvyWdxZiVQmE6fvSJR83wZof42ux0+sfWD+I7rHSfj+f9nzhNaEWClysw== + dependencies: + "@chakra-ui/css-reset" "2.3.0" + "@chakra-ui/portal" "2.1.0" + "@chakra-ui/react-env" "3.1.0" + "@chakra-ui/system" "2.6.2" + "@chakra-ui/utils" "2.0.15" + +"@chakra-ui/radio@2.1.2": + version "2.1.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/radio/-/radio-2.1.2.tgz#66db19c61a2e628aaf5e727027f7c3b4006ea898" + integrity sha512-n10M46wJrMGbonaghvSRnZ9ToTv/q76Szz284gv4QUWvyljQACcGrXIONUnQ3BIwbOfkRqSk7Xl/JgZtVfll+w== + dependencies: + "@chakra-ui/form-control" "2.2.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-types" "2.0.7" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + "@zag-js/focus-visible" "0.16.0" + +"@chakra-ui/react-children-utils@2.0.6": + version "2.0.6" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-children-utils/-/react-children-utils-2.0.6.tgz#6c480c6a60678fcb75cb7d57107c7a79e5179b92" + integrity sha512-QVR2RC7QsOsbWwEnq9YduhpqSFnZGvjjGREV8ygKi8ADhXh93C8azLECCUVgRJF2Wc+So1fgxmjLcbZfY2VmBA== + +"@chakra-ui/react-context@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-context/-/react-context-2.1.0.tgz#4858be1d5ff1c8ac0a0ec088d93a3b7f1cbbff99" + integrity sha512-iahyStvzQ4AOwKwdPReLGfDesGG+vWJfEsn0X/NoGph/SkN+HXtv2sCfYFFR9k7bb+Kvc6YfpLlSuLvKMHi2+w== + +"@chakra-ui/react-env@3.1.0": + version "3.1.0" + resolved 
"https://registry.yarnpkg.com/@chakra-ui/react-env/-/react-env-3.1.0.tgz#7d3c1c05a501bb369524d9f3d38c9325eb16ab50" + integrity sha512-Vr96GV2LNBth3+IKzr/rq1IcnkXv+MLmwjQH6C8BRtn3sNskgDFD5vLkVXcEhagzZMCh8FR3V/bzZPojBOyNhw== + dependencies: + "@chakra-ui/react-use-safe-layout-effect" "2.1.0" + +"@chakra-ui/react-types@2.0.7": + version "2.0.7" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-types/-/react-types-2.0.7.tgz#799c166a44882b23059c8f510eac9bd5d0869ac4" + integrity sha512-12zv2qIZ8EHwiytggtGvo4iLT0APris7T0qaAWqzpUGS0cdUtR8W+V1BJ5Ocq+7tA6dzQ/7+w5hmXih61TuhWQ== + +"@chakra-ui/react-use-animation-state@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-animation-state/-/react-use-animation-state-2.1.0.tgz#eab661fbafd96804fe867b0df0c27e78feefe6e2" + integrity sha512-CFZkQU3gmDBwhqy0vC1ryf90BVHxVN8cTLpSyCpdmExUEtSEInSCGMydj2fvn7QXsz/za8JNdO2xxgJwxpLMtg== + dependencies: + "@chakra-ui/dom-utils" "2.1.0" + "@chakra-ui/react-use-event-listener" "2.1.0" + +"@chakra-ui/react-use-callback-ref@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-callback-ref/-/react-use-callback-ref-2.1.0.tgz#a508085f4d9e7d84d4ceffdf5f41745c9ac451d7" + integrity sha512-efnJrBtGDa4YaxDzDE90EnKD3Vkh5a1t3w7PhnRQmsphLy3g2UieasoKTlT2Hn118TwDjIv5ZjHJW6HbzXA9wQ== + +"@chakra-ui/react-use-controllable-state@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-controllable-state/-/react-use-controllable-state-2.1.0.tgz#8fb6fa2f45d0c04173582ae8297e604ffdb9c7d9" + integrity sha512-QR/8fKNokxZUs4PfxjXuwl0fj/d71WPrmLJvEpCTkHjnzu7LnYvzoe2wB867IdooQJL0G1zBxl0Dq+6W1P3jpg== + dependencies: + "@chakra-ui/react-use-callback-ref" "2.1.0" + +"@chakra-ui/react-use-disclosure@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-disclosure/-/react-use-disclosure-2.1.0.tgz#90093eaf45db1bea7a6851dd0ce5cdb3eb66f90a" + integrity 
sha512-Ax4pmxA9LBGMyEZJhhUZobg9C0t3qFE4jVF1tGBsrLDcdBeLR9fwOogIPY9Hf0/wqSlAryAimICbr5hkpa5GSw== + dependencies: + "@chakra-ui/react-use-callback-ref" "2.1.0" + +"@chakra-ui/react-use-event-listener@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-event-listener/-/react-use-event-listener-2.1.0.tgz#afea2645bd9b38f754fc2b8eb858f9bb22385ded" + integrity sha512-U5greryDLS8ISP69DKDsYcsXRtAdnTQT+jjIlRYZ49K/XhUR/AqVZCK5BkR1spTDmO9H8SPhgeNKI70ODuDU/Q== + dependencies: + "@chakra-ui/react-use-callback-ref" "2.1.0" + +"@chakra-ui/react-use-focus-effect@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-focus-effect/-/react-use-focus-effect-2.1.0.tgz#963fb790370dfadd51d12666ff2da60706f53a2a" + integrity sha512-xzVboNy7J64xveLcxTIJ3jv+lUJKDwRM7Szwn9tNzUIPD94O3qwjV7DDCUzN2490nSYDF4OBMt/wuDBtaR3kUQ== + dependencies: + "@chakra-ui/dom-utils" "2.1.0" + "@chakra-ui/react-use-event-listener" "2.1.0" + "@chakra-ui/react-use-safe-layout-effect" "2.1.0" + "@chakra-ui/react-use-update-effect" "2.1.0" + +"@chakra-ui/react-use-focus-on-pointer-down@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-focus-on-pointer-down/-/react-use-focus-on-pointer-down-2.1.0.tgz#2fbcf6bc7d06d97606747e231a908d5c387ca0cc" + integrity sha512-2jzrUZ+aiCG/cfanrolsnSMDykCAbv9EK/4iUyZno6BYb3vziucmvgKuoXbMPAzWNtwUwtuMhkby8rc61Ue+Lg== + dependencies: + "@chakra-ui/react-use-event-listener" "2.1.0" + +"@chakra-ui/react-use-interval@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-interval/-/react-use-interval-2.1.0.tgz#2602c097b3ab74b6644812e4f5efaad621218d98" + integrity sha512-8iWj+I/+A0J08pgEXP1J1flcvhLBHkk0ln7ZvGIyXiEyM6XagOTJpwNhiu+Bmk59t3HoV/VyvyJTa+44sEApuw== + dependencies: + "@chakra-ui/react-use-callback-ref" "2.1.0" + +"@chakra-ui/react-use-latest-ref@2.1.0": + version "2.1.0" + resolved 
"https://registry.yarnpkg.com/@chakra-ui/react-use-latest-ref/-/react-use-latest-ref-2.1.0.tgz#d1e926130102566ece1d39f8a48ed125e0c8441a" + integrity sha512-m0kxuIYqoYB0va9Z2aW4xP/5b7BzlDeWwyXCH6QpT2PpW3/281L3hLCm1G0eOUcdVlayqrQqOeD6Mglq+5/xoQ== + +"@chakra-ui/react-use-merge-refs@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-merge-refs/-/react-use-merge-refs-2.1.0.tgz#c0c233527abdbea9a1348269c192012205762314" + integrity sha512-lERa6AWF1cjEtWSGjxWTaSMvneccnAVH4V4ozh8SYiN9fSPZLlSG3kNxfNzdFvMEhM7dnP60vynF7WjGdTgQbQ== + +"@chakra-ui/react-use-outside-click@2.2.0": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-outside-click/-/react-use-outside-click-2.2.0.tgz#5570b772a255f6f02b69e967127397c1b5fa3d3c" + integrity sha512-PNX+s/JEaMneijbgAM4iFL+f3m1ga9+6QK0E5Yh4s8KZJQ/bLwZzdhMz8J/+mL+XEXQ5J0N8ivZN28B82N1kNw== + dependencies: + "@chakra-ui/react-use-callback-ref" "2.1.0" + +"@chakra-ui/react-use-pan-event@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-pan-event/-/react-use-pan-event-2.1.0.tgz#51c21bc3c0e9e73d1faef5ea4f7e3c3d071a2758" + integrity sha512-xmL2qOHiXqfcj0q7ZK5s9UjTh4Gz0/gL9jcWPA6GVf+A0Od5imEDa/Vz+533yQKWiNSm1QGrIj0eJAokc7O4fg== + dependencies: + "@chakra-ui/event-utils" "2.0.8" + "@chakra-ui/react-use-latest-ref" "2.1.0" + framesync "6.1.2" + +"@chakra-ui/react-use-previous@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-previous/-/react-use-previous-2.1.0.tgz#f6046e6f7398b1e8d7e66ff7ebb8d61c92a2d3d0" + integrity sha512-pjxGwue1hX8AFcmjZ2XfrQtIJgqbTF3Qs1Dy3d1krC77dEsiCUbQ9GzOBfDc8pfd60DrB5N2tg5JyHbypqh0Sg== + +"@chakra-ui/react-use-safe-layout-effect@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-safe-layout-effect/-/react-use-safe-layout-effect-2.1.0.tgz#3a95f0ba6fd5d2d0aa14919160f2c825f13e686f" + integrity 
sha512-Knbrrx/bcPwVS1TorFdzrK/zWA8yuU/eaXDkNj24IrKoRlQrSBFarcgAEzlCHtzuhufP3OULPkELTzz91b0tCw== + +"@chakra-ui/react-use-size@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-size/-/react-use-size-2.1.0.tgz#fcf3070eaade8b4a84af8ce5341c4d5ca0a42bec" + integrity sha512-tbLqrQhbnqOjzTaMlYytp7wY8BW1JpL78iG7Ru1DlV4EWGiAmXFGvtnEt9HftU0NJ0aJyjgymkxfVGI55/1Z4A== + dependencies: + "@zag-js/element-size" "0.10.5" + +"@chakra-ui/react-use-timeout@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-timeout/-/react-use-timeout-2.1.0.tgz#24415f54267d7241a3c1d36a5cae4d472834cef7" + integrity sha512-cFN0sobKMM9hXUhyCofx3/Mjlzah6ADaEl/AXl5Y+GawB5rgedgAcu2ErAgarEkwvsKdP6c68CKjQ9dmTQlJxQ== + dependencies: + "@chakra-ui/react-use-callback-ref" "2.1.0" + +"@chakra-ui/react-use-update-effect@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-use-update-effect/-/react-use-update-effect-2.1.0.tgz#5c57cd1f50c2a6a8119e0f57f69510723d69884b" + integrity sha512-ND4Q23tETaR2Qd3zwCKYOOS1dfssojPLJMLvUtUbW5M9uW1ejYWgGUobeAiOVfSplownG8QYMmHTP86p/v0lbA== + +"@chakra-ui/react-utils@2.0.12": + version "2.0.12" + resolved "https://registry.yarnpkg.com/@chakra-ui/react-utils/-/react-utils-2.0.12.tgz#d6b773b9a5b2e51dce61f51ac8a0e9a0f534f479" + integrity sha512-GbSfVb283+YA3kA8w8xWmzbjNWk14uhNpntnipHCftBibl0lxtQ9YqMFQLwuFOO0U2gYVocszqqDWX+XNKq9hw== + dependencies: + "@chakra-ui/utils" "2.0.15" + +"@chakra-ui/react@^2.8.2": + version "2.8.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/react/-/react-2.8.2.tgz#94d692fb35e4447748c5bfd73d8d38a746193c7d" + integrity sha512-Hn0moyxxyCDKuR9ywYpqgX8dvjqwu9ArwpIb9wHNYjnODETjLwazgNIliCVBRcJvysGRiV51U2/JtJVrpeCjUQ== + dependencies: + "@chakra-ui/accordion" "2.3.1" + "@chakra-ui/alert" "2.2.2" + "@chakra-ui/avatar" "2.3.0" + "@chakra-ui/breadcrumb" "2.2.0" + "@chakra-ui/button" "2.1.0" + "@chakra-ui/card" "2.2.0" + "@chakra-ui/checkbox" "2.3.2" + 
"@chakra-ui/close-button" "2.1.1" + "@chakra-ui/control-box" "2.1.0" + "@chakra-ui/counter" "2.1.0" + "@chakra-ui/css-reset" "2.3.0" + "@chakra-ui/editable" "3.1.0" + "@chakra-ui/focus-lock" "2.1.0" + "@chakra-ui/form-control" "2.2.0" + "@chakra-ui/hooks" "2.2.1" + "@chakra-ui/icon" "3.2.0" + "@chakra-ui/image" "2.1.0" + "@chakra-ui/input" "2.1.2" + "@chakra-ui/layout" "2.3.1" + "@chakra-ui/live-region" "2.1.0" + "@chakra-ui/media-query" "3.3.0" + "@chakra-ui/menu" "2.2.1" + "@chakra-ui/modal" "2.3.1" + "@chakra-ui/number-input" "2.1.2" + "@chakra-ui/pin-input" "2.1.0" + "@chakra-ui/popover" "2.2.1" + "@chakra-ui/popper" "3.1.0" + "@chakra-ui/portal" "2.1.0" + "@chakra-ui/progress" "2.2.0" + "@chakra-ui/provider" "2.4.2" + "@chakra-ui/radio" "2.1.2" + "@chakra-ui/react-env" "3.1.0" + "@chakra-ui/select" "2.1.2" + "@chakra-ui/skeleton" "2.1.0" + "@chakra-ui/skip-nav" "2.1.0" + "@chakra-ui/slider" "2.1.0" + "@chakra-ui/spinner" "2.1.0" + "@chakra-ui/stat" "2.1.1" + "@chakra-ui/stepper" "2.3.1" + "@chakra-ui/styled-system" "2.9.2" + "@chakra-ui/switch" "2.1.2" + "@chakra-ui/system" "2.6.2" + "@chakra-ui/table" "2.1.0" + "@chakra-ui/tabs" "3.0.0" + "@chakra-ui/tag" "3.1.1" + "@chakra-ui/textarea" "2.1.2" + "@chakra-ui/theme" "3.3.1" + "@chakra-ui/theme-utils" "2.0.21" + "@chakra-ui/toast" "7.0.2" + "@chakra-ui/tooltip" "2.3.1" + "@chakra-ui/transition" "2.1.0" + "@chakra-ui/utils" "2.0.15" + "@chakra-ui/visually-hidden" "2.2.0" + +"@chakra-ui/select@2.1.2": + version "2.1.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/select/-/select-2.1.2.tgz#f57d6cec0559373c32094fd4a5abd32855829264" + integrity sha512-ZwCb7LqKCVLJhru3DXvKXpZ7Pbu1TDZ7N0PdQ0Zj1oyVLJyrpef1u9HR5u0amOpqcH++Ugt0f5JSmirjNlctjA== + dependencies: + "@chakra-ui/form-control" "2.2.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/shared-utils@2.0.5": + version "2.0.5" + resolved "https://registry.yarnpkg.com/@chakra-ui/shared-utils/-/shared-utils-2.0.5.tgz#cb2b49705e113853647f1822142619570feba081" + 
integrity sha512-4/Wur0FqDov7Y0nCXl7HbHzCg4aq86h+SXdoUeuCMD3dSj7dpsVnStLYhng1vxvlbUnLpdF4oz5Myt3i/a7N3Q== + +"@chakra-ui/skeleton@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/skeleton/-/skeleton-2.1.0.tgz#e3b25dd3afa330029d6d63be0f7cb8d44ad25531" + integrity sha512-JNRuMPpdZGd6zFVKjVQ0iusu3tXAdI29n4ZENYwAJEMf/fN0l12sVeirOxkJ7oEL0yOx2AgEYFSKdbcAgfUsAQ== + dependencies: + "@chakra-ui/media-query" "3.3.0" + "@chakra-ui/react-use-previous" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/skip-nav@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/skip-nav/-/skip-nav-2.1.0.tgz#cac27eecc6eded1e83c8f0cf7445d727739cb325" + integrity sha512-Hk+FG+vadBSH0/7hwp9LJnLjkO0RPGnx7gBJWI4/SpoJf3e4tZlWYtwGj0toYY4aGKl93jVghuwGbDBEMoHDug== + +"@chakra-ui/slider@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/slider/-/slider-2.1.0.tgz#1caeed18761ba2a390777418cc9389ba25e39bce" + integrity sha512-lUOBcLMCnFZiA/s2NONXhELJh6sY5WtbRykPtclGfynqqOo47lwWJx+VP7xaeuhDOPcWSSecWc9Y1BfPOCz9cQ== + dependencies: + "@chakra-ui/number-utils" "2.0.7" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-types" "2.0.7" + "@chakra-ui/react-use-callback-ref" "2.1.0" + "@chakra-ui/react-use-controllable-state" "2.1.0" + "@chakra-ui/react-use-latest-ref" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/react-use-pan-event" "2.1.0" + "@chakra-ui/react-use-size" "2.1.0" + "@chakra-ui/react-use-update-effect" "2.1.0" + +"@chakra-ui/spinner@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/spinner/-/spinner-2.1.0.tgz#aa24a3d692c6ac90714e0f0f82c76c12c78c8e60" + integrity sha512-hczbnoXt+MMv/d3gE+hjQhmkzLiKuoTo42YhUG7Bs9OSv2lg1fZHW1fGNRFP3wTi6OIbD044U1P9HK+AOgFH3g== + dependencies: + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/stat@2.1.1": + version "2.1.1" + resolved 
"https://registry.yarnpkg.com/@chakra-ui/stat/-/stat-2.1.1.tgz#a204ba915795345996a16c79794d84826d7dcc2d" + integrity sha512-LDn0d/LXQNbAn2KaR3F1zivsZCewY4Jsy1qShmfBMKwn6rI8yVlbvu6SiA3OpHS0FhxbsZxQI6HefEoIgtqY6Q== + dependencies: + "@chakra-ui/icon" "3.2.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/stepper@2.3.1": + version "2.3.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/stepper/-/stepper-2.3.1.tgz#a0a0b73e147f202ab4e51cae55dad45489cc89fd" + integrity sha512-ky77lZbW60zYkSXhYz7kbItUpAQfEdycT0Q4bkHLxfqbuiGMf8OmgZOQkOB9uM4v0zPwy2HXhe0vq4Dd0xa55Q== + dependencies: + "@chakra-ui/icon" "3.2.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/styled-system@2.9.2": + version "2.9.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/styled-system/-/styled-system-2.9.2.tgz#898ab63da560a4a014f7b05fa7767e8c76da6d2f" + integrity sha512-To/Z92oHpIE+4nk11uVMWqo2GGRS86coeMmjxtpnErmWRdLcp1WVCVRAvn+ZwpLiNR+reWFr2FFqJRsREuZdAg== + dependencies: + "@chakra-ui/shared-utils" "2.0.5" + csstype "^3.1.2" + lodash.mergewith "4.6.2" + +"@chakra-ui/switch@2.1.2": + version "2.1.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/switch/-/switch-2.1.2.tgz#f7c6878d8126bfac8fa3b939079f1017c21b7479" + integrity sha512-pgmi/CC+E1v31FcnQhsSGjJnOE2OcND4cKPyTE+0F+bmGm48Q/b5UmKD9Y+CmZsrt/7V3h8KNczowupfuBfIHA== + dependencies: + "@chakra-ui/checkbox" "2.3.2" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/system@2.6.2": + version "2.6.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/system/-/system-2.6.2.tgz#528ec955bd6a7f74da46470ee8225b1e2c80a78b" + integrity sha512-EGtpoEjLrUu4W1fHD+a62XR+hzC5YfsWm+6lO0Kybcga3yYEij9beegO0jZgug27V+Rf7vns95VPVP6mFd/DEQ== + dependencies: + "@chakra-ui/color-mode" "2.2.0" + "@chakra-ui/object-utils" "2.1.0" + "@chakra-ui/react-utils" "2.0.12" + "@chakra-ui/styled-system" "2.9.2" + "@chakra-ui/theme-utils" "2.0.21" + "@chakra-ui/utils" "2.0.15" + 
react-fast-compare "3.2.2" + +"@chakra-ui/table@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/table/-/table-2.1.0.tgz#20dce14c5e4d70dc7c6c0e87cce9b05907ff8c50" + integrity sha512-o5OrjoHCh5uCLdiUb0Oc0vq9rIAeHSIRScc2ExTC9Qg/uVZl2ygLrjToCaKfaaKl1oQexIeAcZDKvPG8tVkHyQ== + dependencies: + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/tabs@3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/tabs/-/tabs-3.0.0.tgz#854c06880af26158d7c72881c4b5e0453f6c485d" + integrity sha512-6Mlclp8L9lqXmsGWF5q5gmemZXOiOYuh0SGT/7PgJVNPz3LXREXlXg2an4MBUD8W5oTkduCX+3KTMCwRrVrDYw== + dependencies: + "@chakra-ui/clickable" "2.1.0" + "@chakra-ui/descendant" "3.1.0" + "@chakra-ui/lazy-utils" "2.0.5" + "@chakra-ui/react-children-utils" "2.0.6" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-use-controllable-state" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/react-use-safe-layout-effect" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/tag@3.1.1": + version "3.1.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/tag/-/tag-3.1.1.tgz#d05284b6549a84d3a08e57eec57df3ad0eebd882" + integrity sha512-Bdel79Dv86Hnge2PKOU+t8H28nm/7Y3cKd4Kfk9k3lOpUh4+nkSGe58dhRzht59lEqa4N9waCgQiBdkydjvBXQ== + dependencies: + "@chakra-ui/icon" "3.2.0" + "@chakra-ui/react-context" "2.1.0" + +"@chakra-ui/textarea@2.1.2": + version "2.1.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/textarea/-/textarea-2.1.2.tgz#30f8af0e233cec2dee79d527450c6586e7122eff" + integrity sha512-ip7tvklVCZUb2fOHDb23qPy/Fr2mzDOGdkrpbNi50hDCiV4hFX02jdQJdi3ydHZUyVgZVBKPOJ+lT9i7sKA2wA== + dependencies: + "@chakra-ui/form-control" "2.2.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/theme-tools@2.1.2": + version "2.1.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/theme-tools/-/theme-tools-2.1.2.tgz#913be05879cd816c546993ccb9ff7615f85ff69f" + integrity 
sha512-Qdj8ajF9kxY4gLrq7gA+Azp8CtFHGO9tWMN2wfF9aQNgG9AuMhPrUzMq9AMQ0MXiYcgNq/FD3eegB43nHVmXVA== + dependencies: + "@chakra-ui/anatomy" "2.2.2" + "@chakra-ui/shared-utils" "2.0.5" + color2k "^2.0.2" + +"@chakra-ui/theme-utils@2.0.21": + version "2.0.21" + resolved "https://registry.yarnpkg.com/@chakra-ui/theme-utils/-/theme-utils-2.0.21.tgz#da7ed541a5241a8ed0384eb14f37fa9b998382cf" + integrity sha512-FjH5LJbT794r0+VSCXB3lT4aubI24bLLRWB+CuRKHijRvsOg717bRdUN/N1fEmEpFnRVrbewttWh/OQs0EWpWw== + dependencies: + "@chakra-ui/shared-utils" "2.0.5" + "@chakra-ui/styled-system" "2.9.2" + "@chakra-ui/theme" "3.3.1" + lodash.mergewith "4.6.2" + +"@chakra-ui/theme@3.3.1": + version "3.3.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/theme/-/theme-3.3.1.tgz#75c6cd0b5c70c0aa955068274ee4780f299bd8a4" + integrity sha512-Hft/VaT8GYnItGCBbgWd75ICrIrIFrR7lVOhV/dQnqtfGqsVDlrztbSErvMkoPKt0UgAkd9/o44jmZ6X4U2nZQ== + dependencies: + "@chakra-ui/anatomy" "2.2.2" + "@chakra-ui/shared-utils" "2.0.5" + "@chakra-ui/theme-tools" "2.1.2" + +"@chakra-ui/toast@7.0.2": + version "7.0.2" + resolved "https://registry.yarnpkg.com/@chakra-ui/toast/-/toast-7.0.2.tgz#d1c396bbfced12e22b010899731fd8cc294d53ec" + integrity sha512-yvRP8jFKRs/YnkuE41BVTq9nB2v/KDRmje9u6dgDmE5+1bFt3bwjdf9gVbif4u5Ve7F7BGk5E093ARRVtvLvXA== + dependencies: + "@chakra-ui/alert" "2.2.2" + "@chakra-ui/close-button" "2.1.1" + "@chakra-ui/portal" "2.1.0" + "@chakra-ui/react-context" "2.1.0" + "@chakra-ui/react-use-timeout" "2.1.0" + "@chakra-ui/react-use-update-effect" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + "@chakra-ui/styled-system" "2.9.2" + "@chakra-ui/theme" "3.3.1" + +"@chakra-ui/tooltip@2.3.1": + version "2.3.1" + resolved "https://registry.yarnpkg.com/@chakra-ui/tooltip/-/tooltip-2.3.1.tgz#29fb8508a37bb6b20ab8dbb32bca6cd59b098796" + integrity sha512-Rh39GBn/bL4kZpuEMPPRwYNnccRCL+w9OqamWHIB3Qboxs6h8cOyXfIdGxjo72lvhu1QI/a4KFqkM3St+WfC0A== + dependencies: + "@chakra-ui/dom-utils" "2.1.0" + "@chakra-ui/popper" "3.1.0" + 
"@chakra-ui/portal" "2.1.0" + "@chakra-ui/react-types" "2.0.7" + "@chakra-ui/react-use-disclosure" "2.1.0" + "@chakra-ui/react-use-event-listener" "2.1.0" + "@chakra-ui/react-use-merge-refs" "2.1.0" + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/transition@2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/transition/-/transition-2.1.0.tgz#c8e95564f7ab356e78119780037bae5ad150c7b3" + integrity sha512-orkT6T/Dt+/+kVwJNy7zwJ+U2xAZ3EU7M3XCs45RBvUnZDr/u9vdmaM/3D/rOpmQJWgQBwKPJleUXrYWUagEDQ== + dependencies: + "@chakra-ui/shared-utils" "2.0.5" + +"@chakra-ui/utils@2.0.15": + version "2.0.15" + resolved "https://registry.yarnpkg.com/@chakra-ui/utils/-/utils-2.0.15.tgz#bd800b1cff30eb5a5e8c36fa039f49984b4c5e4a" + integrity sha512-El4+jL0WSaYYs+rJbuYFDbjmfCcfGDmRY95GO4xwzit6YAPZBLcR65rOEwLps+XWluZTy1xdMrusg/hW0c1aAA== + dependencies: + "@types/lodash.mergewith" "4.6.7" + css-box-model "1.2.1" + framesync "6.1.2" + lodash.mergewith "4.6.2" + +"@chakra-ui/visually-hidden@2.2.0": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@chakra-ui/visually-hidden/-/visually-hidden-2.2.0.tgz#9b0ecef8f01263ab808ba3bda7b36a0d91b4d5c1" + integrity sha512-KmKDg01SrQ7VbTD3+cPWf/UfpF5MSwm3v7MWi0n5t8HnnadT13MF0MJCDSXbBWnzLv1ZKJ6zlyAOeARWX+DpjQ== + +"@emotion/babel-plugin@^11.11.0": + version "11.11.0" + resolved "https://registry.yarnpkg.com/@emotion/babel-plugin/-/babel-plugin-11.11.0.tgz#c2d872b6a7767a9d176d007f5b31f7d504bb5d6c" + integrity sha512-m4HEDZleaaCH+XgDDsPF15Ht6wTLsgDTeR3WYj9Q/k76JtWhrJjcP4+/XlG8LGT/Rol9qUfOIztXeA84ATpqPQ== dependencies: "@babel/helper-module-imports" "^7.16.7" - "@babel/plugin-syntax-jsx" "^7.17.12" "@babel/runtime" "^7.18.3" - "@emotion/hash" "^0.9.0" - "@emotion/memoize" "^0.8.0" - "@emotion/serialize" "^1.1.0" + "@emotion/hash" "^0.9.1" + "@emotion/memoize" "^0.8.1" + "@emotion/serialize" "^1.1.2" babel-plugin-macros "^3.1.0" convert-source-map "^1.5.0" escape-string-regexp "^4.0.0" find-root "^1.1.0" source-map "^0.5.7" 
- stylis "4.0.13" + stylis "4.2.0" -"@emotion/cache@^11.10.0", "@emotion/cache@^11.10.3": +"@emotion/cache@^11.10.3": version "11.10.3" resolved "https://registry.yarnpkg.com/@emotion/cache/-/cache-11.10.3.tgz#c4f67904fad10c945fea5165c3a5a0583c164b87" integrity sha512-Psmp/7ovAa8appWh3g51goxu/z3iVms7JXOreq136D8Bbn6dYraPnmL6mdM8GThEx9vwSn92Fz+mGSjBzN8UPQ== @@ -100,10 +900,21 @@ "@emotion/weak-memoize" "^0.3.0" stylis "4.0.13" -"@emotion/hash@^0.9.0": - version "0.9.0" - resolved "https://registry.yarnpkg.com/@emotion/hash/-/hash-0.9.0.tgz#c5153d50401ee3c027a57a177bc269b16d889cb7" - integrity sha512-14FtKiHhy2QoPIzdTcvh//8OyBlknNs2nXRwIhG904opCby3l+9Xaf/wuPvICBF0rc1ZCNBd3nKe9cd2mecVkQ== +"@emotion/cache@^11.11.0": + version "11.11.0" + resolved "https://registry.yarnpkg.com/@emotion/cache/-/cache-11.11.0.tgz#809b33ee6b1cb1a625fef7a45bc568ccd9b8f3ff" + integrity sha512-P34z9ssTCBi3e9EI1ZsWpNHcfY1r09ZO0rZbRO2ob3ZQMnFI35jB536qoXbkdesr5EUhYi22anuEJuyxifaqAQ== + dependencies: + "@emotion/memoize" "^0.8.1" + "@emotion/sheet" "^1.2.2" + "@emotion/utils" "^1.2.1" + "@emotion/weak-memoize" "^0.3.1" + stylis "4.2.0" + +"@emotion/hash@^0.9.1": + version "0.9.1" + resolved "https://registry.yarnpkg.com/@emotion/hash/-/hash-0.9.1.tgz#4ffb0055f7ef676ebc3a5a91fb621393294e2f43" + integrity sha512-gJB6HLm5rYwSLI6PQa+X1t5CFGrv1J1TWG+sOyMCeKz2ojaj6Fnl/rZEspogG+cvqbt4AE/2eIyD2QfLKTBNlQ== "@emotion/is-prop-valid@^1.2.0": version "1.2.0" @@ -112,34 +923,46 @@ dependencies: "@emotion/memoize" "^0.8.0" +"@emotion/is-prop-valid@^1.2.2": + version "1.2.2" + resolved "https://registry.yarnpkg.com/@emotion/is-prop-valid/-/is-prop-valid-1.2.2.tgz#d4175076679c6a26faa92b03bb786f9e52612337" + integrity sha512-uNsoYd37AFmaCdXlg6EYD1KaPOaRWRByMCYzbKUX4+hhMfrxdVSelShywL4JVaAeM/eHUOSprYBQls+/neX3pw== + dependencies: + "@emotion/memoize" "^0.8.1" + "@emotion/memoize@^0.8.0": version "0.8.0" resolved 
"https://registry.yarnpkg.com/@emotion/memoize/-/memoize-0.8.0.tgz#f580f9beb67176fa57aae70b08ed510e1b18980f" integrity sha512-G/YwXTkv7Den9mXDO7AhLWkE3q+I92B+VqAE+dYG4NGPaHZGvt3G8Q0p9vmE+sq7rTGphUbAvmQ9YpbfMQGGlA== -"@emotion/react@^11.10.4": - version "11.10.4" - resolved "https://registry.yarnpkg.com/@emotion/react/-/react-11.10.4.tgz#9dc6bccbda5d70ff68fdb204746c0e8b13a79199" - integrity sha512-j0AkMpr6BL8gldJZ6XQsQ8DnS9TxEQu1R+OGmDZiWjBAJtCcbt0tS3I/YffoqHXxH6MjgI7KdMbYKw3MEiU9eA== +"@emotion/memoize@^0.8.1": + version "0.8.1" + resolved "https://registry.yarnpkg.com/@emotion/memoize/-/memoize-0.8.1.tgz#c1ddb040429c6d21d38cc945fe75c818cfb68e17" + integrity sha512-W2P2c/VRW1/1tLox0mVUalvnWXxavmv/Oum2aPsRcoDJuob75FC3Y8FbpfLwUegRcxINtGUMPq0tFCvYNTBXNA== + +"@emotion/react@^11.11.4": + version "11.11.4" + resolved "https://registry.yarnpkg.com/@emotion/react/-/react-11.11.4.tgz#3a829cac25c1f00e126408fab7f891f00ecc3c1d" + integrity sha512-t8AjMlF0gHpvvxk5mAtCqR4vmxiGHCeJBaQO6gncUSdklELOgtwjerNY2yuJNfwnc6vi16U/+uMF+afIawJ9iw== dependencies: "@babel/runtime" "^7.18.3" - "@emotion/babel-plugin" "^11.10.0" - "@emotion/cache" "^11.10.0" - "@emotion/serialize" "^1.1.0" - "@emotion/use-insertion-effect-with-fallbacks" "^1.0.0" - "@emotion/utils" "^1.2.0" - "@emotion/weak-memoize" "^0.3.0" + "@emotion/babel-plugin" "^11.11.0" + "@emotion/cache" "^11.11.0" + "@emotion/serialize" "^1.1.3" + "@emotion/use-insertion-effect-with-fallbacks" "^1.0.1" + "@emotion/utils" "^1.2.1" + "@emotion/weak-memoize" "^0.3.1" hoist-non-react-statics "^3.3.1" -"@emotion/serialize@^1.1.0": - version "1.1.0" - resolved "https://registry.yarnpkg.com/@emotion/serialize/-/serialize-1.1.0.tgz#b1f97b1011b09346a40e9796c37a3397b4ea8ea8" - integrity sha512-F1ZZZW51T/fx+wKbVlwsfchr5q97iW8brAnXmsskz4d0hVB4O3M/SiA3SaeH06x02lSNzkkQv+n3AX3kCXKSFA== +"@emotion/serialize@^1.1.2", "@emotion/serialize@^1.1.3", "@emotion/serialize@^1.1.4": + version "1.1.4" + resolved 
"https://registry.yarnpkg.com/@emotion/serialize/-/serialize-1.1.4.tgz#fc8f6d80c492cfa08801d544a05331d1cc7cd451" + integrity sha512-RIN04MBT8g+FnDwgvIUi8czvr1LU1alUMI05LekWB5DGyTm8cCBMCRpq3GqaiyEDRptEXOyXnvZ58GZYu4kBxQ== dependencies: - "@emotion/hash" "^0.9.0" - "@emotion/memoize" "^0.8.0" - "@emotion/unitless" "^0.8.0" - "@emotion/utils" "^1.2.0" + "@emotion/hash" "^0.9.1" + "@emotion/memoize" "^0.8.1" + "@emotion/unitless" "^0.8.1" + "@emotion/utils" "^1.2.1" csstype "^3.0.2" "@emotion/sheet@^1.2.0": @@ -147,38 +970,53 @@ resolved "https://registry.yarnpkg.com/@emotion/sheet/-/sheet-1.2.0.tgz#771b1987855839e214fc1741bde43089397f7be5" integrity sha512-OiTkRgpxescko+M51tZsMq7Puu/KP55wMT8BgpcXVG2hqXc0Vo0mfymJ/Uj24Hp0i083ji/o0aLddh08UEjq8w== -"@emotion/styled@^11.10.4": - version "11.10.4" - resolved "https://registry.yarnpkg.com/@emotion/styled/-/styled-11.10.4.tgz#e93f84a4d54003c2acbde178c3f97b421fce1cd4" - integrity sha512-pRl4R8Ez3UXvOPfc2bzIoV8u9P97UedgHS4FPX594ntwEuAMA114wlaHvOK24HB48uqfXiGlYIZYCxVJ1R1ttQ== +"@emotion/sheet@^1.2.2": + version "1.2.2" + resolved "https://registry.yarnpkg.com/@emotion/sheet/-/sheet-1.2.2.tgz#d58e788ee27267a14342303e1abb3d508b6d0fec" + integrity sha512-0QBtGvaqtWi+nx6doRwDdBIzhNdZrXUppvTM4dtZZWEGTXL/XE/yJxLMGlDT1Gt+UHH5IX1n+jkXyytE/av7OA== + +"@emotion/styled@^11.11.5": + version "11.11.5" + resolved "https://registry.yarnpkg.com/@emotion/styled/-/styled-11.11.5.tgz#0c5c8febef9d86e8a926e663b2e5488705545dfb" + integrity sha512-/ZjjnaNKvuMPxcIiUkf/9SHoG4Q196DRl1w82hQ3WCsjo1IUR8uaGWrC6a87CrYAW0Kb/pK7hk8BnLgLRi9KoQ== dependencies: "@babel/runtime" "^7.18.3" - "@emotion/babel-plugin" "^11.10.0" - "@emotion/is-prop-valid" "^1.2.0" - "@emotion/serialize" "^1.1.0" - "@emotion/use-insertion-effect-with-fallbacks" "^1.0.0" - "@emotion/utils" "^1.2.0" - -"@emotion/unitless@^0.8.0": - version "0.8.0" - resolved "https://registry.yarnpkg.com/@emotion/unitless/-/unitless-0.8.0.tgz#a4a36e9cbdc6903737cd20d38033241e1b8833db" - integrity 
sha512-VINS5vEYAscRl2ZUDiT3uMPlrFQupiKgHz5AA4bCH1miKBg4qtwkim1qPmJj/4WG6TreYMY111rEFsjupcOKHw== - -"@emotion/use-insertion-effect-with-fallbacks@^1.0.0": - version "1.0.0" - resolved "https://registry.yarnpkg.com/@emotion/use-insertion-effect-with-fallbacks/-/use-insertion-effect-with-fallbacks-1.0.0.tgz#ffadaec35dbb7885bd54de3fa267ab2f860294df" - integrity sha512-1eEgUGmkaljiBnRMTdksDV1W4kUnmwgp7X9G8B++9GYwl1lUdqSndSriIrTJ0N7LQaoauY9JJ2yhiOYK5+NI4A== + "@emotion/babel-plugin" "^11.11.0" + "@emotion/is-prop-valid" "^1.2.2" + "@emotion/serialize" "^1.1.4" + "@emotion/use-insertion-effect-with-fallbacks" "^1.0.1" + "@emotion/utils" "^1.2.1" + +"@emotion/unitless@^0.8.1": + version "0.8.1" + resolved "https://registry.yarnpkg.com/@emotion/unitless/-/unitless-0.8.1.tgz#182b5a4704ef8ad91bde93f7a860a88fd92c79a3" + integrity sha512-KOEGMu6dmJZtpadb476IsZBclKvILjopjUii3V+7MnXIQCYh8W3NgNcgwo21n9LXZX6EDIKvqfjYxXebDwxKmQ== + +"@emotion/use-insertion-effect-with-fallbacks@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@emotion/use-insertion-effect-with-fallbacks/-/use-insertion-effect-with-fallbacks-1.0.1.tgz#08de79f54eb3406f9daaf77c76e35313da963963" + integrity sha512-jT/qyKZ9rzLErtrjGgdkMBn2OP8wl0G3sQlBb3YPryvKHsjvINUhVaPFfP+fpBcOkmrVOVEEHQFJ7nbj2TH2gw== "@emotion/utils@^1.2.0": version "1.2.0" resolved "https://registry.yarnpkg.com/@emotion/utils/-/utils-1.2.0.tgz#9716eaccbc6b5ded2ea5a90d65562609aab0f561" integrity sha512-sn3WH53Kzpw8oQ5mgMmIzzyAaH2ZqFEbozVVBSYp538E06OSE6ytOp7pRAjNQR+Q/orwqdQYJSe2m3hCOeznkw== +"@emotion/utils@^1.2.1": + version "1.2.1" + resolved "https://registry.yarnpkg.com/@emotion/utils/-/utils-1.2.1.tgz#bbab58465738d31ae4cb3dbb6fc00a5991f755e4" + integrity sha512-Y2tGf3I+XVnajdItskUCn6LX+VUDmP6lTL4fcqsXAv43dnlbZiuW4MWQW38rW/BVWSE7Q/7+XQocmpnRYILUmg== + "@emotion/weak-memoize@^0.3.0": version "0.3.0" resolved "https://registry.yarnpkg.com/@emotion/weak-memoize/-/weak-memoize-0.3.0.tgz#ea89004119dc42db2e1dba0f97d553f7372f6fcb" 
integrity sha512-AHPmaAx+RYfZz0eYu6Gviiagpmiyw98ySSlQvCUhVGDRtDFe4DBS0x1bSjdF3gqUDYOczB+yYvBTtEylYSdRhg== +"@emotion/weak-memoize@^0.3.1": + version "0.3.1" + resolved "https://registry.yarnpkg.com/@emotion/weak-memoize/-/weak-memoize-0.3.1.tgz#d0fce5d07b0620caa282b5131c297bb60f9d87e6" + integrity sha512-EsBwpc7hBUJWAsNPBmJy4hxWx12v6bshQsldrVmjxJoc3isbxhOrF2IcCpaXxfvq03NwkI7sbsOLXbYuqF/8Ww== + "@eslint/eslintrc@^1.3.3": version "1.3.3" resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-1.3.3.tgz#2b044ab39fdfa75b4688184f9e573ce3c5b0ff95" @@ -382,6 +1220,11 @@ resolved "https://registry.yarnpkg.com/@popperjs/core/-/core-2.11.6.tgz#cee20bd55e68a1720bdab363ecf0c821ded4cd45" integrity sha512-50/17A98tWUfQ176raKiOGXuYpLyyVMkxxG6oylzL3BPOlA6ADGdK7EYunSa4I064xerltq9TGXs8HmOk5E+vw== +"@popperjs/core@^2.9.3": + version "2.11.8" + resolved "https://registry.yarnpkg.com/@popperjs/core/-/core-2.11.8.tgz#6b79032e760a0899cd4204710beede972a3a185f" + integrity sha512-P1st0aksCrn9sGZhp8GMYwBnQsbvAWsZAX44oXNNvLHGqAOcoVxmjZiohstwQ7SqKnbR47akdNi+uleWD8+g6A== + "@rushstack/eslint-patch@^1.1.3": version "1.2.0" resolved "https://registry.yarnpkg.com/@rushstack/eslint-patch/-/eslint-patch-1.2.0.tgz#8be36a1f66f3265389e90b5f9c9962146758f728" @@ -399,6 +1242,18 @@ resolved "https://registry.yarnpkg.com/@types/json5/-/json5-0.0.29.tgz#ee28707ae94e11d2b827bcbe5270bcea7f3e71ee" integrity sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ== +"@types/lodash.mergewith@4.6.7": + version "4.6.7" + resolved "https://registry.yarnpkg.com/@types/lodash.mergewith/-/lodash.mergewith-4.6.7.tgz#eaa65aa5872abdd282f271eae447b115b2757212" + integrity sha512-3m+lkO5CLRRYU0fhGRp7zbsGi6+BZj0uTVSwvcKU+nSlhjA9/QRNfuSGnD2mX6hQA7ZbmcCkzk5h4ZYGOtk14A== + dependencies: + "@types/lodash" "*" + +"@types/lodash@*": + version "4.17.5" + resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.17.5.tgz#e6c29b58e66995d57cd170ce3e2a61926d55ee04" + integrity 
sha512-MBIOHVZqVqgfro1euRDWX7OO0fBVUUMrN6Pwm8LQsz8cWhEpihlvR70ENj3f40j58TNxZaWv2ndSkInykNBBJw== + "@types/node@18.11.3": version "18.11.3" resolved "https://registry.yarnpkg.com/@types/node/-/node-18.11.3.tgz#78a6d7ec962b596fc2d2ec102c4dd3ef073fea6a" @@ -493,6 +1348,23 @@ "@typescript-eslint/types" "5.40.1" eslint-visitor-keys "^3.3.0" +"@zag-js/dom-query@0.16.0": + version "0.16.0" + resolved "https://registry.yarnpkg.com/@zag-js/dom-query/-/dom-query-0.16.0.tgz#bca46bcd78f78c900064478646d95f9781ed098e" + integrity sha512-Oqhd6+biWyKnhKwFFuZrrf6lxBz2tX2pRQe6grUnYwO6HJ8BcbqZomy2lpOdr+3itlaUqx+Ywj5E5ZZDr/LBfQ== + +"@zag-js/element-size@0.10.5": + version "0.10.5" + resolved "https://registry.yarnpkg.com/@zag-js/element-size/-/element-size-0.10.5.tgz#a24bad2eeb7e2c8709e32be5336e158e1a1a174f" + integrity sha512-uQre5IidULANvVkNOBQ1tfgwTQcGl4hliPSe69Fct1VfYb2Fd0jdAcGzqQgPhfrXFpR62MxLPB7erxJ/ngtL8w== + +"@zag-js/focus-visible@0.16.0": + version "0.16.0" + resolved "https://registry.yarnpkg.com/@zag-js/focus-visible/-/focus-visible-0.16.0.tgz#c9e53e3dbab0f2649d04a489bb379f5800f4f069" + integrity sha512-a7U/HSopvQbrDU4GLerpqiMcHKEkQkNPeDZJWz38cw/6Upunh41GjHetq5TB84hxyCaDzJ6q2nEdNoBQfC0FKA== + dependencies: + "@zag-js/dom-query" "0.16.0" + acorn-jsx@^5.3.2: version "5.3.2" resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.3.2.tgz#7ed5bb55908b3b2f1bc55c6af1653bada7f07937" @@ -537,6 +1409,13 @@ argparse@^2.0.1: resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38" integrity sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q== +aria-hidden@^1.2.3: + version "1.2.4" + resolved "https://registry.yarnpkg.com/aria-hidden/-/aria-hidden-1.2.4.tgz#b78e383fdbc04d05762c78b4a25a501e736c4522" + integrity sha512-y+CcFFwelSXpLZk/7fMB2mUbGtX9lKycf1MWJ7CaTIERyitVlyQx6C+sxcROU2BAJ24OiZyK+8wj2i8AlBoS3A== + dependencies: + tslib "^2.0.0" + aria-query@^4.2.2: version "4.2.2" resolved 
"https://registry.yarnpkg.com/aria-query/-/aria-query-4.2.2.tgz#0d2ca6c9aceb56b8977e9fed6aed7e15bbd2f83b" @@ -701,6 +1580,16 @@ color-name@~1.1.4: resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== +color2k@^2.0.2: + version "2.0.3" + resolved "https://registry.yarnpkg.com/color2k/-/color2k-2.0.3.tgz#a771244f6b6285541c82aa65ff0a0c624046e533" + integrity sha512-zW190nQTIoXcGCaU08DvVNFTmQhUpnJfVuAKfWqUQkflXKpaDdpaYoM0iluLS9lgJNHyBF58KKA2FBEwkD7wog== + +compute-scroll-into-view@3.0.3: + version "3.0.3" + resolved "https://registry.yarnpkg.com/compute-scroll-into-view/-/compute-scroll-into-view-3.0.3.tgz#c418900a5c56e2b04b885b54995df164535962b1" + integrity sha512-nadqwNxghAGTamwIqQSG433W6OADZx2vCo3UXHNrzTRHK/htu+7+L0zhjEoaeaQVNAi3YgqWDv8+tzf0hRfR+A== + concat-map@0.0.1: version "0.0.1" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" @@ -711,6 +1600,13 @@ convert-source-map@^1.5.0: resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-1.9.0.tgz#7faae62353fb4213366d0ca98358d22e8368b05f" integrity sha512-ASFBup0Mz1uyiIjANan1jzLQami9z1PoYSZCiiYW2FczPbenXc45FZdBZLzOT+r6+iciuEModtmCti+hjaAk0A== +copy-to-clipboard@3.3.3: + version "3.3.3" + resolved "https://registry.yarnpkg.com/copy-to-clipboard/-/copy-to-clipboard-3.3.3.tgz#55ac43a1db8ae639a4bd99511c148cdd1b83a1b0" + integrity sha512-2KV8NhB5JqC3ky0r9PMCAZKbUHSwtEo4CwCs0KXgruG43gX5PMqDEBbVU4OUzw2MuAWUfsuFmWvEKG5QRfSnJA== + dependencies: + toggle-selection "^1.0.6" + core-js-pure@^3.25.1: version "3.25.5" resolved "https://registry.yarnpkg.com/core-js-pure/-/core-js-pure-3.25.5.tgz#79716ba54240c6aa9ceba6eee08cf79471ba184d" @@ -736,11 +1632,23 @@ cross-spawn@^7.0.2: shebang-command "^2.0.0" which "^2.0.1" +css-box-model@1.2.1: + version "1.2.1" + resolved 
"https://registry.yarnpkg.com/css-box-model/-/css-box-model-1.2.1.tgz#59951d3b81fd6b2074a62d49444415b0d2b4d7c1" + integrity sha512-a7Vr4Q/kd/aw96bnJG332W9V9LkJO69JRcaCYDUqjp6/z0w6VcZjgAcTbgFxEPfBgdnAwlh3iwu+hLopa+flJw== + dependencies: + tiny-invariant "^1.0.6" + csstype@^3.0.2, csstype@^3.1.1: version "3.1.1" resolved "https://registry.yarnpkg.com/csstype/-/csstype-3.1.1.tgz#841b532c45c758ee546a11d5bd7b7b473c8c30b9" integrity sha512-DJR/VvkAvSZW9bTouZue2sSxDwdTN92uHjqeKVm+0dAqdfNykRzQ95tay8aXMBAAPpUiq4Qcug2L7neoRh2Egw== +csstype@^3.1.2: + version "3.1.3" + resolved "https://registry.yarnpkg.com/csstype/-/csstype-3.1.3.tgz#d80ff294d114fb0e6ac500fbf85b60137d7eff81" + integrity sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw== + damerau-levenshtein@^1.0.8: version "1.0.8" resolved "https://registry.yarnpkg.com/damerau-levenshtein/-/damerau-levenshtein-1.0.8.tgz#b43d286ccbd36bc5b2f7ed41caf2d0aba1f8a6e7" @@ -780,6 +1688,11 @@ define-properties@^1.1.3, define-properties@^1.1.4: has-property-descriptors "^1.0.0" object-keys "^1.1.1" +detect-node-es@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/detect-node-es/-/detect-node-es-1.1.0.tgz#163acdf643330caa0b4cd7c21e7ee7755d6fa493" + integrity sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ== + dir-glob@^3.0.1: version "3.0.1" resolved "https://registry.yarnpkg.com/dir-glob/-/dir-glob-3.0.1.tgz#56dbf73d992a4a93ba1584f4534063fd2e41717f" @@ -1157,6 +2070,27 @@ flatted@^3.1.0: resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.2.7.tgz#609f39207cb614b89d0765b477cb2d437fbf9787" integrity sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ== +focus-lock@^1.3.5: + version "1.3.5" + resolved "https://registry.yarnpkg.com/focus-lock/-/focus-lock-1.3.5.tgz#aa644576e5ec47d227b57eb14e1efb2abf33914c" + integrity 
sha512-QFaHbhv9WPUeLYBDe/PAuLKJ4Dd9OPvKs9xZBr3yLXnUrDNaVXKu2baDBXe3naPY30hgHYSsf2JW4jzas2mDEQ== + dependencies: + tslib "^2.0.3" + +framer-motion@^11.2.11: + version "11.2.11" + resolved "https://registry.yarnpkg.com/framer-motion/-/framer-motion-11.2.11.tgz#bbadbb15914d90e54cc5fa52c4b5ba7418f26c04" + integrity sha512-n+ozoEzgJu/2h9NoQMokF+CwNqIRVyuRC4RwMPwklfrrTjbVV32k9uBIgqYAwn7Jfpt5LuDVCtT57MWz1FbaLw== + dependencies: + tslib "^2.4.0" + +framesync@6.1.2: + version "6.1.2" + resolved "https://registry.yarnpkg.com/framesync/-/framesync-6.1.2.tgz#755eff2fb5b8f3b4d2b266dd18121b300aefea27" + integrity sha512-jBTqhX6KaQVDyus8muwZbBeGGP0XgujBRbQ7gM7BRdS3CadCZIHiawyzYLnafYcvZIh5j8WE7cxZKFn7dXhu9g== + dependencies: + tslib "2.4.0" + fs.realpath@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" @@ -1191,6 +2125,11 @@ get-intrinsic@^1.0.2, get-intrinsic@^1.1.0, get-intrinsic@^1.1.1, get-intrinsic@ has "^1.0.3" has-symbols "^1.0.3" +get-nonce@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/get-nonce/-/get-nonce-1.0.1.tgz#fdf3f0278073820d2ce9426c18f07481b1e0cdf3" + integrity sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q== + get-symbol-description@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/get-symbol-description/-/get-symbol-description-1.0.0.tgz#7fdb81c900101fbd564dd5f1a30af5aadc1e58d6" @@ -1354,6 +2293,13 @@ internal-slot@^1.0.3: has "^1.0.3" side-channel "^1.0.4" +invariant@^2.2.4: + version "2.2.4" + resolved "https://registry.yarnpkg.com/invariant/-/invariant-2.2.4.tgz#610f3c92c9359ce1db616e538008d23ff35158e6" + integrity sha512-phJfQVBuaJM5raOpJjSfkiD6BpbCE4Ns//LaXl6wGYtUBY83nWS6Rf9tXm2e8VaK60JEjYldbPif/A2B1C2gNA== + dependencies: + loose-envify "^1.0.0" + is-arrayish@^0.2.1: version "0.2.1" resolved 
"https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d" @@ -1552,7 +2498,12 @@ lodash.merge@^4.6.2: resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a" integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ== -loose-envify@^1.1.0, loose-envify@^1.4.0: +lodash.mergewith@4.6.2: + version "4.6.2" + resolved "https://registry.yarnpkg.com/lodash.mergewith/-/lodash.mergewith-4.6.2.tgz#617121f89ac55f59047c7aec1ccd6654c6590f55" + integrity sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ== + +loose-envify@^1.0.0, loose-envify@^1.1.0, loose-envify@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/loose-envify/-/loose-envify-1.4.0.tgz#71ee51fa7be4caec1a63839f7e682d8132d30caf" integrity sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q== @@ -1826,6 +2777,13 @@ queue-microtask@^1.2.2: resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243" integrity sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A== +react-clientside-effect@^1.2.6: + version "1.2.6" + resolved "https://registry.yarnpkg.com/react-clientside-effect/-/react-clientside-effect-1.2.6.tgz#29f9b14e944a376b03fb650eed2a754dd128ea3a" + integrity sha512-XGGGRQAKY+q25Lz9a/4EPqom7WRjz3z9R2k4jhVKA/puQFH/5Nt27vFZYql4m4NVNdUvX8PS3O7r/Zzm7cjUlg== + dependencies: + "@babel/runtime" "^7.12.13" + react-dom@18.2.0: version "18.2.0" resolved "https://registry.yarnpkg.com/react-dom/-/react-dom-18.2.0.tgz#22aaf38708db2674ed9ada224ca4aa708d821e3d" @@ -1834,6 +2792,23 @@ react-dom@18.2.0: loose-envify "^1.1.0" scheduler "^0.23.0" +react-fast-compare@3.2.2: + version "3.2.2" + resolved 
"https://registry.yarnpkg.com/react-fast-compare/-/react-fast-compare-3.2.2.tgz#929a97a532304ce9fee4bcae44234f1ce2c21d49" + integrity sha512-nsO+KSNgo1SbJqJEYRE9ERzo7YtYbou/OqjSQKxV7jcKox7+usiUVZOAC+XnDOABXggQTno0Y1CpVnuWEc1boQ== + +react-focus-lock@^2.9.4: + version "2.12.1" + resolved "https://registry.yarnpkg.com/react-focus-lock/-/react-focus-lock-2.12.1.tgz#0eaefd5fc34de8998967043d902e426352393349" + integrity sha512-lfp8Dve4yJagkHiFrC1bGtib3mF2ktqwPJw4/WGcgPW+pJ/AVQA5X2vI7xgp13FcxFEpYBBHpXai/N2DBNC0Jw== + dependencies: + "@babel/runtime" "^7.0.0" + focus-lock "^1.3.5" + prop-types "^15.6.2" + react-clientside-effect "^1.2.6" + use-callback-ref "^1.3.2" + use-sidecar "^1.1.2" + react-is@^16.13.1, react-is@^16.7.0: version "16.13.1" resolved "https://registry.yarnpkg.com/react-is/-/react-is-16.13.1.tgz#789729a4dc36de2999dc156dd6c1d9c18cea56a4" @@ -1844,6 +2819,34 @@ react-is@^18.2.0: resolved "https://registry.yarnpkg.com/react-is/-/react-is-18.2.0.tgz#199431eeaaa2e09f86427efbb4f1473edb47609b" integrity sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w== +react-remove-scroll-bar@^2.3.6: + version "2.3.6" + resolved "https://registry.yarnpkg.com/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.6.tgz#3e585e9d163be84a010180b18721e851ac81a29c" + integrity sha512-DtSYaao4mBmX+HDo5YWYdBWQwYIQQshUV/dVxFxK+KM26Wjwp1gZ6rv6OC3oujI6Bfu6Xyg3TwK533AQutsn/g== + dependencies: + react-style-singleton "^2.2.1" + tslib "^2.0.0" + +react-remove-scroll@^2.5.6: + version "2.5.10" + resolved "https://registry.yarnpkg.com/react-remove-scroll/-/react-remove-scroll-2.5.10.tgz#5fae456a23962af6d3c38ca1978bcfe0806c4061" + integrity sha512-m3zvBRANPBw3qxVVjEIPEQinkcwlFZ4qyomuWVpNJdv4c6MvHfXV0C3L9Jx5rr3HeBHKNRX+1jreB5QloDIJjA== + dependencies: + react-remove-scroll-bar "^2.3.6" + react-style-singleton "^2.2.1" + tslib "^2.1.0" + use-callback-ref "^1.3.0" + use-sidecar "^1.1.2" + +react-style-singleton@^2.2.1: + version "2.2.1" + resolved 
"https://registry.yarnpkg.com/react-style-singleton/-/react-style-singleton-2.2.1.tgz#f99e420492b2d8f34d38308ff660b60d0b1205b4" + integrity sha512-ZWj0fHEMyWkHzKYUr2Bs/4zU6XLmq9HsgBURm7g5pAVfyn49DgUiNgY2d4lXRlYSiCif9YBGpQleewkcqddc7g== + dependencies: + get-nonce "^1.0.0" + invariant "^2.2.4" + tslib "^2.0.0" + react-transition-group@^4.4.5: version "4.4.5" resolved "https://registry.yarnpkg.com/react-transition-group/-/react-transition-group-4.4.5.tgz#e53d4e3f3344da8521489fbef8f2581d42becdd1" @@ -1866,6 +2869,11 @@ regenerator-runtime@^0.13.4: resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.10.tgz#ed07b19616bcbec5da6274ebc75ae95634bfc2ee" integrity sha512-KepLsg4dU12hryUO7bp/axHAKvwGOCV0sGloQtpagJ12ai+ojVDqkeGSiRX1zlq+kjIMZ1t7gpze+26QqtdGqw== +regenerator-runtime@^0.14.0: + version "0.14.1" + resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz#356ade10263f685dda125100cd862c1db895327f" + integrity sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw== + regexp.prototype.flags@^1.4.1, regexp.prototype.flags@^1.4.3: version "1.4.3" resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.4.3.tgz#87cab30f80f66660181a3bb7bf5981a872b367ac" @@ -2057,6 +3065,11 @@ stylis@4.0.13: resolved "https://registry.yarnpkg.com/stylis/-/stylis-4.0.13.tgz#f5db332e376d13cc84ecfe5dace9a2a51d954c91" integrity sha512-xGPXiFVl4YED9Jh7Euv2V220mriG9u4B2TA6Ybjc1catrstKD2PpIdU3U0RKpkVBC2EhmL/F0sPCr9vrFTNRag== +stylis@4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/stylis/-/stylis-4.2.0.tgz#79daee0208964c8fe695a42fcffcac633a211a51" + integrity sha512-Orov6g6BB1sDfYgzWfTHDOxamtX1bE/zo104Dh9e6fqJ3PooipYyfJ0pUmrZO2wAvO8YbEyeFrkV91XTsGMSrw== + supports-color@^5.3.0: version "5.5.0" resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f" @@ -2081,6 +3094,11 @@ 
text-table@^0.2.0: resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4" integrity sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw== +tiny-invariant@^1.0.6: + version "1.3.3" + resolved "https://registry.yarnpkg.com/tiny-invariant/-/tiny-invariant-1.3.3.tgz#46680b7a873a0d5d10005995eb90a70d74d60127" + integrity sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg== + to-fast-properties@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/to-fast-properties/-/to-fast-properties-2.0.0.tgz#dc5e698cbd079265bc73e0377681a4e4e83f616e" @@ -2093,6 +3111,11 @@ to-regex-range@^5.0.1: dependencies: is-number "^7.0.0" +toggle-selection@^1.0.6: + version "1.0.6" + resolved "https://registry.yarnpkg.com/toggle-selection/-/toggle-selection-1.0.6.tgz#6e45b1263f2017fa0acc7d89d78b15b8bf77da32" + integrity sha512-BiZS+C1OS8g/q2RRbJmy59xpyghNBqrr6k5L/uKBGRsTfxmu3ffiRnd8mlGPUVayg8pvfi5urfnu8TU7DVOkLQ== + tsconfig-paths@^3.14.1: version "3.14.1" resolved "https://registry.yarnpkg.com/tsconfig-paths/-/tsconfig-paths-3.14.1.tgz#ba0734599e8ea36c862798e920bcf163277b137a" @@ -2103,15 +3126,20 @@ tsconfig-paths@^3.14.1: minimist "^1.2.6" strip-bom "^3.0.0" +tslib@2.4.0, tslib@^2.4.0: + version "2.4.0" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.4.0.tgz#7cecaa7f073ce680a05847aa77be941098f36dc3" + integrity sha512-d6xOpEDfsi2CZVlPQzGeux8XMwLT9hssAsaPYExaQMuYskwb+x1x7J371tWlbBdWHroy99KnVB6qIkUbs5X3UQ== + tslib@^1.8.1: version "1.14.1" resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00" integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg== -tslib@^2.4.0: - version "2.4.0" - resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.4.0.tgz#7cecaa7f073ce680a05847aa77be941098f36dc3" - integrity 
sha512-d6xOpEDfsi2CZVlPQzGeux8XMwLT9hssAsaPYExaQMuYskwb+x1x7J371tWlbBdWHroy99KnVB6qIkUbs5X3UQ== +tslib@^2.0.0, tslib@^2.0.3, tslib@^2.1.0: + version "2.6.3" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.3.tgz#0438f810ad7a9edcde7a241c3d80db693c8cbfe0" + integrity sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ== tsutils@^3.21.0: version "3.21.0" @@ -2154,6 +3182,21 @@ uri-js@^4.2.2: dependencies: punycode "^2.1.0" +use-callback-ref@^1.3.0, use-callback-ref@^1.3.2: + version "1.3.2" + resolved "https://registry.yarnpkg.com/use-callback-ref/-/use-callback-ref-1.3.2.tgz#6134c7f6ff76e2be0b56c809b17a650c942b1693" + integrity sha512-elOQwe6Q8gqZgDA8mrh44qRTQqpIHDcZ3hXTLjBe1i4ph8XpNJnO+aQf3NaG+lriLopI4HMx9VjQLfPQ6vhnoA== + dependencies: + tslib "^2.0.0" + +use-sidecar@^1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/use-sidecar/-/use-sidecar-1.1.2.tgz#2f43126ba2d7d7e117aa5855e5d8f0276dfe73c2" + integrity sha512-epTbsLuzZ7lPClpz2TyryBfztm7m+28DlEv2ZCQ3MDr5ssiwyOwGH/e5F9CkfWjJ1t4clvI58yF822/GUkjjhw== + dependencies: + detect-node-es "^1.1.0" + tslib "^2.0.0" + which-boxed-primitive@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6" diff --git a/src/main/java/io/anserini/index/AbstractIndexer.java b/src/main/java/io/anserini/index/AbstractIndexer.java index 3e3d0c4932..b7502999a1 100644 --- a/src/main/java/io/anserini/index/AbstractIndexer.java +++ b/src/main/java/io/anserini/index/AbstractIndexer.java @@ -14,326 +14,327 @@ * limitations under the License. 
*/ -package io.anserini.index; - -import io.anserini.collection.DocumentCollection; -import io.anserini.collection.FileSegment; -import io.anserini.collection.SourceDocument; -import io.anserini.index.generator.EmptyDocumentException; -import io.anserini.index.generator.InvalidDocumentException; -import io.anserini.index.generator.LuceneDocumentGenerator; -import io.anserini.index.generator.SkippedDocumentException; -import org.apache.commons.lang3.time.DurationFormatUtils; -import org.apache.logging.log4j.Level; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.core.config.Configurator; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; -import org.kohsuke.args4j.Option; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.List; -import java.util.Set; -import java.util.concurrent.Executors; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -public abstract class AbstractIndexer implements Runnable { - private static final Logger LOG = LogManager.getLogger(AbstractIndexer.class); - - public static class Args { - @Option(name = "-collection", metaVar = "[class]", required = true, usage = "Collection class in io.anserini.collection.") - public String collectionClass; - - @Option(name = "-input", metaVar = "[path]", required = true, usage = "Input collection.") - public String input; - - @Option(name = "-index", metaVar = "[path]", required = true, usage = "Index path.") - public String index; - - @Option(name = "-uniqueDocid", usage = "Removes duplicate documents with the same docid during indexing.") - public boolean uniqueDocid = false; - - @Option(name = "-optimize", usage = "Optimizes index by merging into a single index 
segment.") - public boolean optimize = false; - - @Option(name = "-memoryBuffer", metaVar = "[mb]", usage = "Memory buffer size in MB.") - public int memoryBuffer = 4096; - - @Option(name = "-threads", metaVar = "[num]", usage = "Number of indexing threads.") - public int threads = 4; - - @Option(name = "-verbose", forbids = {"-quiet"}, usage = "Enables verbose logging for each indexing thread.") - public boolean verbose = false; - - @Option(name = "-quiet", forbids = {"-verbose"}, usage = "Turns off all logging.") - public boolean quiet = false; - - @Option(name = "-options", usage = "Print information about options.") - public Boolean options = false; - - @Option(name = "-shard.count", metaVar = "[n]", - usage = "Number of shards to partition the document collection into.") - public int shardCount = -1; - - @Option(name = "-shard.current", metaVar = "[n]", - usage = "The current shard number to generate (indexed from 0).") - public int shardCurrent = -1; - } - - public class IndexerThread extends Thread { - private final Path inputFile; - private final LuceneDocumentGenerator generator; - private final Set whitelistDocids; - - public IndexerThread(Path inputFile, LuceneDocumentGenerator generator) { - this(inputFile, generator, null); - } - - public IndexerThread(Path inputFile, LuceneDocumentGenerator generator, Set docids) { - this.inputFile = inputFile; - this.generator = generator; - this.whitelistDocids = docids; - - setName(inputFile.getFileName().toString()); - } - - @Override - public void run() { - try(FileSegment segment = collection.createFileSegment(inputFile)) { - // We keep track of two separate counts: the total count of documents in this file segment (cnt), - // and the number of documents in this current "batch" (batch). 
We update the global counter every - // 10k documents: this is so that we get intermediate updates, which is informative if a collection - // has only one file segment; see https://github.com/castorini/anserini/issues/683 - int cnt = 0; - int batch = 0; - - for (SourceDocument d : segment) { - if (!d.indexable()) { - counters.unindexable.incrementAndGet(); - continue; - } - - try { - if (whitelistDocids != null && !whitelistDocids.contains(d.id())) { - counters.skipped.incrementAndGet(); - continue; - } - - Document doc = generator.createDocument(d); - if (args.uniqueDocid) { - // Note that we're reading the config directly, which is within scope. - writer.updateDocument(new Term("id", d.id()), doc); - } else { - writer.addDocument(doc); - } - - cnt++; - batch++; - } catch (EmptyDocumentException e1) { - counters.empty.incrementAndGet(); - continue; - } catch (SkippedDocumentException e2) { - counters.skipped.incrementAndGet(); - continue; - } catch (InvalidDocumentException e3) { - counters.errors.incrementAndGet(); - continue; - } - - // Add the counts from this batch, reset batch counter. - if (batch % 10000 == 0) { - counters.indexed.addAndGet(batch); - batch = 0; - } - } - - // Add the remaining documents. - counters.indexed.addAndGet(batch); - - int skipped = segment.getSkippedCount(); - if (skipped > 0) { - counters.skipped.addAndGet(skipped); - LOG.warn(inputFile.getParent().getFileName().toString() + File.separator + - inputFile.getFileName().toString() + ": " + skipped + " docs skipped."); - } - - if (segment.getErrorStatus()) { - counters.errors.incrementAndGet(); - LOG.error(inputFile.getParent().getFileName().toString() + File.separator + - inputFile.getFileName().toString() + ": error iterating through segment."); - } - - // Log at the debug level because this can be quite noisy if there are lots of file segments. 
- LOG.debug(inputFile.getParent().getFileName().toString() + File.separator + - inputFile.getFileName().toString() + ": " + cnt + " docs added."); - } catch (Exception e) { - e.printStackTrace(); - LOG.error(Thread.currentThread().getName() + ": Unexpected Exception:", e.getMessage()); - } - } - } - - protected final Args args; - protected Counters counters = new Counters(); - protected Path collectionPath; - protected DocumentCollection collection; - protected Class> generatorClass; - protected IndexWriter writer; - - @SuppressWarnings("unchecked") - public AbstractIndexer(Args args) { - this.args = args; - - if (args.verbose) { - // If verbose logging enabled, changed default log level to DEBUG so we get per-thread logging messages. - Configurator.setRootLevel(Level.DEBUG); - LOG.info("Setting log level to " + Level.DEBUG); - } else if (args.quiet) { - // If quiet mode enabled, only report warnings and above. - Configurator.setRootLevel(Level.WARN); - } else { - // Otherwise, we get the standard set of log messages. - Configurator.setRootLevel(Level.INFO); - LOG.info("Setting log level to " + Level.INFO); - } - - LOG.info("============ Loading Index Configuration ============"); - LOG.info("AbstractIndexer settings:"); - LOG.info(" + DocumentCollection path: " + args.input); - LOG.info(" + CollectionClass: " + args.collectionClass); - LOG.info(" + Index path: " + args.index); - LOG.info(" + Threads: " + args.threads); - LOG.info(" + Optimize (merge segments)? " + args.optimize); - - // Our documentation uses /path/to/foo as a convention: to make copy and paste of the commands work, - // we assume collections/ as the path location. 
- String pathStr = args.input; - if (pathStr.startsWith("/path/to")) { - pathStr = pathStr.replace("/path/to", "collections"); - } - this.collectionPath = Paths.get(pathStr); - if (!Files.exists(collectionPath) || !Files.isReadable(collectionPath) || !Files.isDirectory(collectionPath)) { - throw new IllegalArgumentException(String.format("Invalid collection path \"%s\".", collectionPath)); - } - - try { - Class> collectionClass = (Class>) - Class.forName("io.anserini.collection." + args.collectionClass); - this.collection = collectionClass.getConstructor(Path.class).newInstance(collectionPath); - } catch (Exception e) { - throw new IllegalArgumentException(String.format("Unable to load collection class \"%s\".", args.collectionClass)); - } - } - - @Override - public void run() { - LOG.info("============ Indexing Collection ============"); - final long start = System.nanoTime(); - - final List segmentPaths = args.shardCount > 1 ? - collection.getSegmentPaths(args.shardCount, args.shardCurrent) : - collection.getSegmentPaths(); - final int segmentCnt = segmentPaths.size(); - - final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(args.threads); - LOG.info(String.format("Thread pool with %s threads initialized.", args.threads)); - LOG.info(String.format("%,d %s found in %s", segmentCnt, (segmentCnt == 1 ? "file" : "files"), collectionPath)); - LOG.info("Starting to index..."); - - // Dispatch to default method to process the segments; subclasses can override this method if desired. - processSegments(executor, segmentPaths); - executor.shutdown(); - - try { - // Wait for existing tasks to terminate. 
- while (!executor.awaitTermination(1, TimeUnit.MINUTES)) { - if (segmentCnt == 1) { - LOG.info(String.format("%,d documents indexed", counters.indexed.get())); - } else { - LOG.info(String.format("%.2f%% of files completed, %,d documents indexed", - (double) executor.getCompletedTaskCount() / segmentCnt * 100.0d, counters.indexed.get())); - } - } - } catch (InterruptedException ie) { - // (Re-)Cancel if current thread also interrupted. - executor.shutdownNow(); - // Preserve interrupt status. - Thread.currentThread().interrupt(); - } - - if (segmentCnt != executor.getCompletedTaskCount()) { - throw new RuntimeException("totalFiles = " + segmentCnt + - " is not equal to completedTaskCount = " + executor.getCompletedTaskCount()); - } - - long numIndexed = writer.getDocStats().maxDoc; - if (numIndexed != counters.indexed.get()) { - // We want to log a warning here, as opposed to throw an exception, because for certain collections, - // this might be expected. For example, when indexing tweets - if a tweet is delivered multiple times - // (i.e., same docid), with -uniqueDocid we're going to update the doc in the index in place, leading - // to differences between the counts. - LOG.warn(String.format("Unexpected difference between number of indexed documents (%d) and index maxDoc (%d).", - numIndexed, counters.indexed.get())); - } - - // Do a final commit. - try { - writer.commit(); - if (args.optimize) { - writer.forceMerge(1); - } - } catch (IOException e) { - // It is possible that this happens... but nothing much we can do at this point, - // so just log the error and move on. - LOG.error(e); - } finally { - try { - writer.close(); - } catch (IOException e) { - // It is possible that this happens... but nothing much we can do at this point, - // so just log the error and move on. - LOG.error(e); - } - } - - LOG.info(String.format("Indexing Complete! 
%,d documents indexed", numIndexed)); - LOG.info("============ Final Counter Values ============"); - LOG.info(String.format("indexed: %,12d", counters.indexed.get())); - LOG.info(String.format("unindexable: %,12d", counters.unindexable.get())); - LOG.info(String.format("empty: %,12d", counters.empty.get())); - LOG.info(String.format("skipped: %,12d", counters.skipped.get())); - LOG.info(String.format("errors: %,12d", counters.errors.get())); - - final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS); - LOG.info(String.format("Total %,d documents indexed in %s", numIndexed, - DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"))); - } - - // Default method to process the segments; subclasses can override this method if desired. - protected void processSegments(ThreadPoolExecutor executor, List segmentPaths) { - segmentPaths.forEach((segmentPath) -> { - try { - // Each thread gets its own document generator, so we don't need to make any assumptions about its thread safety. 
- @SuppressWarnings("unchecked") - LuceneDocumentGenerator generator = (LuceneDocumentGenerator) - generatorClass.getDeclaredConstructor((Class []) null).newInstance(); - - executor.execute(new IndexerThread(segmentPath, generator)); - } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { - throw new IllegalArgumentException(String.format("Unable to load LuceneDocumentGenerator \"%s\".", generatorClass.getSimpleName())); - } - }); - } - - public Counters getCounters() { - return this.counters; - } -} + package io.anserini.index; + + import io.anserini.collection.DocumentCollection; + import io.anserini.collection.FileSegment; + import io.anserini.collection.SourceDocument; + import io.anserini.index.generator.EmptyDocumentException; + import io.anserini.index.generator.InvalidDocumentException; + import io.anserini.index.generator.LuceneDocumentGenerator; + import io.anserini.index.generator.SkippedDocumentException; + import org.apache.commons.lang3.time.DurationFormatUtils; + import org.apache.logging.log4j.Level; + import org.apache.logging.log4j.LogManager; + import org.apache.logging.log4j.Logger; + import org.apache.logging.log4j.core.config.Configurator; + import org.apache.lucene.document.Document; + import org.apache.lucene.index.IndexWriter; + import org.apache.lucene.index.Term; + import org.kohsuke.args4j.Option; + + import java.io.File; + import java.io.IOException; + import java.lang.reflect.InvocationTargetException; + import java.nio.file.Files; + import java.nio.file.Path; + import java.nio.file.Paths; + import java.util.List; + import java.util.Set; + import java.util.concurrent.Executors; + import java.util.concurrent.ThreadPoolExecutor; + import java.util.concurrent.TimeUnit; + + public abstract class AbstractIndexer implements Runnable { + private static final Logger LOG = LogManager.getLogger(AbstractIndexer.class); + + public static class Args { + @Option(name = "-collection", 
metaVar = "[class]", required = true, usage = "Collection class in io.anserini.collection.") + public String collectionClass; + + @Option(name = "-input", metaVar = "[path]", required = true, usage = "Input collection.") + public String input; + + @Option(name = "-index", metaVar = "[path]", required = true, usage = "Index path.") + public String index; + + @Option(name = "-uniqueDocid", usage = "Removes duplicate documents with the same docid during indexing.") + public boolean uniqueDocid = false; + + @Option(name = "-optimize", usage = "Optimizes index by merging into a single index segment.") + public boolean optimize = false; + + @Option(name = "-memoryBuffer", metaVar = "[mb]", usage = "Memory buffer size in MB.") + public int memoryBuffer = 4096; + + @Option(name = "-threads", metaVar = "[num]", usage = "Number of indexing threads.") + public int threads = 4; + + @Option(name = "-verbose", forbids = {"-quiet"}, usage = "Enables verbose logging for each indexing thread.") + public boolean verbose = false; + + @Option(name = "-quiet", forbids = {"-verbose"}, usage = "Turns off all logging.") + public boolean quiet = false; + + @Option(name = "-options", usage = "Print information about options.") + public Boolean options = false; + + @Option(name = "-shard.count", metaVar = "[n]", + usage = "Number of shards to partition the document collection into.") + public int shardCount = -1; + + @Option(name = "-shard.current", metaVar = "[n]", + usage = "The current shard number to generate (indexed from 0).") + public int shardCurrent = -1; + } + + public class IndexerThread extends Thread { + private final Path inputFile; + private final LuceneDocumentGenerator generator; + private final Set whitelistDocids; + + public IndexerThread(Path inputFile, LuceneDocumentGenerator generator) { + this(inputFile, generator, null); + } + + public IndexerThread(Path inputFile, LuceneDocumentGenerator generator, Set docids) { + this.inputFile = inputFile; + this.generator = 
generator; + this.whitelistDocids = docids; + + setName(inputFile.getFileName().toString()); + } + + @Override + public void run() { + try(FileSegment segment = collection.createFileSegment(inputFile)) { + // We keep track of two separate counts: the total count of documents in this file segment (cnt), + // and the number of documents in this current "batch" (batch). We update the global counter every + // 10k documents: this is so that we get intermediate updates, which is informative if a collection + // has only one file segment; see https://github.com/castorini/anserini/issues/683 + int cnt = 0; + int batch = 0; + + for (SourceDocument d : segment) { + if (!d.indexable()) { + counters.unindexable.incrementAndGet(); + continue; + } + + try { + if (whitelistDocids != null && !whitelistDocids.contains(d.id())) { + counters.skipped.incrementAndGet(); + continue; + } + + Document doc = generator.createDocument(d); + if (args.uniqueDocid) { + // Note that we're reading the config directly, which is within scope. + writer.updateDocument(new Term("id", d.id()), doc); + } else { + writer.addDocument(doc); + } + + cnt++; + batch++; + } catch (EmptyDocumentException e1) { + counters.empty.incrementAndGet(); + continue; + } catch (SkippedDocumentException e2) { + counters.skipped.incrementAndGet(); + continue; + } catch (InvalidDocumentException e3) { + counters.errors.incrementAndGet(); + continue; + } + + // Add the counts from this batch, reset batch counter. + if (batch % 10000 == 0) { + counters.indexed.addAndGet(batch); + batch = 0; + } + } + + // Add the remaining documents. 
+ counters.indexed.addAndGet(batch); + + int skipped = segment.getSkippedCount(); + if (skipped > 0) { + counters.skipped.addAndGet(skipped); + LOG.warn(inputFile.getParent().getFileName().toString() + File.separator + + inputFile.getFileName().toString() + ": " + skipped + " docs skipped."); + } + + if (segment.getErrorStatus()) { + counters.errors.incrementAndGet(); + LOG.error(inputFile.getParent().getFileName().toString() + File.separator + + inputFile.getFileName().toString() + ": error iterating through segment."); + } + + // Log at the debug level because this can be quite noisy if there are lots of file segments. + LOG.debug(inputFile.getParent().getFileName().toString() + File.separator + + inputFile.getFileName().toString() + ": " + cnt + " docs added."); + } catch (Exception e) { + e.printStackTrace(); + LOG.error(Thread.currentThread().getName() + ": Unexpected Exception:", e.getMessage()); + } + } + } + + protected final Args args; + protected Counters counters = new Counters(); + protected Path collectionPath; + protected DocumentCollection collection; + protected Class> generatorClass; + protected IndexWriter writer; + + @SuppressWarnings("unchecked") + public AbstractIndexer(Args args) { + this.args = args; + + if (args.verbose) { + // If verbose logging enabled, changed default log level to DEBUG so we get per-thread logging messages. + Configurator.setRootLevel(Level.DEBUG); + LOG.info("Setting log level to " + Level.DEBUG); + } else if (args.quiet) { + // If quiet mode enabled, only report warnings and above. + Configurator.setRootLevel(Level.WARN); + } else { + // Otherwise, we get the standard set of log messages. 
+ Configurator.setRootLevel(Level.INFO); + LOG.info("Setting log level to " + Level.INFO); + } + + LOG.info("============ Loading Index Configuration ============"); + LOG.info("AbstractIndexer settings:"); + LOG.info(" + DocumentCollection path: " + args.input); + LOG.info(" + CollectionClass: " + args.collectionClass); + LOG.info(" + Index path: " + args.index); + LOG.info(" + Threads: " + args.threads); + LOG.info(" + Optimize (merge segments)? " + args.optimize); + + // Our documentation uses /path/to/foo as a convention: to make copy and paste of the commands work, + // we assume collections/ as the path location. + String pathStr = args.input; + if (pathStr.startsWith("/path/to")) { + pathStr = pathStr.replace("/path/to", "collections"); + } + this.collectionPath = Paths.get(pathStr); + if (!Files.exists(collectionPath) || !Files.isReadable(collectionPath) || !Files.isDirectory(collectionPath)) { + throw new IllegalArgumentException(String.format("Invalid collection path \"%s\".", collectionPath)); + } + + try { + Class> collectionClass = (Class>) + Class.forName("io.anserini.collection." + args.collectionClass); + this.collection = collectionClass.getConstructor(Path.class).newInstance(collectionPath); + } catch (Exception e) { + throw new IllegalArgumentException(String.format("Unable to load collection class \"%s\".", args.collectionClass)); + } + } + + @Override + public void run() { + LOG.info("============ Indexing Collection ============"); + final long start = System.nanoTime(); + + final List segmentPaths = args.shardCount > 1 ? + collection.getSegmentPaths(args.shardCount, args.shardCurrent) : + collection.getSegmentPaths(); + final int segmentCnt = segmentPaths.size(); + + final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(args.threads); + LOG.info(String.format("Thread pool with %s threads initialized.", args.threads)); + LOG.info(String.format("%,d %s found in %s", segmentCnt, (segmentCnt == 1 ? 
"file" : "files"), collectionPath)); + LOG.info("Starting to index..."); + + // Dispatch to default method to process the segments; subclasses can override this method if desired. + processSegments(executor, segmentPaths); + executor.shutdown(); + + try { + // Wait for existing tasks to terminate. + while (!executor.awaitTermination(1, TimeUnit.MINUTES)) { + if (segmentCnt == 1) { + LOG.info(String.format("%,d documents indexed", counters.indexed.get())); + } else { + LOG.info(String.format("%.2f%% of files completed, %,d documents indexed", + (double) executor.getCompletedTaskCount() / segmentCnt * 100.0d, counters.indexed.get())); + } + } + } catch (InterruptedException ie) { + // (Re-)Cancel if current thread also interrupted. + executor.shutdownNow(); + // Preserve interrupt status. + Thread.currentThread().interrupt(); + } + + if (segmentCnt != executor.getCompletedTaskCount()) { + throw new RuntimeException("totalFiles = " + segmentCnt + + " is not equal to completedTaskCount = " + executor.getCompletedTaskCount()); + } + + long numIndexed = writer.getDocStats().maxDoc; + if (numIndexed != counters.indexed.get()) { + // We want to log a warning here, as opposed to throw an exception, because for certain collections, + // this might be expected. For example, when indexing tweets - if a tweet is delivered multiple times + // (i.e., same docid), with -uniqueDocid we're going to update the doc in the index in place, leading + // to differences between the counts. + LOG.warn(String.format("Unexpected difference between number of indexed documents (%d) and index maxDoc (%d).", + numIndexed, counters.indexed.get())); + } + + // Do a final commit. + try { + writer.commit(); + if (args.optimize) { + writer.forceMerge(1); + } + } catch (IOException e) { + // It is possible that this happens... but nothing much we can do at this point, + // so just log the error and move on. 
+ LOG.error(e); + } finally { + try { + writer.close(); + } catch (IOException e) { + // It is possible that this happens... but nothing much we can do at this point, + // so just log the error and move on. + LOG.error(e); + } + } + + LOG.info(String.format("Indexing Complete! %,d documents indexed", numIndexed)); + LOG.info("============ Final Counter Values ============"); + LOG.info(String.format("indexed: %,12d", counters.indexed.get())); + LOG.info(String.format("unindexable: %,12d", counters.unindexable.get())); + LOG.info(String.format("empty: %,12d", counters.empty.get())); + LOG.info(String.format("skipped: %,12d", counters.skipped.get())); + LOG.info(String.format("errors: %,12d", counters.errors.get())); + + final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS); + LOG.info(String.format("Total %,d documents indexed in %s", numIndexed, + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"))); + } + + // Default method to process the segments; subclasses can override this method if desired. + protected void processSegments(ThreadPoolExecutor executor, List segmentPaths) { + segmentPaths.forEach((segmentPath) -> { + try { + // Each thread gets its own document generator, so we don't need to make any assumptions about its thread safety. 
+ @SuppressWarnings("unchecked") + LuceneDocumentGenerator generator = (LuceneDocumentGenerator) + generatorClass.getDeclaredConstructor(Args.class).newInstance(args); + + + executor.execute(new IndexerThread(segmentPath, generator)); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { + throw new IllegalArgumentException(String.format("Unable to load LuceneDocumentGenerator \"%s\".", generatorClass.getSimpleName())); + } + }); + } + + public Counters getCounters() { + return this.counters; + } + } \ No newline at end of file diff --git a/src/main/java/io/anserini/index/IndexHnswDenseVectors.java b/src/main/java/io/anserini/index/IndexHnswDenseVectors.java index 870135be57..3f3002ee20 100644 --- a/src/main/java/io/anserini/index/IndexHnswDenseVectors.java +++ b/src/main/java/io/anserini/index/IndexHnswDenseVectors.java @@ -14,215 +14,216 @@ * limitations under the License. */ -package io.anserini.index; + package io.anserini.index; -import io.anserini.collection.SourceDocument; + import io.anserini.collection.SourceDocument; +import io.anserini.index.generator.HnswJsonWithSafeTensorsDenseVectorDocumentGenerator; import io.anserini.index.generator.LuceneDocumentGenerator; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.codecs.KnnVectorsFormat; -import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.KnnVectorsWriter; -import org.apache.lucene.codecs.lucene99.Lucene99Codec; -import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; -import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; -import org.apache.lucene.index.ConcurrentMergeScheduler; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.NoMergePolicy; -import org.apache.lucene.index.SegmentReadState; -import 
org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.TieredMergePolicy; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.kohsuke.args4j.CmdLineException; -import org.kohsuke.args4j.CmdLineParser; -import org.kohsuke.args4j.Option; -import org.kohsuke.args4j.ParserProperties; - -import java.io.IOException; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; - -public final class IndexHnswDenseVectors extends AbstractIndexer { - private static final Logger LOG = LogManager.getLogger(IndexHnswDenseVectors.class); - - public static final class Args extends AbstractIndexer.Args { - @Option(name = "-generator", metaVar = "[class]", usage = "Document generator class in io.anserini.index.generator.") - public String generatorClass = "DenseVectorDocumentGenerator"; - - @Option(name = "-M", metaVar = "[num]", usage = "HNSW parameters M") - public int M = 16; - - @Option(name = "-efC", metaVar = "[num]", usage = "HNSW parameters ef Construction") - public int efC = 100; - - @Option(name = "-quantize.int8", usage = "Quantize vectors into int8.") - public boolean quantizeInt8 = false; - - @Option(name = "-storeVectors", usage = "Boolean switch to store raw raw vectors.") - public boolean storeVectors = false; - - @Option(name = "-noMerge", usage = "Do not merge segments (fast indexing, slow retrieval).") - public boolean noMerge = false; - - @Option(name = "-maxThreadMemoryBeforeFlush", metaVar = "[num]", usage = "Maximum memory consumption per thread before triggering a forced flush (in MB); must be smaller than 2048.") - public int maxThreadMemoryBeforeFlush = 2047; - // This is the most aggressive possible setting; default is 1945. - // If the setting is too aggressive, may result in GCLocker issues. 
- - @Option(name = "-maxMergedSegmentSize", metaVar = "[num]", usage = "Maximum sized segment to produce during normal merging (in MB).") - public int maxMergedSegmentSize = 1024 * 16; - - @Option(name = "-segmentsPerTier", metaVar = "[num]", usage = "Allowed number of segments per tier.") - public int segmentsPerTier = 10; - - @Option(name = "-maxMergeAtOnce", metaVar = "[num]", usage = "Maximum number of segments to be merged at a time during \"normal\" merging.") - public int maxMergeAtOnce = 10; - } - - @SuppressWarnings("unchecked") - public IndexHnswDenseVectors(Args args) throws Exception { - super(args); - - try { - super.generatorClass = (Class>) - Class.forName("io.anserini.index.generator." + args.generatorClass); - } catch (Exception e) { - throw new IllegalArgumentException(String.format("Unable to load generator class \"%s\".", args.generatorClass)); - } - - try { - final Directory dir = FSDirectory.open(Paths.get(args.index)); - final IndexWriterConfig config; - - if (args.quantizeInt8) { - config = new IndexWriterConfig().setCodec( - new Lucene99Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new DelegatingKnnVectorsFormat( - new Lucene99HnswScalarQuantizedVectorsFormat(args.M, args.efC), 4096); - } - }); - } else { - config = new IndexWriterConfig().setCodec( - new Lucene99Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new DelegatingKnnVectorsFormat( - new Lucene99HnswVectorsFormat(args.M, args.efC), 4096); - } - }); - } - - config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); - config.setRAMBufferSizeMB(args.memoryBuffer); - config.setRAMPerThreadHardLimitMB(args.maxThreadMemoryBeforeFlush); - config.setUseCompoundFile(false); - config.setMergeScheduler(new ConcurrentMergeScheduler()); - - if (args.noMerge) { - config.setMergePolicy(NoMergePolicy.INSTANCE); - } else { - TieredMergePolicy mergePolicy = new TieredMergePolicy(); - if 
(args.optimize) { - // If we're going to merge down into a single segment at the end, skip intermediate merges, - // since they are a waste of time. - mergePolicy.setMaxMergeAtOnce(256); - mergePolicy.setSegmentsPerTier(256); - } else { - mergePolicy.setFloorSegmentMB(1024); - mergePolicy.setMaxMergedSegmentMB(args.maxMergedSegmentSize); - mergePolicy.setSegmentsPerTier(args.segmentsPerTier); - mergePolicy.setMaxMergeAtOnce(args.maxMergeAtOnce); - } - config.setMergePolicy(mergePolicy); - } - - this.writer = new IndexWriter(dir, config); - } catch (Exception e) { - throw new IllegalArgumentException(String.format("Unable to create IndexWriter: %s.", e.getMessage())); - } - - LOG.info("HnswIndexer settings:"); - LOG.info(" + Generator: " + args.generatorClass); - LOG.info(" + M: " + args.M); - LOG.info(" + efC: " + args.efC); - LOG.info(" + Store document vectors? " + args.storeVectors); - LOG.info(" + Int8 quantization? " + args.quantizeInt8); - LOG.info(" + Codec: " + this.writer.getConfig().getCodec()); - LOG.info(" + MemoryBuffer: " + args.memoryBuffer); - LOG.info(" + MaxThreadMemoryBeforeFlush: " + args.maxThreadMemoryBeforeFlush); - - if (args.noMerge) { - LOG.info(" + MergePolicy: NoMerge"); - } else if (args.optimize) { - LOG.info(" + MergePolicy: TieredMergePolicy (force merge into a single index segment)"); - } else { - LOG.info(" + MergePolicy: TieredMergePolicy"); - LOG.info(" + MaxMergedSegmentSize: " + args.maxMergedSegmentSize); - LOG.info(" + SegmentsPerTier: " + args.segmentsPerTier); - LOG.info(" + MaxMergeAtOnce: " + args.maxMergeAtOnce); - } - } - - // Solution provided by Solr, see https://www.mail-archive.com/java-user@lucene.apache.org/msg52149.html - // This class exists because Lucene95HnswVectorsFormat's getMaxDimensions method is final and we - // need to workaround that constraint to allow more than the default number of dimensions. 
- private static final class DelegatingKnnVectorsFormat extends KnnVectorsFormat { - private final KnnVectorsFormat delegate; - private final int maxDimensions; - - public DelegatingKnnVectorsFormat(KnnVectorsFormat delegate, int maxDimensions) { - super(delegate.getName()); - this.delegate = delegate; - this.maxDimensions = maxDimensions; - } - - @Override - public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return delegate.fieldsWriter(state); - } - - @Override - public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { - return delegate.fieldsReader(state); - } - - @Override - public int getMaxDimensions(String fieldName) { - return maxDimensions; - } - } - - public static void main(String[] args) throws Exception { - Args indexArgs = new Args(); - CmdLineParser parser = new CmdLineParser(indexArgs, ParserProperties.defaults().withUsageWidth(120)); - - try { - parser.parseArgument(args); - } catch (CmdLineException e) { - if (indexArgs.options) { - System.err.printf("Options for %s:\n\n", IndexHnswDenseVectors.class.getSimpleName()); - parser.printUsage(System.err); - - List required = new ArrayList<>(); - parser.getOptions().forEach((option) -> { - if (option.option.required()) { - required.add(option.option.toString()); - } - }); - - System.err.printf("\nRequired options are %s\n", required); - } else { - System.err.printf("Error: %s. 
For help, use \"-options\" to print out information about options.\n", e.getMessage()); - } - - return; - } - - new IndexHnswDenseVectors(indexArgs).run(); - } -} + import org.apache.logging.log4j.LogManager; + import org.apache.logging.log4j.Logger; + import org.apache.lucene.codecs.KnnVectorsFormat; + import org.apache.lucene.codecs.KnnVectorsReader; + import org.apache.lucene.codecs.KnnVectorsWriter; + import org.apache.lucene.codecs.lucene99.Lucene99Codec; + import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; + import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; + import org.apache.lucene.index.ConcurrentMergeScheduler; + import org.apache.lucene.index.IndexWriter; + import org.apache.lucene.index.IndexWriterConfig; + import org.apache.lucene.index.NoMergePolicy; + import org.apache.lucene.index.SegmentReadState; + import org.apache.lucene.index.SegmentWriteState; + import org.apache.lucene.index.TieredMergePolicy; + import org.apache.lucene.store.Directory; + import org.apache.lucene.store.FSDirectory; + import org.kohsuke.args4j.CmdLineException; + import org.kohsuke.args4j.CmdLineParser; + import org.kohsuke.args4j.Option; + import org.kohsuke.args4j.ParserProperties; + + import java.io.IOException; + import java.nio.file.Paths; + import java.util.ArrayList; + import java.util.List; + + public final class IndexHnswDenseVectors extends AbstractIndexer { + private static final Logger LOG = LogManager.getLogger(IndexHnswDenseVectors.class); + + public static final class Args extends AbstractIndexer.Args { + @Option(name = "-generator", metaVar = "[class]", usage = "Document generator class in io.anserini.index.generator.") + public String generatorClass = "DenseVectorDocumentGenerator"; + + @Option(name = "-M", metaVar = "[num]", usage = "HNSW parameters M") + public int M = 16; + + @Option(name = "-efC", metaVar = "[num]", usage = "HNSW parameters ef Construction") + public int efC = 100; + + @Option(name = 
"-quantize.int8", usage = "Quantize vectors into int8.") + public boolean quantizeInt8 = false; + + @Option(name = "-storeVectors", usage = "Boolean switch to store raw raw vectors.") + public boolean storeVectors = false; + + @Option(name = "-noMerge", usage = "Do not merge segments (fast indexing, slow retrieval).") + public boolean noMerge = false; + + @Option(name = "-maxThreadMemoryBeforeFlush", metaVar = "[num]", usage = "Maximum memory consumption per thread before triggering a forced flush (in MB); must be smaller than 2048.") + public int maxThreadMemoryBeforeFlush = 2047; + // This is the most aggressive possible setting; default is 1945. + // If the setting is too aggressive, may result in GCLocker issues. + + @Option(name = "-maxMergedSegmentSize", metaVar = "[num]", usage = "Maximum sized segment to produce during normal merging (in MB).") + public int maxMergedSegmentSize = 1024 * 16; + + @Option(name = "-segmentsPerTier", metaVar = "[num]", usage = "Allowed number of segments per tier.") + public int segmentsPerTier = 10; + + @Option(name = "-maxMergeAtOnce", metaVar = "[num]", usage = "Maximum number of segments to be merged at a time during \"normal\" merging.") + public int maxMergeAtOnce = 10; + } + + @SuppressWarnings("unchecked") + public IndexHnswDenseVectors(Args args) throws Exception { + super(args); + + try { + super.generatorClass = (Class>) + Class.forName("io.anserini.index.generator." 
+ args.generatorClass); + } catch (Exception e) { + throw new IllegalArgumentException(String.format("Unable to load generator class \"%s\".", args.generatorClass)); + } + + try { + final Directory dir = FSDirectory.open(Paths.get(args.index)); + final IndexWriterConfig config; + + if (args.quantizeInt8) { + config = new IndexWriterConfig().setCodec( + new Lucene99Codec() { + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return new DelegatingKnnVectorsFormat( + new Lucene99HnswScalarQuantizedVectorsFormat(args.M, args.efC), 4096); + } + }); + } else { + config = new IndexWriterConfig().setCodec( + new Lucene99Codec() { + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return new DelegatingKnnVectorsFormat( + new Lucene99HnswVectorsFormat(args.M, args.efC), 4096); + } + }); + } + + config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); + config.setRAMBufferSizeMB(args.memoryBuffer); + config.setRAMPerThreadHardLimitMB(args.maxThreadMemoryBeforeFlush); + config.setUseCompoundFile(false); + config.setMergeScheduler(new ConcurrentMergeScheduler()); + + if (args.noMerge) { + config.setMergePolicy(NoMergePolicy.INSTANCE); + } else { + TieredMergePolicy mergePolicy = new TieredMergePolicy(); + if (args.optimize) { + // If we're going to merge down into a single segment at the end, skip intermediate merges, + // since they are a waste of time. 
+ mergePolicy.setMaxMergeAtOnce(256); + mergePolicy.setSegmentsPerTier(256); + } else { + mergePolicy.setFloorSegmentMB(1024); + mergePolicy.setMaxMergedSegmentMB(args.maxMergedSegmentSize); + mergePolicy.setSegmentsPerTier(args.segmentsPerTier); + mergePolicy.setMaxMergeAtOnce(args.maxMergeAtOnce); + } + config.setMergePolicy(mergePolicy); + } + + this.writer = new IndexWriter(dir, config); + } catch (Exception e) { + throw new IllegalArgumentException(String.format("Unable to create IndexWriter: %s.", e.getMessage())); + } + + LOG.info("HnswIndexer settings:"); + LOG.info(" + Generator: " + args.generatorClass); + LOG.info(" + M: " + args.M); + LOG.info(" + efC: " + args.efC); + LOG.info(" + Store document vectors? " + args.storeVectors); + LOG.info(" + Int8 quantization? " + args.quantizeInt8); + LOG.info(" + Codec: " + this.writer.getConfig().getCodec()); + LOG.info(" + MemoryBuffer: " + args.memoryBuffer); + LOG.info(" + MaxThreadMemoryBeforeFlush: " + args.maxThreadMemoryBeforeFlush); + + if (args.noMerge) { + LOG.info(" + MergePolicy: NoMerge"); + } else if (args.optimize) { + LOG.info(" + MergePolicy: TieredMergePolicy (force merge into a single index segment)"); + } else { + LOG.info(" + MergePolicy: TieredMergePolicy"); + LOG.info(" + MaxMergedSegmentSize: " + args.maxMergedSegmentSize); + LOG.info(" + SegmentsPerTier: " + args.segmentsPerTier); + LOG.info(" + MaxMergeAtOnce: " + args.maxMergeAtOnce); + } + } + + // Solution provided by Solr, see https://www.mail-archive.com/java-user@lucene.apache.org/msg52149.html + // This class exists because getMaxDimensions cannot be overridden in the Lucene99 HNSW vectors formats wrapped above, + // and we need to work around that constraint to allow more than the default number of dimensions. 
+ private static final class DelegatingKnnVectorsFormat extends KnnVectorsFormat { + private final KnnVectorsFormat delegate; + private final int maxDimensions; + + public DelegatingKnnVectorsFormat(KnnVectorsFormat delegate, int maxDimensions) { + super(delegate.getName()); + this.delegate = delegate; + this.maxDimensions = maxDimensions; + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return delegate.fieldsWriter(state); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return delegate.fieldsReader(state); + } + + @Override + public int getMaxDimensions(String fieldName) { + return maxDimensions; + } + } + + public static void main(String[] args) throws Exception { + Args indexArgs = new Args(); + CmdLineParser parser = new CmdLineParser(indexArgs, ParserProperties.defaults().withUsageWidth(120)); + + try { + parser.parseArgument(args); + } catch (CmdLineException e) { + if (indexArgs.options) { + System.err.printf("Options for %s:\n\n", IndexHnswDenseVectors.class.getSimpleName()); + parser.printUsage(System.err); + + List required = new ArrayList<>(); + parser.getOptions().forEach((option) -> { + if (option.option.required()) { + required.add(option.option.toString()); + } + }); + + System.err.printf("\nRequired options are %s\n", required); + } else { + System.err.printf("Error: %s. 
For help, use \"-options\" to print out information about options.\n", e.getMessage()); + } + + return; + } + + new IndexHnswDenseVectors(indexArgs).run(); + } + } \ No newline at end of file diff --git a/src/main/java/io/anserini/index/generator/HnswJsonWithSafeTensorsDenseVectorDocumentGenerator.java b/src/main/java/io/anserini/index/generator/HnswJsonWithSafeTensorsDenseVectorDocumentGenerator.java index a99fc1c8f5..d7a5cecf88 100644 --- a/src/main/java/io/anserini/index/generator/HnswJsonWithSafeTensorsDenseVectorDocumentGenerator.java +++ b/src/main/java/io/anserini/index/generator/HnswJsonWithSafeTensorsDenseVectorDocumentGenerator.java @@ -2,7 +2,8 @@ import io.anserini.collection.SourceDocument; import io.anserini.index.Constants; -import io.anserini.index.IndexCollection; +import io.anserini.index.IndexHnswDenseVectors; +import io.anserini.index.IndexHnswDenseVectors.Args; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.document.BinaryDocValuesField; @@ -26,36 +27,20 @@ public class HnswJsonWithSafeTensorsDenseVectorDocumentGenerator implements LuceneDocumentGenerator { private static final Logger LOG = LogManager.getLogger(HnswJsonWithSafeTensorsDenseVectorDocumentGenerator.class); - protected IndexCollection.Args args; + protected Args args; private HashSet allowedFileSuffix; - public HnswJsonWithSafeTensorsDenseVectorDocumentGenerator(IndexCollection.Args args) { - super(); - this.args = args; + public HnswJsonWithSafeTensorsDenseVectorDocumentGenerator() { this.allowedFileSuffix = new HashSet<>(Arrays.asList(".json", ".jsonl", ".gz")); LOG.info("V1 Initializing HnswJsonWithSafeTensorsDenseVectorDocumentGenerator..."); - initializeArgs(); } - public void setArgs(IndexCollection.Args args) { + public void setArgs(IndexHnswDenseVectors.Args args) { this.args = args; LOG.info("Args set via setter method:"); LOG.info(" - Input path: " + this.args.input); } - private void initializeArgs() { - if 
(this.args.input == null || this.args.input.isEmpty()) { - String inputPath = System.getProperty("input.path"); - if (inputPath != null && !inputPath.isEmpty()) { - this.args.input = inputPath; - LOG.info("Initialized input path from system property: " + this.args.input); - } else { - LOG.error("Input path is not provided and cannot be initialized."); - throw new IllegalArgumentException("Input path is not provided."); - } - } - } - @Override public Document createDocument(T src) throws InvalidDocumentException { try { @@ -94,6 +79,7 @@ public Document createDocument(T src) throws InvalidDocumentException { // Create the Lucene document String id = src.id(); + LOG.info("Processing document ID: " + id); int[] docidAscii = id.chars().toArray(); Integer index = null; @@ -121,7 +107,7 @@ public Document createDocument(T src) throws InvalidDocumentException { return document; } catch (Exception e) { LOG.error("Error creating document", e); - LOG.error("trace: " + e.getStackTrace()); + LOG.error("trace: " + Arrays.toString(e.getStackTrace())); LOG.error("Document ID: " + src.id()); LOG.error("Document contents: " + src.contents()); LOG.error("Paths: " + this.args.input); diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index 07bea1e188..76e255609c 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -59,6 +59,12 @@ public Map searchIndex(@PathVariable(value = "index", required = return queryMap; } + @RequestMapping(method = RequestMethod.GET, path = "/indexes/{index}/documents/{docid}") + public Map getDocument(@PathVariable("index") String index, @PathVariable("docid") String docid) { + SearchService searchService = new SearchService(index); + return searchService.getDocument(docid); + } + @RequestMapping(method = RequestMethod.GET, path = "/indexes/{index}/status") public Map getIndexStatus(@PathVariable("index") String 
index) { if (!IndexInfo.contains(index)) { diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index 81281e60a7..44771b4294 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -57,16 +57,23 @@ public List> search(String query, int hits) { ScoredDoc[] results = searcher.search(query, hits); List> candidates = new ArrayList<>(); for (ScoredDoc r : results) { - String raw = r.lucene_document.get(Constants.RAW); - JsonNode rootNode = mapper.readTree(raw); - Map content = mapper.convertValue(rootNode, Map.class); - content.remove("docid"); Map candidate = new LinkedHashMap<>(); candidate.put("docid", r.docid); candidate.put("score", r.score); - candidate.put("doc", content); + String raw = r.lucene_document.get(Constants.RAW); + if (raw != null) { + JsonNode rootNode = mapper.readTree(raw); + Map content = mapper.convertValue(rootNode, Map.class); + content.remove("docid"); + content.remove("id"); + content.remove("_id"); + candidate.put("doc", content); + } else { + candidate.put("doc", null); + } candidates.add(candidate); } + searcher.close(); return candidates; } catch (Exception e) { e.printStackTrace(); @@ -74,4 +81,27 @@ public List> search(String query, int hits) { } } + public Map getDocument(String docid) { + try { + SimpleSearcher searcher = new SimpleSearcher(indexDir); + String raw = searcher.doc(docid).get(Constants.RAW); + Map candidate = new LinkedHashMap<>(); + if (raw != null) { + JsonNode rootNode = mapper.readTree(raw); + Map content = mapper.convertValue(rootNode, Map.class); + content.remove("docid"); + content.remove("id"); + content.remove("_id"); + candidate.put("doc", content); + } else { + candidate.put("doc", null); + } + searcher.close(); + return candidate; + } catch (Exception e) { + e.printStackTrace(); + return Map.of(); + } + } + } \ No newline at end of file diff --git 
a/src/main/python/run_regression.py b/src/main/python/run_regression.py index a91257149d..72741f06ac 100644 --- a/src/main/python/run_regression.py +++ b/src/main/python/run_regression.py @@ -25,12 +25,12 @@ import stat import tarfile import time +import yaml +from collections import defaultdict from multiprocessing import Pool from subprocess import call, Popen, PIPE -from urllib.request import urlretrieve - -import yaml from tqdm import tqdm +from urllib.request import urlretrieve logger = logging.getLogger('regression_test') logger.setLevel(logging.INFO) @@ -183,6 +183,87 @@ def construct_convert_commands(yaml_data): return converting_commands +beir_flat_int8_onnx = defaultdict(lambda: 0.004) +beir_flat_int8_onnx['ArguAna'] = 0.03 +beir_flat_int8_onnx['NFCorpus'] = 0.007 +beir_flat_int8_onnx['Signal-1M'] = 0.006 +beir_flat_int8_onnx['TREC-NEWS'] = 0.01 +beir_flat_int8_onnx['Webis-Touche2020'] = 0.007 + +beir_flat_int8_cached = defaultdict(lambda: 0.004) +beir_flat_int8_cached['BioASQ'] = 0.005 +beir_flat_int8_cached['NFCorpus'] = 0.006 +beir_flat_int8_cached['Signal-1M'] = 0.007 +beir_flat_int8_cached['TREC-NEWS'] = 0.01 +beir_flat_int8_cached['Webis-Touche2020'] = 0.007 + +beir_flat_onnx = defaultdict(lambda: 0.001) +beir_flat_onnx['ArguAna'] = 0.02 +beir_flat_onnx['CQADupStack-wordpress'] = 0.002 +beir_flat_onnx['Quora'] = 0.002 +beir_flat_onnx['Robust04'] = 0.004 + +beir_flat_cached = defaultdict(lambda: 1e-9) + +beir_flat_tolerance = { + 'flat-int8-onnx': beir_flat_int8_onnx, + 'flat-int8-cached': beir_flat_int8_cached, + 'flat-onnx': beir_flat_onnx, + 'flat-cached': beir_flat_cached, +} + +beir_hnsw_int8_onnx = defaultdict(lambda: 0.005) +beir_hnsw_int8_onnx['ArguAna'] = 0.03 +beir_hnsw_int8_onnx['BioASQ'] = 0.02 +beir_hnsw_int8_onnx['DBPedia'] = 0.006 +beir_hnsw_int8_onnx['FiQA-2018'] = 0.007 +beir_hnsw_int8_onnx['HotpotQA'] = 0.008 +beir_hnsw_int8_onnx['NFCorpus'] = 0.006 +beir_hnsw_int8_onnx['Robust04'] = 0.006 +beir_hnsw_int8_onnx['Signal-1M'] = 0.04 
+beir_hnsw_int8_onnx['TREC-NEWS'] = 0.02 +beir_hnsw_int8_onnx['Webis-Touche2020'] = 0.01 + +beir_hnsw_int8_cached = defaultdict(lambda: 0.005) +beir_hnsw_int8_cached['BioASQ'] = 0.02 +beir_hnsw_int8_cached['FiQA-2018'] = 0.007 +beir_hnsw_int8_cached['HotpotQA'] = 0.007 +beir_hnsw_int8_cached['Signal-1M'] = 0.04 +beir_hnsw_int8_cached['TREC-NEWS'] = 0.02 +beir_hnsw_int8_cached['Webis-Touche2020'] = 0.006 + +beir_hnsw_onnx = defaultdict(lambda: 0.003) +beir_hnsw_onnx['ArguAna'] = 0.02 +beir_hnsw_onnx['CQADupStack-wordpress'] = 0.004 +beir_hnsw_onnx['DBPedia'] = 0.006 +beir_hnsw_onnx['FEVER'] = 0.007 +beir_hnsw_onnx['FiQA-2018'] = 0.007 +beir_hnsw_onnx['HotpotQA'] = 0.007 +beir_hnsw_onnx['Robust04'] = 0.004 +beir_hnsw_onnx['Signal-1M'] = 0.05 +beir_hnsw_onnx['TREC-NEWS'] = 0.02 + +beir_hnsw_cached = defaultdict(lambda: 0.003) +beir_hnsw_cached['DBPedia'] = 0.006 +beir_hnsw_cached['FEVER'] = 0.008 +beir_hnsw_cached['FiQA-2018'] = 0.008 +beir_hnsw_cached['HotpotQA'] = 0.007 +beir_hnsw_cached['Signal-1M'] = 0.05 +beir_hnsw_cached['TREC-NEWS'] = 0.025 + +beir_hnsw_tolerance = { + 'hnsw-int8-onnx': beir_hnsw_int8_onnx, + 'hnsw-int8-cached': beir_hnsw_int8_cached, + 'hnsw-onnx': beir_hnsw_onnx, + 'hnsw-cached': beir_hnsw_cached, +} + +flat_model_type_pattern = re.compile(r'(flat-int8-onnx|flat-int8-cached|flat-onnx|flat-cached)$') +hnsw_model_type_pattern = re.compile(r'(hnsw-int8-onnx|hnsw-int8-cached|hnsw-onnx|hnsw-cached)$') + +beir_dataset_pattern = re.compile(r'BEIR \(v1.0.0\): (.*)$') + + def evaluate_and_verify(yaml_data, dry_run): fail_str = '\033[91m[FAIL]\033[0m ' ok_str = ' [OK] ' @@ -215,123 +296,102 @@ def evaluate_and_verify(yaml_data, dry_run): using_flat = True if 'type' in model and model['type'] == 'flat' else False if using_flat and 'BEIR' in topic_set['name']: - if model['name'].endswith('-flat-int8-onnx'): - if topic_set['name'].endswith('ArguAna'): - flat_tolerance_ok = 0.021 - elif topic_set['name'].endswith('NFCorpus') and metric['metric'] == 
'R@1000': - flat_tolerance_ok = 0.007 - elif topic_set['name'].endswith('Signal-1M'): - flat_tolerance_ok = 0.006 - elif topic_set['name'].endswith('TREC-NEWS'): - flat_tolerance_ok = 0.01 - elif topic_set['name'].endswith('Webis-Touche2020'): - flat_tolerance_ok = 0.007 - else: - flat_tolerance_ok = 0.005 - elif model['name'].endswith('-flat-int8-cached'): - if topic_set['name'].endswith('BioASQ'): - flat_tolerance_ok = 0.005 - elif topic_set['name'].endswith('NFCorpus') and metric['metric'] == 'R@1000': - flat_tolerance_ok = 0.006 - elif topic_set['name'].endswith('Signal-1M'): - flat_tolerance_ok = 0.007 - elif topic_set['name'].endswith('TREC-NEWS'): - flat_tolerance_ok = 0.009 - elif topic_set['name'].endswith('Webis-Touche2020'): - flat_tolerance_ok = 0.007 - else: - flat_tolerance_ok = 0.004 - elif model['name'].endswith('-flat-onnx'): - if topic_set['name'].endswith('ArguAna'): - flat_tolerance_ok = 0.02 - elif topic_set['name'].endswith('Robust04'): - flat_tolerance_ok = 0.004 - else: - flat_tolerance_ok = 0.002 - else: - flat_tolerance_ok = 1e-9 + # Extract BEIR dataset + match = beir_dataset_pattern.search(topic_set['name']) + beir_dataset = match.group(1) + + # Extract model + match = flat_model_type_pattern.search(model['name']) + model_type = match.group(1) + + # Lookup tolerance + tolerance_ok = beir_flat_tolerance[model_type][beir_dataset] elif using_flat and 'MS MARCO Passage' in topic_set['name']: if model['name'].endswith('-flat-int8-onnx'): - flat_tolerance_ok = 0.002 + tolerance_ok = 0.002 elif model['name'].endswith('-flat-int8-cached'): if model['name'] == 'openai-ada2-flat-int8-cached': - flat_tolerance_ok = 0.008 + tolerance_ok = 0.008 else: - flat_tolerance_ok = 0.002 + tolerance_ok = 0.002 elif model['name'].endswith('-flat-onnx'): - flat_tolerance_ok = 0.0001 + tolerance_ok = 0.0001 else: - flat_tolerance_ok = 1e-9 - #print(f'Tolerance: {flat_tolerance_ok}') + tolerance_ok = 1e-9 elif using_flat and 'DL19' in topic_set['name']: if 
model['name'].endswith('-flat-int8-onnx'): if model['name'] == 'bge-flat-int8-onnx': - flat_tolerance_ok = 0.007 + tolerance_ok = 0.007 elif model['name'] == 'cos-dpr-distil-flat-int8-onnx': - flat_tolerance_ok = 0.004 + tolerance_ok = 0.004 else: - flat_tolerance_ok = 0.002 + tolerance_ok = 0.002 elif model['name'].endswith('-flat-int8-cached'): if model['name'] == 'openai-ada2-flat-int8-cached': - flat_tolerance_ok = 0.008 + tolerance_ok = 0.008 else: - flat_tolerance_ok = 0.002 + tolerance_ok = 0.002 elif model['name'].endswith('-flat-onnx'): if model['name'] == 'bge-flat-onnx': - flat_tolerance_ok = 0.008 + tolerance_ok = 0.008 else: - flat_tolerance_ok = 0.0001 + tolerance_ok = 0.0001 else: - flat_tolerance_ok = 1e-9 - #print(f'DL19 Tolerance: {flat_tolerance_ok}') + tolerance_ok = 1e-9 elif using_flat and 'DL20' in topic_set['name']: if model['name'].endswith('-flat-int8-onnx'): if model['name'] == 'bge-flat-int8-onnx': - flat_tolerance_ok = 0.004 + tolerance_ok = 0.004 elif model['name'] == 'cos-dpr-distil-flat-int8-onnx': - flat_tolerance_ok = 0.004 + tolerance_ok = 0.004 else: - flat_tolerance_ok = 0.002 + tolerance_ok = 0.002 elif model['name'].endswith('-flat-int8-cached'): if model['name'] == 'bge-flat-int8-cached': - flat_tolerance_ok = 0.005 + tolerance_ok = 0.005 elif model['name'] == 'cos-dpr-distil-flat-int8-cached': - flat_tolerance_ok = 0.004 + tolerance_ok = 0.004 else: - flat_tolerance_ok = 0.002 + tolerance_ok = 0.002 elif model['name'].endswith('-flat-onnx'): if model['name'] == 'bge-flat-onnx': - flat_tolerance_ok = 0.005 + tolerance_ok = 0.005 else: - flat_tolerance_ok = 0.0001 + tolerance_ok = 0.0001 else: - flat_tolerance_ok = 1e-9 - #print(f'DL20 Tolerance: {flat_tolerance_ok}') + tolerance_ok = 1e-9 else: - flat_tolerance_ok = 1e-9 + tolerance_ok = 1e-9 + + if using_hnsw and 'BEIR' in topic_set['name']: + # Extract BEIR dataset + match = beir_dataset_pattern.search(topic_set['name']) + beir_dataset = match.group(1) + + # Extract model + 
match = hnsw_model_type_pattern.search(model['name']) + model_type = match.group(1) + + # Lookup tolerance + tolerance_ok = beir_hnsw_tolerance[model_type][beir_dataset] - # For HNSW, only print out score to third digit - if using_hnsw: - result_str = 'expected: {0:.3f} actual: {1:.3f} - metric: {2:<8} model: {3} topics: {4}'.format( - expected, actual, metric['metric'], model['name'], topic_set['id']) - if using_flat: + if using_flat or using_hnsw: result_str = (f'expected: {expected:.4f} actual: {actual:.4f} ' - f'(delta={abs(expected-actual):.4f}, tolerance={abs(flat_tolerance_ok):.4f}) - ' + f'(delta={abs(expected-actual):.4f}, tolerance={abs(tolerance_ok):.4f}) - ' f'metric: {metric["metric"]:<8} model: {model["name"]} topics: {topic_set["id"]}') else: result_str = (f'expected: {expected:.4f} actual: {actual:.4f} (delta={abs(expected-actual):.4f}) - ' f'metric: {metric["metric"]:<8} model: {model["name"]} topics: {topic_set["id"]}') - # - For inverted indexes, we expect scores to match precisely. - # - For flat indexes (on dense vectors), use the tolerance values set above. - # - For HNSW, be more tolerant, but as long as the actual score is higher than the expected score, - # let the test pass. + # For flat and HNSW indexes: + # - to get "OK", we need to be within specified tolerance. + # - to get "OKish", we need to be within 150% of specified tolerance. if is_close(expected, actual) or actual > expected or \ - (using_flat and is_close(expected, actual, abs_tol=flat_tolerance_ok)) or \ - (using_hnsw and is_close(expected, actual, abs_tol=0.005)): + (using_flat and is_close(expected, actual, abs_tol=tolerance_ok)) or \ + (using_hnsw and is_close(expected, actual, abs_tol=tolerance_ok)): logger.info(ok_str + result_str) - # For ONNX runs with HNSW, increase tolerance a bit because we observe minor differences across OSes. 
- elif using_hnsw and is_close(expected, actual, abs_tol=0.0101): + elif (using_flat and is_close(expected, actual, abs_tol=tolerance_ok * 1.5)) or \ + (using_hnsw and is_close(expected, actual, abs_tol=tolerance_ok * 1.5)): logger.info(okish_str + result_str) okish = True else: diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached.template index 6eaaddf970..262e891b5e 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.onnx.template index 4bbe0062a9..c936299443 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached.template index 19507ec3a4..c6511f9185 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat.onnx.template index e7a69b8e1f..2e18d8d193 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.template index eb2b1a62be..0ea459dd5d 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.template index 5db4723457..7ba229ebf8 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.template index 42da8dc6e8..1830438ba8 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.template index 3a473b2b9e..2c9d9fbb0f 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached.template index 16299c9d67..c841f9f7f4 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.onnx.template index 4ddac03086..71617251b0 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached.template index 2f7ea61696..927284a38a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.onnx.template index 9bbaa8fe0b..a0803206be 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.template index 8699ce7aa8..05c7a3eac2 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.template index ba88cb27a3..140cba9724 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.template index 747c7a1e4e..50bb40272d 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.template index e2ed701f9b..2fa38bf7c4 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached.template index 95874bc723..f123328094 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.onnx.template index 86516a1d95..13b9f617c0 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached.template index a3194fbce4..4f445c4c83 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.onnx.template index 4464d84709..754d14afbf 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.template index 5a1618beb8..a171d3e6cf 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.template index ef1d7c8b15..4715ee4c14 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.template index c5cb8f1822..75173695a0 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.template index bd6ae93131..edab40c985 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached.template index 4faccb10b6..a49ff3ea60 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.onnx.template index e878cf2632..c95b809202 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached.template index 02e75222e2..ac47d89d70 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.onnx.template index 50a3de1b5a..1bf93f62d2 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.template index 8c916c614c..1b06763a7a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.template index 122df085ab..af12ee23a8 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.template index b0e9fb765a..720d20afe1 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.template index 82451ba39e..150b8c2ee3 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached.template index 14854acf6f..698c8f65ac 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.onnx.template index f91f5e796b..8b8d4ac98d 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached.template index e6e59531fa..07163356ea 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.onnx.template index 30539cb7e8..4692f02b94 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.template index 84944c780c..8c42137f22 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.template index c02d799ae0..62cd23ca69 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.template index 2b03640f96..bee4b64819 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.template index 03f3723fe1..b607b8cf39 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached.template index 3d092544b0..572b353fe7 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.onnx.template index 595e48b41e..d0c3655b84 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached.template index 29dce25c7d..da1cf0b372 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.onnx.template index a3bccd6bb8..3312196465 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.template index 284b3b9bef..e96dec3f6a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.template index 04e697fafa..81c621052b 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.template index a28fdd4fbd..311fac23c3 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.template index 3db6beee86..f456d74e00 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached.template index 1b9d71d837..6d4ace359f 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.onnx.template index 8ad2394e80..b5a6313885 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached.template index f1767b246a..45a90c11d6 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.onnx.template index 670e8d1121..f797df4977 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.template index 61960fa188..a0126c1dab 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.template index 0f2cf98ffe..d02b9cfe37 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.template index 183cf132b4..c4b2b79918 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.template index c161656973..95893dfb6d 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached.template index 31cfae13d7..27e04008ff 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.onnx.template index a707832696..7845692d70 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached.template index cbdac209e9..095bcc524b 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.onnx.template index 4cd45fd3b8..b0dbfdbff2 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.template index b6306ff577..9e29a7c658 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.template index b95df37096..07f968bc60 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.template index 583865be0f..0154151dbf 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.template index 2ca8aa2244..a48f68c534 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached.template index d3db704164..ea4c434f06 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.onnx.template index 69bd75dad5..0b01b82519 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached.template index 50e04697b6..6d5f15a980 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.onnx.template index e805b8a1e0..bb70964224 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.template index 3e4c55b59f..bab4374f35 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.template index 9b2ed4ee88..e65420d91d 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.template index 3c121f0532..8b370b536b 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.template index 9783ab707b..95bfb6c2b2 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached.template index ff4f23c9d5..a041b6da13 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.onnx.template index cdb511737e..b1b1baee05 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached.template index c9b558d66c..dec6e4499f 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.onnx.template index 43cec12d50..1be911edc8 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.template index 78fb1ae681..095b3ada0d 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.template index c573dba8d5..665d3dd551 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.template index 73798eae56..c068b90fc8 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.template index 0b34d19508..84ad5d4198 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached.template index ead3d4f3d2..decd89efdd 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.onnx.template index 6baaba255f..59b9bd2b4b 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached.template index b69c2c473e..7e66d41839 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.onnx.template index 62249014bc..f1c203f74c 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.template index f6f33faa0f..635ebd6fbb 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.template index 03fd8057f7..a78a77654d 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.template index 4ba72b1988..348f10c27f 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.template index 61da69081c..831c426411 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached.template index d96400f6a4..cf650ce18d 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.onnx.template index 3f97ae746e..61be4f89c9 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached.template index 01dfb21b3d..d3baaa20d1 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.onnx.template index fe2d1e533a..f875c14d34 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.001 of the results reported above (with some outliers).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.template index 6571b43797..5da2d26eea 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.template index 03f213a230..0e83c6fd5f 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.template index dabe08d969..5664170fc2 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.template index 9ce8d915b5..b6d6a1e232 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached.template index d11f3751fa..679de5e3e9 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.onnx.template index 6eba7db644..7e8dcd1fcd 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached.template index 17d9e7e87e..e0b2fdd94a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.onnx.template index 3fee3a920e..fb97feec91 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.001 of the results reported above (with some outliers).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.template index 67fdb45746..e2030dd20e 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.template index 81f0bdd66d..9c3d9edee3 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.template index a08f31935e..69d7293b95 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.template index ed5e22847c..af3ca19ae1 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached.template index af650bb014..fc30143951 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.onnx.template index 2b7ba2a817..c3891815ac 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached.template index 9dcc3292bd..1ba3e76e55 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.onnx.template index 3afcc5fde5..27aefe8506 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.template index aa2153899d..af1d7a7500 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.template index 8ec1326db9..49cf301a76 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.template index 3021c43cd3..91e2d7887a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.template index ec824b9fb6..f69422801a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached.template index ea2d1377c5..f062624229 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.onnx.template index 8b53741767..c05c47fd68 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials).
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached.template index f1ec9025dd..cda8997d1f 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.onnx.template index ab8e88028e..eb4a868992 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. 
+With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.template index 4f1efada13..a9683d37d2 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.template index 7a829d269c..744e087d48 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.template index ff3d33c1dc..17c0226707 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.template index 5e99e764ed..a68486b200 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached.template index 62e26e1e0f..224147a639 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.onnx.template index f7834d6dc8..35defeb947 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached.template index 39c002927b..f5bb7b2a3b 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.onnx.template index f308da7320..854273d2b4 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.template index 0f32cee2ad..ceaf01f48d 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.template index d6a5178692..e53caf82c9 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.template index cd4a81370f..d07261324e 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.template index 560e3271f7..806536bcea 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached.template index 481f53f3d6..c9d8685251 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.onnx.template index 95e7afb323..af686604f9 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. 
-With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached.template index 4568be9d1b..d98498bc77 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat.onnx.template index 4a7971d985..904eeb4d0e 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.template index 63520ac463..f8d682a851 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.template index 424709e794..ab6b3a854f 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.template index 60c6237376..8669ad6486 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.template index f600d12a6a..34e47e9ae9 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached.template index b8fc86bdba..527a7511c0 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.onnx.template index 967992916c..1b83e666f6 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached.template index 21e5ded2a3..66d4edafb1 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.onnx.template index f926176247..c728e48678 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.template index 86e006c9e0..2d364c3476 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.template index 5b09436234..581fe36ed8 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.template index 3dabab7be0..40a2a8c0cf 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.template index 8d0a8a4abd..ffa3dc23af 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached.template index d68bd42cbd..eab0a877da 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.onnx.template index 568e817c70..7b72cb5c72 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached.template index 1e1598ba85..7bb38916ef 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.onnx.template index c5f6d7a878..016c12aeb9 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.template index 5297d135ae..5dafd50807 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.template index 167eeb7bc5..6f7a38b773 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.template index eba71fcead..0d37ffeaec 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.template index a132900cdc..0302374648 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached.template index 68637f149c..7a7b7f34be 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.onnx.template index cbc031f78c..9cc194d414 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached.template index 5a6e5499c6..8b55da2c44 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.onnx.template index 971edd32fa..03c9295ba8 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.template index 594e0dea85..e6e8ee16c2 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.template index 173dd56110..fac764fee6 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.template index 67b3e521d7..833a5523fe 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.template index 3efba492e4..e1b27d2078 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached.template index 55ddb08437..2e85aa6157 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.onnx.template index 909a4a3c1b..df40f0df79 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached.template index 44cb63e440..20e96cf674 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat.onnx.template index dcee83bfec..9eb33b4905 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.template index 4351c9b9b1..5f9e43179a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.template index 0413483033..7b34ccc5b8 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.template index 8423c22708..2cba96579f 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.template index b1c54f40ca..d3fd44561c 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached.template index b560570fb3..37c71d895e 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.onnx.template index a8df0648bc..9fb6c5a2b0 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached.template index 1b0d900144..2555341c98 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat.onnx.template index a29e131184..04e31a3830 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.template index 7a03711fd1..8823478411 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.template index 1e5da122f8..ea3ea09861 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.template index d92a55bbb2..f170612384 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.template index 36def32b84..5dd2c02b21 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). 
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached.template index 83572ef983..33f3b9a28c 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.onnx.template index d05e2057c1..fe2d9ddf65 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached.template index 390ffe5dfd..783120753f 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat.onnx.template index 2519e0ec89..121dbff13c 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.template index b7ff028d4b..c32ce1e27e 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.template index 909eae9e4c..689621d8d4 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.template index af9702b0f3..0676d52730 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.template index 93ef8b1b69..df04002b26 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached.template index c542db777a..1d092d57fe 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.onnx.template index 2e8b7f217c..5c0e482494 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached.template index d34f7d36b8..e3a24ee52a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.onnx.template index 79bea8987a..4895f13597 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.template index 049eaba6d9..4c2af71075 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.template index 477ecb25ee..a2a1b49c48 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.template index b6037876df..bf00680ccc 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.template index 07844f91d2..bd7a473633 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached.template index e5ce04af07..9f8ff4b0b8 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.onnx.template index 902b02888f..098e4dabfe 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached.template index a20b76eef6..ba32939308 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat.onnx.template index 36ae9a3405..29db2c188b 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.template index 5975ad8771..7b07cf11cc 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.template index 86fd9e8e97..4ed7f33e87 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.template index d93e7ab9ff..749e4f32c5 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.template index f2382dac4b..07fb4416a0 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached.template index 76b6921541..8d36a48b74 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.onnx.template index ec069a3b57..668410cd88 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached.template index d58a39a4e8..434c2d490a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.onnx.template index 2920c44057..ba8ec8d193 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.template index 8e3983aebf..704e22cd0b 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.template index 33a1c26573..83ce303ac9 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.template index b8594e3283..171fff2691 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.template index 732550acbd..db40b335d2 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached.template index 24ea9b62ee..211583e08b 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.onnx.template index fed2958435..de8a096ccd 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached.template index 5311214c51..ed1487ff84 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.onnx.template index 4680fb7b3c..17afbd0590 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.template index 8e056dad98..0c4f35b33f 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.template index fbc28514bf..d9b95458a3 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.template index a7b039ec3b..1ab8be5c60 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.template index d304b2fb9a..5429c23007 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached.template index cdee1fd47d..96b1928f4a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.onnx.template index 0ebb4267c1..01a71922be 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached.template index 287c5049ee..0a96f7ebc5 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. 
+Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.onnx.template index ebfad34690..edebc30ea3 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.template index 2fb7b244d1..bad558d3dc 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.template index fd9193d018..37841d21a4 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.template index 0f885884f7..bd364bc5bc 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.template index b042820b4d..3353032f25 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached.template index 983471e588..db597b12ae 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes, results may differ slightly, but the nDCG@10 score should generally be within 0.004 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With cached queries on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.onnx.template index 5742056c79..b62469d73d 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.onnx.template @@ -59,5 +59,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries on non-quantized indexes. -With quantized indexes and on-the-fly ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.005 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.004 of the results reported above (with some outliers). +Note that quantization is non-deterministic due to sampling (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached.template index 73defa75fe..3814730a5a 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached.template @@ -59,4 +59,4 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that since we're running brute-force search, the results should be reproducible _exactly_. +Note that since we're running brute-force search with cached queries on non-quantized flat indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.onnx.template index 5a527dc783..0056dd8da0 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.onnx.template @@ -59,5 +59,5 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -The above figures are from running brute-force search with cached queries. -With ONNX query encoding, results may differ slightly, but the nDCG@10 score should generally be within 0.002 of the result reported above (with a small number of outliers). +The above figures are from running brute-force search with cached queries on non-quantized flat indexes. +With ONNX query encoding on non-quantized flat indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.001 of the results reported above (with some outliers). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.template index eec96b8713..379c521836 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.template index b8fd5141a7..f5851d20e4 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.005 of the results reported above (with some outliers). +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). 
diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.template index 9081c31d22..90641553fe 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.template index 0f816a7852..0816205962 100644 --- a/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.template @@ -61,5 +61,6 @@ With the above commands, you should be able to reproduce the following results: ${effectiveness} -Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}). +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results may differ slightly (typically, lower), but scores should generally be within 0.003 of the results reported above (with some outliers). +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.yaml index a829e26f19..4f836a7951 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-arguana.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.635 + - 0.6361 R@100: - - 0.991 + - 0.9915 R@1000: - - 0.996 + - 0.9964 diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 5c342370db..641f274952 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-arguana.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 
-hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.621 + - 0.6361 R@100: - - 0.971 + - 0.9915 R@1000: - - 0.994 + - 0.9964 diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.yaml index 7741f1b9bd..3d662cdebe 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.636 + - 0.6361 R@100: - - 0.992 + - 0.9915 R@1000: - - 0.996 + - 0.9964 diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.yaml index c8cea414f9..030c56d4bd 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.623 + - 0.6361 R@100: - - 0.972 + - 0.9915 R@1000: - - 0.993 + - 0.9964 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.yaml index 369179c89c..228d152e99 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-bioasq.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: 
hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 5000 results: nDCG@10: - - 0.407 + - 0.4149 R@100: - - 0.624 + - 0.6317 R@1000: - - 0.795 + - 0.8059 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.yaml index b8c41e4652..bcc64ec72d 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-bioasq.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 5000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.408 + - 0.4149 R@100: - - 0.624 + - 0.6317 R@1000: - - 0.797 + - 0.8059 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.yaml index f3d7773e01..3b9b22d6c9 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 5000 results: nDCG@10: - - 0.410 + - 0.4149 R@100: - - 0.622 + - 0.6317 R@1000: - - 0.794 + - 0.8059 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.yaml index e0bacc6208..7025a2467b 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 5000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.414 + - 0.4149 R@100: - - 0.628 + - 0.6317 R@1000: - - 0.802 + - 0.8059 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.yaml index 1e140fabe9..92f3d564e2 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-climate-fever.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.309 + - 0.3119 R@100: - - 0.633 + - 0.6362 R@1000: - - 0.829 + - 0.8307 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 663d269275..5c40b278b9 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-climate-fever.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.308 + - 0.3119 R@100: - - 0.633 + - 0.6362 R@1000: - - 0.829 + - 0.8307 diff 
--git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.yaml index 2ca5627e6e..1bf1ca502e 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.312 + - 0.3119 R@100: - - 0.636 + - 0.6362 R@1000: - - 0.829 + - 0.8307 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.yaml index b972325036..fa78c29c00 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.312 + - 0.3119 R@100: - - 0.635 + - 0.6362 R@1000: - - 0.830 + - 0.8307 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.yaml index 6874ca99b3..75b05d2957 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator 
VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.509 + - 0.5075 R@100: - - 0.844 + - 0.8454 R@1000: - - 0.962 + - 0.9611 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 37b8d0234a..73cad003f7 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.509 + - 0.5075 R@100: - - 0.843 + - 0.8454 R@1000: - - 0.962 + - 0.9611 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.yaml index 14ca3ae9d3..b60022254d 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.507 + - 0.5075 R@100: - - 0.845 + - 0.8454 R@1000: - - 0.962 + - 0.9611 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.yaml index 695cf72c02..3bf4f0740b 100644 --- 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.508 + - 0.5075 R@100: - - 0.845 + - 0.8454 R@1000: - - 0.962 + - 0.9611 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.yaml index d3b4a8d1f1..bde38d46b2 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.484 + - 0.4857 R@100: - - 0.756 + - 0.7587 R@1000: - - 0.883 + - 0.8839 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 2451cbf860..74bba72cd0 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title 
-removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.484 + - 0.4857 R@100: - - 0.756 + - 0.7587 R@1000: - - 0.882 + - 0.8839 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.yaml index a15ff4521e..241068c061 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.485 + - 0.4857 R@100: - - 0.757 + - 0.7587 R@1000: - - 0.882 + - 0.8839 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.yaml index 88347f114b..84520b8cb1 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.484 + - 0.4857 R@100: - - 0.756 + - 0.7587 R@1000: - - 0.881 + - 0.8839 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.yaml index 44878df7c5..0d6c473cb4 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.593 + - 0.5965 R@100: - - 0.901 + - 0.9036 R@1000: - - 0.969 + - 0.9719 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 4b90e357cb..8e9e24cd79 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.593 + - 0.5965 R@100: - - 0.901 + - 0.9036 R@1000: - - 0.969 + - 0.9719 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.yaml index a9f26257e4..30074bf38b 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.595 + - 0.5965 R@100: - - 0.901 + - 0.9036 
R@1000: - - 0.970 + - 0.9719 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.yaml index 6502a1a163..26504e4f79 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.595 + - 0.5965 R@100: - - 0.901 + - 0.9036 R@1000: - - 0.970 + - 0.9719 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.yaml index ba9efc3c54..3556d02fcd 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.415 + - 0.4127 R@100: - - 0.767 + - 0.7682 R@1000: - - 0.909 + - 0.9117 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 29b6d6553e..39d45c96da 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ 
-40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.416 + - 0.4127 R@100: - - 0.767 + - 0.7682 R@1000: - - 0.909 + - 0.9117 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.yaml index fd801a9c0c..424fe7dea1 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.412 + - 0.4127 R@100: - - 0.767 + - 0.7682 R@1000: - - 0.911 + - 0.9117 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.yaml index b5737a38f5..67c007f45c 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.413 + - 0.4127 R@100: - - 0.767 + - 0.7682 R@1000: - - 0.911 + - 0.9117 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.yaml index 33e958097c..229149abc2 100644 
--- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.315 + - 0.3163 R@100: - - 0.692 + - 0.6922 R@1000: - - 0.881 + - 0.8810 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.yaml index e7a07d89a7..db3fa687bc 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.315 + - 0.3163 R@100: - - 0.692 + - 0.6922 R@1000: - - 0.881 + - 0.8810 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.yaml index 20f9e5603d..cf3d3dfe8a 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: 
-generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.316 + - 0.3163 R@100: - - 0.692 + - 0.6922 R@1000: - - 0.881 + - 0.8810 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.yaml index d63b15e0d9..ec44c77af1 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.316 + - 0.3163 R@100: - - 0.692 + - 0.6922 R@1000: - - 0.881 + - 0.8810 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.yaml index b42c10fc68..b4d2e0c83e 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.474 + - 0.4722 R@100: - - 0.810 + - 0.8081 R@1000: - - 0.940 + - 0.9406 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 
ab1e51c978..7a89a98ca3 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.473 + - 0.4722 R@100: - - 0.810 + - 0.8081 R@1000: - - 0.940 + - 0.9406 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.yaml index 1fa509562e..b756798155 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.472 + - 0.4722 R@100: - - 0.808 + - 0.8081 R@1000: - - 0.941 + - 0.9406 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.yaml index a2fc352cdc..8d170c3c2f 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.472 + - 0.4722 R@100: - - 0.808 + - 0.8081 R@1000: - - 
0.941 + - 0.9406 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.yaml index cd3afc80bb..6bd85a5cdf 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.425 + - 0.4242 R@100: - - 0.786 + - 0.7856 R@1000: - - 0.934 + - 0.9348 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 6bd93d48fa..464fc436d1 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.424 + - 0.4242 R@100: - - 0.787 + - 0.7856 R@1000: - - 0.934 + - 0.9348 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.yaml index 
982d97140c..1eea05c9a0 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.424 + - 0.4242 R@100: - - 0.786 + - 0.7856 R@1000: - - 0.935 + - 0.9348 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.yaml index 4d4d2f027b..68519a1c85 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.424 + - 0.4242 R@100: - - 0.786 + - 0.7856 R@1000: - - 0.935 + - 0.9348 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.yaml index 5e62f88dc1..2e9cba2422 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.371 + - 0.3732 R@100: - - 0.672 + - 0.6727 
R@1000: - - 0.852 + - 0.8445 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.yaml index cbf46162b4..3409b76059 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.370 + - 0.3732 R@100: - - 0.671 + - 0.6727 R@1000: - - 0.854 + - 0.8445 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.yaml index 5310c79f7f..3cd3470a50 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.373 + - 0.3732 R@100: - - 0.673 + - 0.6727 R@1000: - - 0.849 + - 0.8445 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.yaml index 1d9b240721..df46c1d1a0 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 
@@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.373 + - 0.3732 R@100: - - 0.672 + - 0.6727 R@1000: - - 0.849 + - 0.8445 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.yaml index e56259fe72..f12614bad3 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.312 + - 0.3115 R@100: - - 0.648 + - 0.6486 R@1000: - - 0.852 + - 0.8537 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 961a16900c..ca011134dc 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.312 + - 0.3115 R@100: - - 0.648 + - 0.6486 R@1000: - - 0.853 + - 0.8537 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.yaml index 18d4c3440f..1a1268ea6a 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.311 + - 0.3115 R@100: - - 0.647 + - 0.6486 R@1000: - - 0.852 + - 0.8537 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.yaml index 10ea1e41ba..d87f14d5a6 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.311 + - 0.3115 R@100: - - 0.647 + - 0.6486 R@1000: - - 0.853 + - 0.8537 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.yaml index 21fda80273..b7288f385e 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator 
VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.420 + - 0.4219 R@100: - - 0.777 + - 0.7797 R@1000: - - 0.924 + - 0.9237 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 3c003c9687..9b13d44cc7 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.421 + - 0.4219 R@100: - - 0.777 + - 0.7797 R@1000: - - 0.924 + - 0.9237 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.yaml index f4cd8b81e0..341442b695 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.422 + - 0.4219 R@100: - - 0.780 + - 0.7797 R@1000: - - 0.925 + - 0.9237 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.yaml index 32e9ad0939..3b441b36dd 100644 --- 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.422 + - 0.4219 R@100: - - 0.780 + - 0.7797 R@1000: - - 0.925 + - 0.9237 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.yaml index 3560628fb1..5dd697e0da 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.411 + - 0.4065 R@100: - - 0.780 + - 0.7774 R@1000: - - 0.937 + - 0.9380 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.yaml index dedc66c85b..73db9e0d92 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator 
VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.409 + - 0.4065 R@100: - - 0.780 + - 0.7774 R@1000: - - 0.937 + - 0.9380 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.yaml index c86cff9208..c29d408982 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.406 + - 0.4065 R@100: - - 0.777 + - 0.7774 R@1000: - - 0.937 + - 0.9380 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.yaml index d46acde415..361dfdd2cc 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.407 + - 0.4065 R@100: - - 0.777 + - 0.7774 R@1000: - - 0.937 + - 0.9380 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.yaml index cb96db8cb4..df62c58e8e 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.354 + - 0.3547 R@100: - - 0.706 + - 0.7065 R@1000: - - 0.886 + - 0.8861 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 8e194869cc..acea963eca 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.354 + - 0.3547 R@100: - - 0.707 + - 0.7065 R@1000: - - 0.886 + - 0.8861 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.yaml index 668d9f6e0c..87cdec52cc 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.355 + - 0.3547 
R@100: - - 0.705 + - 0.7065 R@1000: - - 0.886 + - 0.8861 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.yaml index 7d60f71c80..ee96822c77 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.355 + - 0.3547 R@100: - - 0.703 + - 0.7065 R@1000: - - 0.886 + - 0.8861 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.yaml index 780c0900b4..558fe3ea97 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.407 + - 0.4074 R@100: - - 0.527 + - 0.5303 R@1000: - - 0.776 + - 0.7833 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 20fef020d0..47a332c494 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.408 + - 0.4074 R@100: - - 0.528 + - 0.5303 R@1000: - - 0.778 + - 0.7833 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.yaml index ebae8abdd3..ee52477158 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.407 + - 0.4074 R@100: - - 0.528 + - 0.5303 R@1000: - - 0.778 + - 0.7833 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.yaml index e808713681..d245f2f936 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.408 + - 0.4074 R@100: - - 0.529 + - 0.5303 R@1000: - - 0.778 + - 0.7833 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.yaml index 6c9c85bf9c..d2e660e26a 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-fever.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.860 + - 0.8630 R@100: - - 0.967 + - 0.9719 R@1000: - - 0.980 + - 0.9855 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 37636b7608..9f335ffd27 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-fever.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.860 + - 0.8630 R@100: - - 0.967 + - 0.9719 R@1000: - - 0.980 + - 0.9855 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.yaml index 3d64b9941e..7b26b2a7e4 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 
-efSearch 1000 results: nDCG@10: - - 0.861 + - 0.8630 R@100: - - 0.967 + - 0.9719 R@1000: - - 0.980 + - 0.9855 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.yaml index d1a6b4ed2e..6b215f43b6 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.860 + - 0.8630 R@100: - - 0.967 + - 0.9719 R@1000: - - 0.980 + - 0.9855 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.yaml index 5aab456f53..dabfbfdc37 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-fiqa.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.401 + - 0.4065 R@100: - - 0.737 + - 0.7415 R@1000: - - 0.902 + - 0.9083 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml index fc7870ee2a..b7d8c74bce 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-fiqa.test.txt models: 
- - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.401 + - 0.4065 R@100: - - 0.737 + - 0.7415 R@1000: - - 0.902 + - 0.9083 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.yaml index 1d0c2200a8..296cb76cbf 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.405 + - 0.4065 R@100: - - 0.739 + - 0.7415 R@1000: - - 0.902 + - 0.9083 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.yaml index 9c92344886..2528a056a9 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.405 + - 0.4065 R@100: - - 0.739 + - 0.7415 R@1000: - - 0.902 + - 0.9083 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.yaml index e15760d29f..672ec20416 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 
+40,14 @@ topics: qrel: qrels.beir-v1.0.0-hotpotqa.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.722 + - 0.7259 R@100: - - 0.866 + - 0.8727 R@1000: - - 0.935 + - 0.9424 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml index b4686f4d5f..126e98a8ae 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-hotpotqa.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.722 + - 0.7259 R@100: - - 0.867 + - 0.8727 R@1000: - - 0.936 + - 0.9424 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.yaml index e0b75363c6..141b755960 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.722 + - 0.7259 R@100: - - 0.866 + - 0.8727 R@1000: - - 0.936 + - 0.9424 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.yaml index 
4ab4097ac7..8051257bb1 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.722 + - 0.7259 R@100: - - 0.866 + - 0.8727 R@1000: - - 0.935 + - 0.9424 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.yaml index 35348e3ee1..1c13cb8a38 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-nfcorpus.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.373 + - 0.3735 R@100: - - 0.338 + - 0.3368 R@1000: - - 0.657 + - 0.6622 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 2ffe37dd9f..e53328fecf 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-nfcorpus.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 
0.374 + - 0.3735 R@100: - - 0.339 + - 0.3368 R@1000: - - 0.657 + - 0.6622 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.yaml index 79a7619f33..f5dface1da 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.374 + - 0.3735 R@100: - - 0.337 + - 0.3368 R@1000: - - 0.661 + - 0.6622 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.yaml index 784bfc1c62..89e42a62d6 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.374 + - 0.3735 R@100: - - 0.337 + - 0.3368 R@1000: - - 0.661 + - 0.6622 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.yaml index 6ea1579e96..7e7686d7d8 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-nq.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery 
-threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.538 + - 0.5413 R@100: - - 0.940 + - 0.9415 R@1000: - - 0.984 + - 0.9859 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 3b93a71352..1db044cf7e 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-nq.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.538 + - 0.5413 R@100: - - 0.939 + - 0.9415 R@1000: - - 0.984 + - 0.9859 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.yaml index ce009a4faf..72d14423f9 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.541 + - 0.5413 R@100: - - 0.940 + - 0.9415 R@1000: - - 0.984 + - 0.9859 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.yaml index 3ca4e1680e..970c1d211a 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery 
-threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.541 + - 0.5413 R@100: - - 0.940 + - 0.9415 R@1000: - - 0.984 + - 0.9859 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.yaml index 57e4455e8b..d850cfa267 100644 --- a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-quora.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.888 + - 0.8890 R@100: - - 0.997 + - 0.9967 R@1000: - - 1.000 + - 0.9998 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 77af8d7fee..08d0afa7f8 100644 --- a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-quora.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.887 + - 0.8890 R@100: - - 0.997 + - 0.9967 R@1000: - - 1.000 + - 0.9998 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.yaml index 8500b4d1cc..df81d9322a 100644 --- 
a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.889 + - 0.8890 R@100: - - 0.997 + - 0.9967 R@1000: - - 1.000 + - 0.9998 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.yaml index abdf791911..aa0be90af0 100644 --- a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.888 + - 0.8890 R@100: - - 0.997 + - 0.9967 R@1000: - - 1.000 + - 0.9998 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.yaml index 1a8412f7c2..134a0bd288 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-robust04.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.447 + - 0.4465 R@100: - - 0.347 + - 0.3507 R@1000: - - 0.596 + - 0.5981 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.yaml 
b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 5fabd9effd..1eb90d29fd 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-robust04.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.445 + - 0.4465 R@100: - - 0.347 + - 0.3507 R@1000: - - 0.592 + - 0.5981 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.yaml index bfbbac69c2..df18221aa8 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.447 + - 0.4465 R@100: - - 0.350 + - 0.3507 R@1000: - - 0.596 + - 0.5981 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.yaml index 3ab5662cf1..46978f5131 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.444 + - 0.4465 R@100: - - 0.350 + - 0.3507 R@1000: - - 0.595 + - 0.5981 diff 
--git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.yaml index 3f73ef6439..c1c0218348 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-scidocs.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.217 + - 0.2170 R@100: - - 0.494 + - 0.4959 R@1000: - - 0.785 + - 0.7824 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.yaml index daf07f5d34..746b029bb2 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-scidocs.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.217 + - 0.2170 R@100: - - 0.493 + - 0.4959 R@1000: - - 0.784 + - 0.7824 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.yaml index 5c9c589d62..09ca7ec2a4 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.yaml @@ 
-46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.217 + - 0.2170 R@100: - - 0.496 + - 0.4959 R@1000: - - 0.783 + - 0.7824 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.yaml index ece1cfe0f2..7fdf691f29 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.217 + - 0.2170 R@100: - - 0.496 + - 0.4959 R@1000: - - 0.783 + - 0.7824 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.yaml index 680c54e9c0..23b7bb3320 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-scifact.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.741 + - 0.7408 R@100: - - 0.969 + - 0.9667 R@1000: - - 0.997 + - 0.9967 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 58dec79a6e..25cd77bd5d 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.yaml 
+++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-scifact.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.740 + - 0.7408 R@100: - - 0.969 + - 0.9667 R@1000: - - 0.997 + - 0.9967 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.yaml index 57d1613b4c..d2b93a87bb 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.741 + - 0.7408 R@100: - - 0.967 + - 0.9667 R@1000: - - 0.997 + - 0.9967 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.yaml index 14456a9410..0e421989d6 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.741 + - 0.7408 R@100: - - 0.967 + - 0.9667 R@1000: - - 0.997 + - 0.9967 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.yaml index 
b71665cbc4..f68213f7ac 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-signal1m.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.278 + - 0.2886 R@100: - - 0.291 + - 0.3112 R@1000: - - 0.490 + - 0.5331 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 43a0944d39..84c9858a1e 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-signal1m.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.277 + - 0.2886 R@100: - - 0.292 + - 0.3112 R@1000: - - 0.498 + - 0.5331 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.yaml index 9f354c4428..551c014cba 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.282 + - 0.2886 R@100: - - 
0.298 + - 0.3112 R@1000: - - 0.500 + - 0.5331 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.yaml index d484d86af2..66bfd60462 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.280 + - 0.2886 R@100: - - 0.294 + - 0.3112 R@1000: - - 0.495 + - 0.5331 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.yaml index c8430101c1..ef7771f3a5 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-trec-covid.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.784 + - 0.7814 R@100: - - 0.140 + - 0.1406 R@1000: - - 0.475 + - 0.4768 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 02cf08abba..0e72af904f 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-trec-covid.test.txt 
models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.784 + - 0.7814 R@100: - - 0.140 + - 0.1406 R@1000: - - 0.475 + - 0.4768 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.yaml index 83a4932d81..c2ea0de9a7 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.781 + - 0.7814 R@100: - - 0.141 + - 0.1406 R@1000: - - 0.477 + - 0.4768 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.yaml index 73f94c3243..6a05675c75 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.781 + - 0.7814 R@100: - - 0.141 + - 0.1406 R@1000: - - 0.476 + - 0.4768 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.yaml index 44c8e13db4..3531614b58 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-trec-news.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.432 + - 0.4425 R@100: - - 0.490 + - 0.4992 R@1000: - - 0.777 + - 0.7875 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.yaml index f7f7b424fe..3611aeb913 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-trec-news.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.434 + - 0.4425 R@100: - - 0.488 + - 0.4992 R@1000: - - 0.774 + - 0.7875 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.yaml index a3a7759838..c07663b8fb 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.441 + - 0.4425 R@100: - - 0.488 + - 0.4992 R@1000: - - 0.770 + - 0.7875 diff --git 
a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.yaml index dcd435b7df..6fea4eefc6 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.439 + - 0.4425 R@100: - - 0.492 + - 0.4992 R@1000: - - 0.780 + - 0.7875 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.yaml index 323741f18f..77c8af193d 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -40,14 +40,14 @@ topics: qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.252 + - 0.2570 R@100: - - 0.488 + - 0.4857 R@1000: - - 0.831 + - 0.8298 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 0d260cd4c9..4f41a1eadf 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -40,14 +40,14 @@ topics: qrel: 
qrels.beir-v1.0.0-webis-touche2020.test.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.251 + - 0.2570 R@100: - - 0.487 + - 0.4857 R@1000: - - 0.833 + - 0.8298 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.yaml index 55736848fa..7219cd7b5e 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads 16 -hits 1000 -efSearch 1000 results: nDCG@10: - - 0.257 + - 0.2570 R@100: - - 0.486 + - 0.4857 R@1000: - - 0.831 + - 0.8298 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.yaml index 2efa738683..5d3f4e86bf 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.onnx.yaml @@ -46,8 +46,8 @@ models: params: -generator VectorQueryGenerator -topicField title -removeQuery -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 results: nDCG@10: - - 0.257 + - 0.2570 R@100: - - 0.487 + - 0.4857 R@1000: - - 0.831 + - 0.8298