Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use rerank query in conjunction with LSH URP #4

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.idea/
lucene-solr/
target/
*.iml
258 changes: 155 additions & 103 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ This plugin is the same as [Vector Scoring Plugin for Elasticsearch](https://git

## Plugin installation

The plugin was developed and tested on Solr `6.6.0`.
The plugin was developed and tested on Solr `7.4.0`.

1. Copy VectorPlugin.jar to {solr.install.dir}/dist/plugins/
2. Add the library to solrconfig.xml file:
Expand All @@ -14,149 +14,201 @@ The plugin was developed and tested on Solr `6.6.0`.
```
3. Add the plugin Query parser to solrconfig.xml:
```
<queryParser name="vp" class="com.github.saaay71.solr.VectorQParserPlugin" />
<queryParser name="vp" class="com.github.saaay71.solr.query.VectorQParserPlugin" />
```
4. Add the fieldType `VectorField` to the schema file (managed-schema):
```
<fieldType name="VectorField" class="solr.TextField" indexed="true" termOffsets="true" stored="true" termPayloads="true" termPositions="true" termVectors="true" storeOffsetsWithPositions="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
</analyzer>
</fieldType>
<fieldType name="VectorField" class="solr.BinaryField" stored="true" indexed="false" multiValued="false"/>
```
5. Add the field `vector` to schema file:
```
<field name="vector" type="VectorField" indexed="true" termOffsets="true" stored="true" termPositions="true" termVectors="true" multiValued="true"/>
<field name="_vector_" type="VectorField" />
<field name="_lsh_hash_" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="vector" type="string" indexed="true" stored="true"/>
```
6. Start Solr!

6. Add the LSH update request processor (URP) chain to solrconfig.xml:
```
<updateRequestProcessorChain name="LSH">
<processor class="com.github.saaay71.solr.updateprocessor.LSHUpdateProcessorFactory" >
<int name="seed">5</int>
<int name="buckets">50</int>
<int name="stages">50</int>
<int name="dimensions">6</int>
<str name="field">vector</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
```

7. Start Solr!

## Example

### Add example documents

```sh
curl -X POST -H "Content-Type: application/json" http://localhost:8983/solr/{your-collection-name}/update?commit=true --data-binary '
curl -X POST -H "Content-Type: application/json" "http://localhost:8983/solr/{your-collection-name}/update?update.chain=LSH&commit=true" --data-binary '
[
{"name":"example 0", "vector":"0|1.55 1|3.53 2|2.3 3|0.7 4|3.44 5|2.33 "},
{"name":"example 1", "vector":"0|3.54 1|0.4 2|4.16 3|4.88 4|4.28 5|4.25 "},
{"name":"example 2", "vector":"0|1.11 1|0.6 2|1.47 3|1.99 4|2.91 5|1.01 "},
{"name":"example 3", "vector":"0|0.06 1|4.73 2|0.29 3|1.27 4|0.69 5|3.9 "},
{"name":"example 4", "vector":"0|4.01 1|3.69 2|2 3|4.36 4|1.09 5|0.1 "},
{"name":"example 5", "vector":"0|0.64 1|3.95 2|1.03 3|1.65 4|0.99 5|0.09 "}
{"id":"1", "vector":"1.55,3.53,2.3,0.7,3.44,2.33"},
{"id":"2", "vector":"3.54,0.4,4.16,4.88,4.28,4.25"}
]'
```

### Query documents
Open your browser and paste the following links:
#### Query 1
```
http://localhost:8983/solr/{your-collection-name}/query?fl=name,score,vector&q={!vp f=vector vector="0.1,4.75,0.3,1.2,0.7,4.0"}
http://localhost:8983/solr/{your-collection-name}/query?fl=name,score,vector&q={!vp f=vector vector=\"1.55,3.53,2.3,0.7,3.44,2.33\" lsh=\"true\" topNDocs=\"5\"}&fl=name,score,vector,_vector_,_lsh_hash_
```

You should see the following result:
```
{
"responseHeader":{
"status":0,
"QTime":1,
"QTime":8,
"params":{
"q":"{!myqp f=vector vector=\"0.1,4.75,0.3,1.2,0.7,4.0\"}",
"fl":"name,score,vector"}},
"response":{"numFound":6,"start":0,"maxScore":0.99984086,"docs":[
{
"name":["example 3"],
"vector":["0|0.06 1|4.73 2|0.29 3|1.27 4|0.69 5|3.9 "],
"score":0.99984086},
{
"name":["example 0"],
"vector":["0|1.55 1|3.53 2|2.3 3|0.7 4|3.44 5|2.33 "],
"score":0.7693964},
{
"name":["example 5"],
"vector":["0|0.64 1|3.95 2|1.03 3|1.65 4|0.99 5|0.09 "],
"score":0.76322395},
{
"name":["example 4"],
"vector":["0|4.01 1|3.69 2|2 3|4.36 4|1.09 5|0.1 "],
"score":0.5328145},
{
"name":["example 1"],
"vector":["0|3.54 1|0.4 2|4.16 3|4.88 4|4.28 5|4.25 "],
"score":0.48513117},
"q":"{!vp f=vector vector=\"1.55,3.53,2.3,0.7,3.44,2.33\" lsh=\"true\" topNDocs=\"5\"}",
"fl":"id, score, vector, _vector_, _lsh_hash_",
"wt":"xml"}},
"response":{"numFound":1,"start":0,"maxScore":36.65736,"docs":[
{
"name":["example 2"],
"vector":["0|1.11 1|0.6 2|1.47 3|1.99 4|2.91 5|1.01 "],
"score":0.44909418}]
}}
"id": "1",
"vector":"1.55,3.53,2.3,0.7,3.44,2.33",
"_vector_":"/z/GZmZAYeuFQBMzMz8zMzNAXCj2QBUeuA==",
"_lsh_hash_":["0_8",
"1_35",
"2_7",
"3_10",
"4_2",
"5_35",
"6_16",
"7_30",
"8_27",
"9_12",
"10_7",
"11_32",
"12_48",
"13_36",
"14_10",
"15_7",
"16_42",
"17_5",
"18_3",
"19_2",
"20_1",
"21_0",
"22_24",
"23_18",
"24_42",
"25_31",
"26_35",
"27_8",
"28_1",
"29_24",
"30_47",
"31_14",
"32_22",
"33_39",
"34_0",
"35_34",
"36_34",
"37_39",
"38_27",
"39_27",
"40_45",
"41_10",
"42_21",
"43_34",
"44_41",
"45_9",
"46_31",
"47_0",
"48_4",
"49_43"],
"score":36.65736}
]
}
}
```

#### Query 2
Adding the parameter `cosine=false` calculates the dot product
Querying on other fields in combination with vector scoring.
```
http://localhost:8983/solr/{your-collection-name}/query?fl=name,score,vector&q={!vp f=vector vector="0.1,4.75,0.3,1.2,0.7,4.0" cosine=false}
http://localhost:8983/solr/{your-collection-name}/query?fl=name,score,vector&q={!vp f=vector vector=\"3.54,0.4,4.16,4.88,4.28,4.25\" lsh=\"true\" topNDocs=\"5\" v=\"id:2\"}&fl=name,score,vector,_vector_,_lsh_hash_
```

result of query 2:
or
```

{
"responseHeader":{
"status":0,
"QTime":0,
"params":{
"q":"{!myqp f=vector vector=\"0.1,4.75,0.3,1.2,0.7,4.0\" cosine=false}",
"fl":"name,score,vector"}},
"response":{"numFound":6,"start":0,"maxScore":40.1675,"docs":[
{
"name":["example 3"],
"vector":["0|0.06 1|4.73 2|0.29 3|1.27 4|0.69 5|3.9 "],
"score":40.1675},
{
"name":["example 0"],
"vector":["0|1.55 1|3.53 2|2.3 3|0.7 4|3.44 5|2.33 "],
"score":30.180502},
{
"name":["example 1"],
"vector":["0|3.54 1|0.4 2|4.16 3|4.88 4|4.28 5|4.25 "],
"score":29.354},
{
"name":["example"],
"vector":["0|4.01 1|3.69 2|2 3|4.36 4|1.09 5|0.1 "],
"score":24.923502},
{
"name":["example"],
"vector":["0|0.64 1|3.95 2|1.03 3|1.65 4|0.99 5|0.09 "],
"score":22.1685},
{
"name":["example"],
"vector":["0|1.11 1|0.6 2|1.47 3|1.99 4|2.91 5|1.01 "],
"score":11.867001}]
}}
http://localhost:8983/solr/{your-collection-name}/query?fl=name,score,vector&q={!vp f=vector vector=\"3.54,0.4,4.16,4.88,4.28,4.25\" lsh=\"true\" topNDocs=\"5\"} id:2&fl=name,score,vector,_vector_,_lsh_hash_
```

#### Query 3
Querying on other fields in combination with vector scoring.
```
http://localhost:8983/solr/{your-collection-name}/query?fl=name,score,vector&q={!vp f=vector vector="0.1,4.75,0.3,1.2,0.7,4.0" cosine=false}name="example 2","example 4"
```

result of query 3:
result of query 2:
```
{
"responseHeader":{
"status":0,
"QTime":1,
"QTime":9,
"params":{
"q":"{!myqp f=vector vector=\"0.1,4.75,0.3,1.2,0.7,4.0\" cosine=false}name=\"example 2\",\"example 4\"",
"fl":"name,score,vector"}},
"response":{"numFound":2,"start":0,"maxScore":24.923502,"docs":[
{
"name":["example 4"],
"vector":["0|4.01 1|3.69 2|2 3|4.36 4|1.09 5|0.1 "],
"score":24.923502},
"q":"{!vp f=vector vector=\"3.54,0.4,4.16,4.88,4.28,4.25\" lsh=\"true\" topNDocs=\"5\" v=\"id:2\"}",
"fl":"name, score, vector, _vector_, _lsh_hash_",
"wt":"xml"}},
"response":{"numFound":1,"start":0,"maxScore":38.649788,"docs":[
{
"name":["example 2"],
"vector":["0|1.11 1|0.6 2|1.47 3|1.99 4|2.91 5|1.01 "],
"score":11.867001}]
}}
"id": "2",
"vector":"3.54,0.4,4.16,4.88,4.28,4.25",
"_vector_":"/0Bij1w+zMzNQIUeuECcKPZAiPXDQIgAAA==",
"_lsh_hash_":["0_2",
"1_33",
"2_39",
"3_49",
"4_38",
"5_12",
"6_21",
"7_26",
"8_44",
"9_25",
"10_15",
"11_25",
"12_24",
"13_8",
"14_22",
"15_43",
"16_1",
"17_17",
"18_14",
"19_1",
"20_26",
"21_47",
"22_15",
"23_36",
"24_21",
"25_41",
"26_32",
"27_35",
"28_13",
"29_4",
"30_2",
"31_39",
"32_19",
"33_36",
"34_15",
"35_30",
"36_17",
"37_0",
"38_39",
"39_32",
"40_5",
"41_1",
"42_33",
"43_0",
"44_32",
"45_21",
"46_23",
"47_4",
"48_24",
"49_16"],
"score":38.649788}]
}
}
```
Binary file removed VectorPlugin.jar
Binary file not shown.
Loading