Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add vector type to Dgraph #9050

Merged
merged 13 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix comments
  • Loading branch information
Harshil Goel committed Mar 19, 2024
commit afcdab51ba7bba7d334f449a78f681f5363ab4bb
4 changes: 2 additions & 2 deletions protos/pb.proto
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ message Result {
repeated FacetsList facet_matrix = 5;
repeated LangList lang_matrix = 6;
bool list = 7;
map<string, uint64> extra_metrics = 8;
map<string, uint64> vector_metrics = 8;
}

message Order {
Expand Down Expand Up @@ -526,7 +526,7 @@ message SchemaUpdate {
message VectorSpec {
mangalaman93 marked this conversation as resolved.
Show resolved Hide resolved
// This names the kind of Vector Index, e.g.,
// hnsw, lsh, hypertree, ...
string name = 1;
string indexName = 1;
mangalaman93 marked this conversation as resolved.
Show resolved Hide resolved
repeated OptionPair options = 2;
}

Expand Down
757 changes: 379 additions & 378 deletions protos/pb/pb.pb.go

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions query/math.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
package query

import (
"testing"

"github.com/golang/glog"
"github.com/pkg/errors"

Expand All @@ -31,7 +29,6 @@ type mathTree struct {
Const types.Val // If it's a const value node.
Val map[uint64]types.Val
Child []*mathTree
t *testing.T
}

var (
Expand Down
23 changes: 17 additions & 6 deletions query/math_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,20 @@ func TestProcessBinary(t *testing.T) {
}},
out: types.Val{Tid: types.FloatID, Value: 8.0},
},
{in: &mathTree{
harshil-goel marked this conversation as resolved.
Show resolved Hide resolved
Fn: "+",
Child: []*mathTree{
{Const: types.Val{
Tid: types.VFloatID,
Value: []float32{0.0005, 0.25001, 0.7500001}}},
{Const: types.Val{
Tid: types.VFloatID,
Value: []float32{0.0005, 0.25001, 0.7500001}}},
}},
out: types.Val{
Tid: types.VFloatID,
Value: []float32{0.001, 0.50002, 1.5000002}},
},
{in: &mathTree{
Fn: "+",
Child: []*mathTree{
Expand Down Expand Up @@ -269,14 +283,11 @@ func TestProcessBinary(t *testing.T) {
Child: []*mathTree{
{Const: types.Val{
Tid: types.IntID,
Value: int64(2)},
t: t},
Value: int64(2)}},
{Const: types.Val{
Tid: types.VFloatID,
Value: []float32{0.25, 0.5, 0.75}},
t: t},
},
t: t},
Value: []float32{0.25, 0.5, 0.75}}},
}},
out: types.Val{
Tid: types.VFloatID,
Value: []float32{0.5, 1.0, 1.5}},
Expand Down
5 changes: 2 additions & 3 deletions schema/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ func checkSchema(t *testing.T, h map[string]*pb.SchemaUpdate, expected []nameTyp
for _, nt := range expected {
typ, found := h[nt.name]
require.True(t, found, nt)
require.EqualValuesf(t, *nt.typ, *typ, "found in map: %+v\n expected: %+v",
*typ, *nt.typ)
require.EqualValuesf(t, *nt.typ, *typ, "found in map: %+v\n expected: %+v", *typ, *nt.typ)
}
}

Expand All @@ -53,7 +52,7 @@ age:int .
name: string .
address: string .
<http://scalar.com/helloworld/> : string .
coordinates: vfloat .
coordinates: float32vector .
`

func TestSchema(t *testing.T) {
Expand Down
4 changes: 2 additions & 2 deletions types/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func ParseVFloat(s string) ([]float32, error) {
}

func cannotConvertToVFloat(s string) error {
return errors.Errorf("Cannot convert %s to vfloat", s)
return errors.Errorf("cannot convert %s to vfloat", s)
}

// Convert converts the value to given scalar type.
Expand All @@ -108,7 +108,7 @@ func Convert(from Val, toID TypeID) (Val, error) {
// sanity: we expect a value
data, ok := from.Value.([]byte)
if !ok {
return to, errors.Errorf("Invalid data to convert to %s", toID.Name())
return to, errors.Errorf("invalid data to convert to %s", toID.Name())
}

fromID := from.Tid
Expand Down
22 changes: 11 additions & 11 deletions types/scalar_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ const (
)

var typeNameMap = map[string]TypeID{
"default": DefaultID,
"binary": BinaryID,
"int": IntID,
"float": FloatID,
"bool": BoolID,
"datetime": DateTimeID,
"geo": GeoID,
"uid": UidID,
"string": StringID,
"password": PasswordID,
"vfloat": VFloatID,
"default": DefaultID,
"binary": BinaryID,
"int": IntID,
"float": FloatID,
"bool": BoolID,
"datetime": DateTimeID,
"geo": GeoID,
"uid": UidID,
"string": StringID,
"password": PasswordID,
"float32vector": VFloatID,
harshil-goel marked this conversation as resolved.
Show resolved Hide resolved
}

// TypeID represents the type of the data.
Expand Down
45 changes: 10 additions & 35 deletions types/value.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,50 +21,23 @@ import (
"math"
"strconv"
"strings"
"unsafe"
)

// BytesAsFloatArray(encoded) converts encoded into a []float32.
// If len(encoded) % 4 is not 0, it will ignore any trailing
// bytes, and simply convert 4 bytes at a time to generate the
// float32 entries.
// WARNING: Current implementation always requires a memory allocation!
// Current implementation assumes little-endian encoding.
func BytesAsFloatArray(encoded []byte) []float32 {
// Unfortunately, this is not as simple as casting the result,
// and it is also not possible to directly use the
// golang "unsafe" library to directly do the conversion.
// The operation:
// []float32(encoded) does not compile!
// Whereas:
// unsafe.Slice((*float32)(unsafe.Pointer(&encoded[0])), len(encoded)/4)
// might compile (actually have not tested it), but its success or
// failure depends on agreement of the serialization mechanism
// of the source data and the data as it exists on the machine where
// the operation is being performed.
// The machine where this operation gets run might prefer
// BigEndian/LittleEndian, but the machine that sent it may have
// preferred the other, and there is no way to tell!
//
// The solution below, unfortunately, requires another memory
// allocation.
// TODO Potential optimization: If we detect that current machine is
// using LittleEndian format, there might be a way of making this
// work with the golang "unsafe" library.

mangalaman93 marked this conversation as resolved.
Show resolved Hide resolved
resultLen := len(encoded) / 4
if resultLen == 0 {
return []float32{}
}
retVal := make([]float32, resultLen)
for i := 0; i < resultLen; i++ {
// Assume LittleEndian for encoding since this is
// the assumption elsewhere when reading from client.
// See dgraph-io/dgo/protos/api.pb.go
// See also dgraph-io/dgraph/types/conversion.go
// This also seems to be the preference from many examples
// I have found via Google search. It's unclear why this
// should be a preference.
bits := binary.LittleEndian.Uint32(encoded)
retVal[i] = math.Float32frombits(bits)
retVal[i] = *(*float32)(unsafe.Pointer(&encoded[0]))
encoded = encoded[4:]
}
return retVal
Expand All @@ -86,15 +59,17 @@ func FloatArrayAsBytes(v []float32) []byte {
}

func FloatArrayAsString(v []float32) string {
retVal := "["
var sb strings.Builder

sb.WriteRune('[')
for i := range v {
retVal += strconv.FormatFloat(float64(v[i]), 'f', -1, 32)
sb.WriteString(strconv.FormatFloat(float64(v[i]), 'f', -1, 32))
if i != len(v)-1 {
retVal += ", "
sb.WriteRune(',')
}
}
retVal += "]"
return retVal
sb.WriteRune(']')
return sb.String()
}

// TypeForValue tries to determine the most likely type based on a value. We only want to use this
Expand Down