Skip to content

Commit

Permalink
Implement order preserving transform for inner product search (spotif…
Browse files Browse the repository at this point in the history
…y#25)

* WIP implement order preserving transform

* Get tests working.

* Add test for new inner product accuracy.

* try setting specific numpy version, use default_rng instead of seed

* downgrade to latest available for python 3.8

* add note on inner product implementation, fix np usage in test

* Fix ASAN violations.

* WIP pull in metadata changes

* remove todo

* unpin numpy

* WIP

* Fix order-preserving transform loading and saving.

* clang-format

---------

Co-authored-by: Peter Sobot <psobot@spotify.com>
  • Loading branch information
dylanrb123 and psobot authored Oct 6, 2023
1 parent ccc3ef7 commit ef07b9a
Show file tree
Hide file tree
Showing 34 changed files with 243 additions and 44 deletions.
23 changes: 21 additions & 2 deletions cpp/Metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@ namespace Metadata {
*/
class V1 {
public:
/**
 * Field-wise constructor: each argument is copied into the member of the
 * same name.
 */
V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType)
V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType,
float maxNorm, bool useOrderPreservingTransform)
: numDimensions(numDimensions), spaceType(spaceType),
storageDataType(storageDataType) {}
storageDataType(storageDataType), maxNorm(maxNorm),
useOrderPreservingTransform(useOrderPreservingTransform) {}

V1() {}
/** Virtual destructor; does no work of its own. */
virtual ~V1() = default;
Expand All @@ -48,6 +50,15 @@ class V1 {

/**
 * Returns the stored space type.
 *
 * Declared const so it can be called through a const V1 reference, matching
 * getUseOrderPreservingTransform().
 */
SpaceType getSpaceType() const { return spaceType; }

float getMaxNorm() { return maxNorm; }

/** Returns the stored useOrderPreservingTransform flag. */
bool getUseOrderPreservingTransform() const {
  return useOrderPreservingTransform;
}
/**
 * Overwrites the stored useOrderPreservingTransform flag.
 *
 * @param newValue value to store.
 */
void setUseOrderPreservingTransform(bool newValue) {
  useOrderPreservingTransform = newValue;
}

/**
 * Overwrites the stored dimension count.
 *
 * @param newNumDimensions value to store; it is not validated here.
 */
void setNumDimensions(int newNumDimensions) {
  numDimensions = newNumDimensions;
}
Expand All @@ -58,25 +69,33 @@ class V1 {

/**
 * Overwrites the stored space type.
 *
 * @param newSpaceType value to store.
 */
void setSpaceType(SpaceType newSpaceType) {
  spaceType = newSpaceType;
}

/**
 * Overwrites the stored maxNorm value.
 *
 * @param newMaxNorm value to store; it is not validated here.
 */
void setMaxNorm(float newMaxNorm) {
  maxNorm = newMaxNorm;
}

/**
 * Writes this metadata block to `stream`.
 *
 * Layout, in write order: the four bytes "VOYA", the value returned by
 * version(), then numDimensions, spaceType, storageDataType, maxNorm and
 * useOrderPreservingTransform, each written with writeBinaryPOD().
 *
 * @param stream destination the bytes are written to.
 */
virtual void serializeToStream(std::shared_ptr<OutputStream> stream) {
  // Header: tag bytes followed by the version.
  stream->write("VOYA", 4);
  writeBinaryPOD(stream, version());

  // Payload: keep this order identical to the reads in loadFromStream().
  writeBinaryPOD(stream, numDimensions);
  writeBinaryPOD(stream, spaceType);
  writeBinaryPOD(stream, storageDataType);
  writeBinaryPOD(stream, maxNorm);
  writeBinaryPOD(stream, useOrderPreservingTransform);
}

/**
 * Fills this object's fields from `stream`.
 *
 * Reads numDimensions, spaceType, storageDataType, maxNorm and
 * useOrderPreservingTransform with readBinaryPOD(), in the order
 * serializeToStream() writes them. Neither the "VOYA" tag nor the version is
 * read here.
 *
 * @param stream source the bytes are read from.
 */
virtual void loadFromStream(std::shared_ptr<InputStream> stream) {
  // Version has already been loaded before we get here!
  readBinaryPOD(stream, numDimensions);
  readBinaryPOD(stream, spaceType);
  readBinaryPOD(stream, storageDataType);
  readBinaryPOD(stream, maxNorm);
  readBinaryPOD(stream, useOrderPreservingTransform);
}

private:
// Stored fields. serializeToStream() writes them, and loadFromStream() reads
// them back, in this same order.
int numDimensions;
SpaceType spaceType;
StorageDataType storageDataType;
// NOTE(review): presumably the largest vector norm in the index, kept for the
// order-preserving inner-product transform - confirm against the index code.
float maxNorm;
// NOTE(review): presumably records whether vectors were stored using that
// transform - confirm against the index code.
bool useOrderPreservingTransform;
};

static std::unique_ptr<Metadata::V1>
Expand Down
Loading

0 comments on commit ef07b9a

Please sign in to comment.