From 42e9ecf72d64cb572dcda96419e580c47fce2629 Mon Sep 17 00:00:00 2001
From: Owen Ou <169064+owenthereal@users.noreply.github.com>
Date: Fri, 30 Jun 2023 13:13:11 -0700
Subject: [PATCH] Install `pg_vector` extension with `pgxman` (#106)

* Get `pgxman install` to work for Spilo

* Get `pgxman install` to work for Postgres

* Add pg_vector acceptance test

* Add pg_vector to Spilo whitelist
---
NOTE(review): this patch had been flattened onto single lines, so the
indentation in the Dockerfile and Dockerfile.spilo hunks below is
reconstructed. The `; \` change in Dockerfile.spilo is whitespace-only and
its original before/after spacing could not be recovered - confirm both
hunks against the target files before applying.

 Dockerfile                                    |  9 +++
 Dockerfile.spilo                              | 10 ++-
 acceptance/shared/cases.go                    | 68 +++++++++++++++++++
 docker-bake.hcl                               |  4 ++
 .../scripts/configure_spilo.py                |  2 +-
 third-party/pgxman_install.sh                 | 59 ++++++++++++++++
 6 files changed, 150 insertions(+), 2 deletions(-)
 create mode 100755 third-party/pgxman_install.sh

diff --git a/Dockerfile b/Dockerfile
index 705fc8ab..2993f470 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -47,3 +47,12 @@ COPY --from=ivm /pg_ivm /
 COPY --from=columnar /pg_ext /
 
 COPY files/postgres/docker-entrypoint-initdb.d /docker-entrypoint-initdb.d/
+
+# Install pgxman extensions
+ARG POSTGRES_BASE_VERSION
+# Always force rebuild of this layer
+ARG TIMESTAMP=1
+COPY third-party/pgxman_install.sh /tmp/pgxman_install.sh
+RUN set -eux; \
+    /tmp/pgxman_install.sh ${POSTGRES_BASE_VERSION}; \
+    rm -f /tmp/pgxman_install.sh
diff --git a/Dockerfile.spilo b/Dockerfile.spilo
index 54bd22e6..869e19c1 100644
--- a/Dockerfile.spilo
+++ b/Dockerfile.spilo
@@ -22,7 +22,7 @@ RUN set -eux; \
     # s3 deps
     lsb-release \
     wget \
-    ; \
+    ; \
     rm -rf /var/lib/apt/lists/*
 
 # mysql ext
@@ -70,3 +70,11 @@ COPY files/spilo/postgres-appliance/pgq_ticker.ini /home/postgres/
 ARG POSTGRES_BASE_VERSION
 # Default envs
 ENV PGVERSION=${POSTGRES_BASE_VERSION} SPILO_PROVIDER=local PGUSER_SUPERUSER=postgres PGPASSWORD_SUPERUSER=hydra
+
+# Install pgxman extensions
+# Always force rebuild of this layer
+ARG TIMESTAMP=1
+COPY third-party/pgxman_install.sh /tmp/pgxman_install.sh
+RUN set -eux; \
+    /tmp/pgxman_install.sh 13,14; \
+    rm -f /tmp/pgxman_install.sh
diff --git a/acceptance/shared/cases.go
b/acceptance/shared/cases.go
index 6cffda6b..2de71561 100644
--- a/acceptance/shared/cases.go
+++ b/acceptance/shared/cases.go
@@ -373,6 +373,74 @@ SELECT count(*) FROM userdata_2;
 			}
 		},
 	},
+	{
+		Name: "pg_vector available",
+		SQL: `
+SELECT count(1) FROM pg_available_extensions WHERE name = 'vector';
+		`,
+		Validate: func(t *testing.T, row pgx.Row) {
+			var count int // count(1) always yields one row; 1 means the extension can be installed
+			if err := row.Scan(&count); err != nil || count != 1 {
+				t.Errorf("pg_vector should exist: count=%d, err=%v", count, err)
+			}
+		},
+	},
+	{
+		Name: "enable pg_vector",
+		SQL: `
+CREATE EXTENSION vector;
+		`,
+	},
+	{
+		Name: "pg_vector ext enabled",
+		SQL: `
+SELECT count(1) FROM pg_extension WHERE extname = 'vector';
+		`,
+		Validate: func(t *testing.T, row pgx.Row) {
+			var count int
+			if err := row.Scan(&count); err != nil {
+				t.Fatal(err)
+			}
+
+			if want, got := 1, count; want != got {
+				t.Errorf("pg_vector ext should exist: want=%d, got=%d", want, got)
+			}
+		},
+	},
+	{
+		Name: "create pg_vector column",
+		SQL: `
+CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3));
+		`,
+	},
+	{
+		Name: "insert pg_vector data",
+		SQL: `
+INSERT INTO items (embedding) VALUES ('[1,2,3]'), ('[4,5,6]');
+		`,
+	},
+	{
+		Name: "validate pg_vector data",
+		SQL: `
+SELECT * FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 1;
+		`,
+		Validate: func(t *testing.T, row pgx.Row) {
+			var result struct {
+				ID        int
+				Embedding string
+			}
+			if err := row.Scan(&result.ID, &result.Embedding); err != nil {
+				t.Fatal(err)
+			}
+
+			if want, got := 1, result.ID; want != got {
+				t.Errorf("item ID should equal: want=%d, got=%d", want, got)
+			}
+			if want, got := "[1,2,3]", result.Embedding; want != got {
+				t.Errorf("item embedding should equal: want=%s, got=%s", want, got)
+			}
+		},
+	},
 }
 
 var (
diff --git a/docker-bake.hcl b/docker-bake.hcl
index 080bd51a..c24bdd13 100644
--- a/docker-bake.hcl
+++ b/docker-bake.hcl
@@ -35,6 +35,10 @@ target "shared" {
     "linux/amd64",
     "linux/arm64"
   ]
+
+  args = {
+    TIMESTAMP = "${timestamp()}"
+  }
 }
 
 target "postgres" {
diff --git a/files/spilo/postgres-appliance/scripts/configure_spilo.py
b/files/spilo/postgres-appliance/scripts/configure_spilo.py
index ed3f62ce..efffbaad 100755
--- a/files/spilo/postgres-appliance/scripts/configure_spilo.py
+++ b/files/spilo/postgres-appliance/scripts/configure_spilo.py
@@ -310,7 +310,7 @@ def deep_update(a, b):
     pg_stat_statements.track_utility: 'off'
     extwlist.extensions: 'btree_gin,btree_gist,citext,extra_window_functions,first_last_agg,hll,\
 hstore,hypopg,intarray,ltree,pgcrypto,pgq,pgq_node,pg_ivm,pg_trgm,postgres_fdw,mysql_fdw,multicorn,\
-parquet_s3_fdw,tablefunc,uuid-ossp'
+parquet_s3_fdw,vector,tablefunc,uuid-ossp'
     extwlist.custom_path: /scripts
     cron.use_background_workers: 'on'
   pg_hba:
diff --git a/third-party/pgxman_install.sh b/third-party/pgxman_install.sh
new file mode 100755
index 00000000..a185c32e
--- /dev/null
+++ b/third-party/pgxman_install.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+# Installs pgxman, then the pinned extensions below for the PostgreSQL version(s) given as $1.
+set -euo pipefail
+
+main() {
+  local pg_version="${1:?usage: pgxman_install.sh <pg_version>[,<pg_version>...]}"
+
+  get_architecture || return 1
+  local _arch="$RETVAL"
+
+  echo "Installing PGXMan extensions for PostgreSQL $pg_version..."
+
+  wget -O "/tmp/pgxman_linux_${_arch}.deb" "https://github.com/pgxman/release/releases/latest/download/pgxman_linux_${_arch}.deb"
+  apt install -y "/tmp/pgxman_linux_${_arch}.deb" # -y: image builds have no TTY to confirm on
+
+  pgxman update
+
+  local _extensions=(
+    "pgvector=0.4.4"
+  )
+  local _ext _packages=()
+  for _ext in "${_extensions[@]}"; do
+    _packages+=("${_ext}@${pg_version}") # one "<ext>=<ver>@<pg versions>" argument per extension
+  done
+
+  pgxman install "${_packages[@]}"
+}
+
+get_architecture() {
+  local _cputype
+  _cputype="$(uname -m)"
+  # Normalise the uname machine name to the suffix used in the pgxman .deb file names.
+  case "$_cputype" in
+    i386 | i486 | i686 | i786 | x86)
+      _cputype=386
+      ;;
+
+    xscale | arm | armv6l | armv7l | armv8l)
+      _cputype=arm
+      ;;
+
+    aarch64 | arm64)
+      _cputype=arm64
+      ;;
+
+    x86_64 | x86-64 | x64 | amd64)
+      _cputype=amd64
+      ;;
+
+    *)
+      echo "unknown CPU type: $_cputype" >&2
+      return 1
+      ;;
+  esac
+
+  RETVAL="$_cputype" # result is handed back to the caller through the RETVAL global
+}
+
+main "$@" # called plainly: inside a "||" list bash would ignore errexit for all of main