Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ All notable changes to this project will be documented in this file. From versio
- Fix not returning `Content-Length` on empty HTTP `201` responses by @laurenceisla in #4518
- Fix inaccurate Server-Timing header durations by @steve-chavez in #4522
- Fix inaccurate "Schema cache queried" logs by @steve-chavez in #4522
- Fix performance and high memory usage of relation hint calculation by @mkleczek in #4462 #4463
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was just going to release v14 then realized this changelog entry was put in the wrong spot, messing up our postgrest-release command.

This was already fixed on #4589, but please be mindful of the CHANGELOG placement @mkleczek 🙏


## [14.1] - 2025-11-05

Expand Down
16 changes: 7 additions & 9 deletions src/PostgREST/Error.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Module : PostgREST.Error
Description : PostgREST error HTTP responses
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE RecordWildCards #-}

module PostgREST.Error
Expand Down Expand Up @@ -41,6 +42,7 @@ import Network.HTTP.Types.Header (Header)
import PostgREST.MediaType (MediaType (..))
import qualified PostgREST.MediaType as MediaType

import PostgREST.SchemaCache (SchemaCache (SchemaCache, dbTablesFuzzyIndex))
import PostgREST.SchemaCache.Identifiers (QualifiedIdentifier (..),
Schema)
import PostgREST.SchemaCache.Relationship (Cardinality (..),
Expand All @@ -49,10 +51,8 @@ import PostgREST.SchemaCache.Relationship (Cardinality (..),
RelationshipsMap)
import PostgREST.SchemaCache.Routine (Routine (..),
RoutineParam (..))
import PostgREST.SchemaCache.Table (Table (..))
import Protolude


class (ErrorBody a, JSON.ToJSON a) => PgrstError a where
status :: a -> HTTP.Status
headers :: a -> [Header]
Expand Down Expand Up @@ -250,7 +250,7 @@ data SchemaCacheError
| NoRelBetween Text Text (Maybe Text) Text RelationshipsMap
| NoRpc Text Text [Text] MediaType Bool [QualifiedIdentifier] [Routine]
| ColumnNotFound Text Text
| TableNotFound Text Text [Table]
| TableNotFound Text Text SchemaCache
deriving Show

instance PgrstError SchemaCacheError where
Expand Down Expand Up @@ -313,7 +313,7 @@ instance ErrorBody SchemaCacheError where
where
onlySingleParams = isInvPost && contentType `elem` [MTTextPlain, MTTextXML, MTOctetStream]
hint (AmbiguousRpc _) = Just "Try renaming the parameters or the function itself in the database so function overloading can be resolved"
hint (TableNotFound schemaName relName tbls) = JSON.String <$> tableNotFoundHint schemaName relName tbls
hint (TableNotFound schemaName relName schemaCache) = JSON.String <$> tableNotFoundHint schemaName relName schemaCache

hint _ = Nothing

Expand Down Expand Up @@ -428,13 +428,11 @@ noRpcHint schema procName params allProcs overloadedProcs =

-- |
-- Do a fuzzy search in all tables in the same schema and return closest result
tableNotFoundHint :: Text -> Text -> [Table] -> Maybe Text
tableNotFoundHint schema tblName tblList
tableNotFoundHint :: Text -> Text -> SchemaCache -> Maybe Text
tableNotFoundHint schema tblName SchemaCache{dbTablesFuzzyIndex}
= fmap (\tbl -> "Perhaps you meant the table '" <> schema <> "." <> tbl <> "'") perhapsTable
where
perhapsTable = Fuzzy.getOne fuzzyTableSet tblName
fuzzyTableSet = Fuzzy.fromList [ tableName tbl | tbl <- tblList, tableSchema tbl == schema]

perhapsTable = (`Fuzzy.getOne` tblName) =<< HM.lookup schema dbTablesFuzzyIndex

compressedRel :: Relationship -> JSON.Value
-- An ambiguousness error cannot happen for computed relationships TODO refactor so this mempty is not needed
Expand Down
12 changes: 6 additions & 6 deletions src/PostgREST/Plan.hs
Original file line number Diff line number Diff line change
Expand Up @@ -170,15 +170,15 @@ dbActionPlan dbAct conf apiReq sCache = case dbAct of

wrappedReadPlan :: QualifiedIdentifier -> AppConfig -> SchemaCache -> ApiRequest -> Bool -> Either Error CrudPlan
wrappedReadPlan identifier conf sCache apiRequest@ApiRequest{iPreferences=Preferences{..},..} headersOnly = do
qi <- findTable identifier (dbTables sCache)
qi <- findTable identifier sCache
rPlan <- readPlan qi conf sCache apiRequest
(handler, mediaType) <- mapLeft ApiRequestError $ negotiateContent conf apiRequest qi iAcceptMediaType (dbMediaHandlers sCache) (hasDefaultSelect rPlan)
if not (null invalidPrefs) && preferHandling == Just Strict then Left $ ApiRequestError $ InvalidPreferences invalidPrefs else Right ()
return $ WrappedReadPlan rPlan SQL.Read handler mediaType headersOnly qi

mutateReadPlan :: Mutation -> ApiRequest -> QualifiedIdentifier -> AppConfig -> SchemaCache -> Either Error CrudPlan
mutateReadPlan mutation apiRequest@ApiRequest{iPreferences=Preferences{..},..} identifier conf sCache = do
qi <- findTable identifier (dbTables sCache)
qi <- findTable identifier sCache
rPlan <- readPlan qi conf sCache apiRequest
mPlan <- mutatePlan mutation qi apiRequest sCache rPlan
if not (null invalidPrefs) && preferHandling == Just Strict then Left $ ApiRequestError $ InvalidPreferences invalidPrefs else Right ()
Expand Down Expand Up @@ -810,10 +810,10 @@ validateAggFunctions aggFunctionsAllowed (Node rp@ReadPlan {select} forest)
| otherwise = Node rp <$> traverse (validateAggFunctions aggFunctionsAllowed) forest

-- | Lookup table in the schema cache before creating read plan
findTable :: QualifiedIdentifier -> TablesMap -> Either Error QualifiedIdentifier
findTable qi@QualifiedIdentifier{..} tableMap =
case HM.lookup qi tableMap of
Nothing -> Left $ SchemaCacheErr $ TableNotFound qiSchema qiName (HM.elems tableMap)
findTable :: QualifiedIdentifier -> SchemaCache -> Either Error QualifiedIdentifier
findTable qi@QualifiedIdentifier{..} sc@SchemaCache{dbTables} =
case HM.lookup qi dbTables of
Nothing -> Left $ SchemaCacheErr $ TableNotFound qiSchema qiName sc
Just _ -> Right qi

addFilters :: ResolverContext -> ApiRequest -> ReadPlanTree -> Either Error ReadPlanTree
Expand Down
4 changes: 2 additions & 2 deletions src/PostgREST/Response.hs
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,10 @@ actionResponse (MaybeDbResult InspectPlan{ipHdrsOnly=headersOnly} body) _ versio
in
Right $ PgrstResponse HTTP.status200 (MediaType.toContentType MTOpenAPI : cLHeader ++ maybeToList (profileHeader schema negotiatedByProfile)) rsBody

actionResponse (NoDbResult (RelInfoPlan qi@QualifiedIdentifier{..})) _ _ _ SchemaCache{dbTables} _ _ =
actionResponse (NoDbResult (RelInfoPlan qi@QualifiedIdentifier{..})) _ _ _ sc@SchemaCache{dbTables} _ _ =
case HM.lookup qi dbTables of
Just tbl -> respondInfo $ allowH tbl
Nothing -> Left $ Error.SchemaCacheErr $ Error.TableNotFound qiSchema qiName (HM.elems dbTables)
Nothing -> Left $ Error.SchemaCacheErr $ Error.TableNotFound qiSchema qiName sc
where
allowH table =
let hasPK = not . null $ tablePKCols table in
Expand Down
40 changes: 28 additions & 12 deletions src/PostgREST/SchemaCache.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ These queries are executed once at startup or when PostgREST is reloaded.

module PostgREST.SchemaCache
( SchemaCache(..)
, TablesFuzzyIndex
, querySchemaCache
, showSummary
, decodeFuncs
Expand Down Expand Up @@ -66,21 +67,28 @@ import PostgREST.SchemaCache.Table (Column (..), ColumnMap,

import qualified PostgREST.MediaType as MediaType

import Control.Arrow ((&&&))
import Protolude
import System.IO.Unsafe (unsafePerformIO)
import Control.Arrow ((&&&))
import qualified Data.FuzzySet as Fuzzy
import Protolude
import System.IO.Unsafe (unsafePerformIO)

type TablesFuzzyIndex = HM.HashMap Schema Fuzzy.FuzzySet

data SchemaCache = SchemaCache
{ dbTables :: TablesMap
, dbRelationships :: RelationshipsMap
, dbRoutines :: RoutineMap
, dbRepresentations :: RepresentationsMap
, dbMediaHandlers :: MediaHandlerMap
, dbTimezones :: TimezoneNames
}
{ dbTables :: TablesMap
, dbRelationships :: RelationshipsMap
, dbRoutines :: RoutineMap
, dbRepresentations :: RepresentationsMap
, dbMediaHandlers :: MediaHandlerMap
, dbTimezones :: TimezoneNames
-- Memoized fuzzy index of table names per schema to support approximate matching
-- Since index construction can be expensive, we build it once and store in the SchemaCache
-- Haskell lazy evaluation ensures it's only built on first use and memoized afterwards
, dbTablesFuzzyIndex :: TablesFuzzyIndex
} deriving (Show)

instance JSON.ToJSON SchemaCache where
toJSON (SchemaCache tabs rels routs reps hdlers tzs) = JSON.object [
toJSON (SchemaCache tabs rels routs reps hdlers tzs _) = JSON.object [
"dbTables" .= JSON.toJSON tabs
, "dbRelationships" .= JSON.toJSON rels
, "dbRoutines" .= JSON.toJSON routs
Expand All @@ -90,7 +98,7 @@ instance JSON.ToJSON SchemaCache where
]

showSummary :: SchemaCache -> Text
showSummary (SchemaCache tbls rels routs reps mediaHdlrs tzs) =
showSummary (SchemaCache tbls rels routs reps mediaHdlrs tzs _) =
T.intercalate ", "
[ show (HM.size tbls) <> " Relations"
, show (HM.size rels) <> " Relationships"
Expand Down Expand Up @@ -138,6 +146,8 @@ data KeyDep
-- | A SQL query that can be executed independently
type SqlQuery = ByteString

maxDbTablesForFuzzySearch :: Int
maxDbTablesForFuzzySearch = 500

querySchemaCache :: AppConfig -> SQL.Transaction SchemaCache
querySchemaCache conf@AppConfig{..} = do
Expand Down Expand Up @@ -166,6 +176,11 @@ querySchemaCache conf@AppConfig{..} = do
, dbRepresentations = reps
, dbMediaHandlers = HM.union mHdlers initialMediaHandlers -- the custom handlers will override the initial ones
, dbTimezones = tzones

, dbTablesFuzzyIndex =
-- Only build fuzzy index for schemas with a reasonable number of tables
-- Fuzzy.FuzzySet is memory heavy we just don't use it for large schemas
Fuzzy.fromList <$> HM.filter ((< maxDbTablesForFuzzySearch) . length) (HM.fromListWith (<>) ((qiSchema &&& pure . qiName) <$> HM.keys tabsWViewsPks))
}
where
schemas = toList configDbSchemas
Expand Down Expand Up @@ -203,6 +218,7 @@ removeInternal schemas dbStruct =
, dbRepresentations = dbRepresentations dbStruct -- no need to filter, not directly exposed through the API
, dbMediaHandlers = dbMediaHandlers dbStruct
, dbTimezones = dbTimezones dbStruct
, dbTablesFuzzyIndex = dbTablesFuzzyIndex dbStruct
}
where
hasInternalJunction ComputedRelationship{} = False
Expand Down
22 changes: 22 additions & 0 deletions test/io/fixtures/big_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11375,12 +11375,34 @@ ALTER TABLE ONLY apflora.zielber

ALTER TABLE apflora."user" ENABLE ROW LEVEL SECURITY;


CREATE SCHEMA fuzzysearch;

-- Create many tables to test fuzzy string search
-- computing hints for non existing tables
DO
$$
DECLARE
r record;
BEGIN
FOR r IN
SELECT
format('CREATE TABLE fuzzysearch.unknown_table_%s ()', n) AS ct
FROM
generate_series(1, 499) n
LOOP
EXECUTE r.ct;
END LOOP;
END
$$;

DROP ROLE IF EXISTS postgrest_test_anonymous;
CREATE ROLE postgrest_test_anonymous;

GRANT postgrest_test_anonymous TO :PGUSER;

GRANT USAGE ON SCHEMA apflora TO postgrest_test_anonymous;
GRANT USAGE ON SCHEMA fuzzysearch TO postgrest_test_anonymous;

GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA apflora
TO postgrest_test_anonymous;
Expand Down
20 changes: 20 additions & 0 deletions test/io/test_big_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,23 @@ def test_should_not_fail_with_stack_overflow(defaultenv):
assert response.status_code == 404
data = response.json()
assert data["code"] == "PGRST205"


def test_second_request_for_non_existent_table_should_be_quick(defaultenv):
"requesting a non-existent relationship should be quick after the fuzzy search index is loaded (2nd request)"

env = {
**defaultenv,
"PGRST_DB_SCHEMAS": "fuzzysearch",
"PGRST_DB_POOL": "2",
"PGRST_DB_ANON_ROLE": "postgrest_test_anonymous",
}

with run(env=env, wait_max_seconds=30) as postgrest:
response = postgrest.session.get("/unknown-table")
assert response.status_code == 404
data = response.json()
assert data["code"] == "PGRST205"
first_duration = response.elapsed.total_seconds()
response = postgrest.session.get("/unknown-table")
assert response.elapsed.total_seconds() < first_duration / 10
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is flaky and fails often both locally and in CI which is not desirable. If the point is to prove that the second request is always faster, I think we should remove the / 10 part?