Cache and store mem3 shard properties in one place only

nickva · nickva · commit 3754deb3a7db · 2025-10-30T13:52:12.000-04:00
Previously, shard properties were duplicated across all Q*N shards in the cache. If we needed to access them we loaded all the shards (from ets or disk), and then immediately threw them all away except the first one. To optimise and clean up properties, put them in their own ?OPTS ets table. Item lookup, updating, and cleanup mirror the behavior of ?SHARDS. There are a few other related optimisations and cleanups: * In the `for_docid` function we calculated the hash twice: once, when we calculated the `HashKey` for the ets selector, then again, in the `load_shards_from_disk(DbName, DocId)` if we loaded shards from disk. To optimise it, calculate the `HashKey` once and pass it on as `load_shards_from_disk(DbName, HashKey)`. * Previously, we didn't cache the properties for the shards db itself, so add a way to do that. If the shards db changes, the changes feed will restart, and then the shards db properties will update again. These properties may be used in the `_all_docs` call for instance, so having them cached avoids loading them from disk. * Remove functions which were not used anywhere, and stop exporting functions which are used locally only: `mem3:engine/1`, `find_dirty_shards/0`, `gen_engine_opt/1`, `get_props_opt/1`, `get_shard_props/1`. * For mem3 shards and opts ets tables, since there could be multiple pending writers in different processes trying to update different entries, it makes sense to also enable `{write_concurrency, auto}` for those public tables. See: https://www.erlang.org/doc/apps/stdlib/ets#new_2_write_concurrency
diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl
@@ -454,8 +454,7 @@ delete_db_req(#httpd{} = Req, DbName) ->
     end.
 
 do_db_req(#httpd{path_parts = [DbName | _], user_ctx = Ctx} = Req, Fun) ->
-    Shard = hd(mem3:shards(DbName)),
-    Props = couch_util:get_value(props, Shard#shard.opts, []),
+    Props = mem3:props(DbName),
     Opts =
         case Ctx of
             undefined ->
diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl
@@ -670,8 +670,7 @@ doc(Db0, {_} = Doc) ->
             true ->
                 Db0;
             false ->
-                Shard = hd(mem3:shards(Db0)),
-                Props = couch_util:get_value(props, Shard#shard.opts, []),
+                Props = mem3:props(Db0),
                 {ok, Db1} = couch_db:clustered_db(Db0, [{props, Props}]),
                 Db1
         end,
diff --git a/src/fabric/src/fabric_util.erl b/src/fabric/src/fabric_util.erl
@@ -296,15 +296,12 @@ is_users_db(DbName) ->
 path_ends_with(Path, Suffix) ->
     Suffix =:= couch_db:dbname_suffix(Path).
 
-open_cluster_db(#shard{dbname = DbName, opts = Options}) ->
-    case couch_util:get_value(props, Options) of
-        Props when is_list(Props) ->
-            {ok, Db} = couch_db:clustered_db(DbName, [{props, Props}]),
-            Db;
-        _ ->
-            {ok, Db} = couch_db:clustered_db(DbName, []),
-            Db
-    end.
+open_cluster_db(#shard{dbname = DbName}) ->
+    open_cluster_db(DbName);
+open_cluster_db(DbName) when is_binary(DbName) ->
+    Props = mem3:props(DbName),
+    {ok, Db} = couch_db:clustered_db(DbName, [{props, Props}]),
+    Db.
 
 open_cluster_db(DbName, Opts) ->
     % as admin
@@ -320,25 +317,22 @@ kv(Item, Count) ->
 doc_id_and_rev(#doc{id = DocId, revs = {RevNum, [RevHash | _]}}) ->
     {DocId, {RevNum, RevHash}}.
 
-is_partitioned(DbName0) when is_binary(DbName0) ->
-    Shards = mem3:shards(fabric:dbname(DbName0)),
-    is_partitioned(open_cluster_db(hd(Shards)));
+is_partitioned(DbName) when is_binary(DbName) ->
+    is_partitioned(open_cluster_db(DbName));
 is_partitioned(Db) ->
     couch_db:is_partitioned(Db).
 
 validate_all_docs_args(DbName, Args) when is_list(DbName) ->
     validate_all_docs_args(list_to_binary(DbName), Args);
 validate_all_docs_args(DbName, Args) when is_binary(DbName) ->
-    Shards = mem3:shards(fabric:dbname(DbName)),
-    Db = open_cluster_db(hd(Shards)),
+    Db = open_cluster_db(DbName),
     validate_all_docs_args(Db, Args);
 validate_all_docs_args(Db, Args) ->
     true = couch_db:is_clustered(Db),
     couch_mrview_util:validate_all_docs_args(Db, Args).
 
 validate_args(DbName, DDoc, Args) when is_binary(DbName) ->
-    Shards = mem3:shards(fabric:dbname(DbName)),
-    Db = open_cluster_db(hd(Shards)),
+    Db = open_cluster_db(DbName),
     validate_args(Db, DDoc, Args);
 validate_args(Db, DDoc, Args) ->
     true = couch_db:is_clustered(Db),
diff --git a/src/fabric/test/eunit/fabric_bench_test.erl b/src/fabric/test/eunit/fabric_bench_test.erl
@@ -59,7 +59,7 @@ t_old_db_deletion_works(_Ctx) ->
     % Quick db creation and deletion is racy so
     % we have to wait until the db is gone before proceeding.
     WaitFun = fun() ->
-        try mem3_shards:opts_for_db(Db) of
+        try mem3:props(Db) of
             _ -> wait
         catch
             error:database_does_not_exist ->
diff --git a/src/mem3/include/mem3.hrl b/src/mem3/include/mem3.hrl
@@ -22,7 +22,7 @@
     dbname :: binary() | 'undefined',
     range :: [non_neg_integer() | '$1' | '$2'] | '_' | 'undefined',
     ref :: reference() | '_' | 'undefined',
-    opts :: list() | 'undefined'
+    opts = []:: list() | 'undefined'
 }).
 
 %% Do not reference outside of mem3.
@@ -33,7 +33,7 @@
     range :: [non_neg_integer() | '$1' | '$2'] | '_',
     ref :: reference() | 'undefined' | '_',
     order :: non_neg_integer() | 'undefined' | '_',
-    opts :: list()
+    opts = []:: list()
 }).
 
 %% types
diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
@@ -18,6 +18,7 @@
     restart/0,
     nodes/0,
     node_info/2,
+    props/1,
     shards/1, shards/2,
     choose_shards/2,
     n/1, n/2,
@@ -40,7 +41,7 @@
 -export([generate_shard_suffix/0]).
 
 %% For mem3 use only.
--export([name/1, node/1, range/1, engine/1]).
+-export([name/1, node/1, range/1]).
 
 -include_lib("mem3/include/mem3.hrl").
 
@@ -115,6 +116,11 @@ nodes() ->
 node_info(Node, Key) ->
     mem3_nodes:get_node_info(Node, Key).
 
+-spec props(DbName :: iodata()) -> list().  % `[]` would mean "always empty"
+props(DbName) ->
+    Opts = mem3_shards:opts_for_db(DbName),
+    couch_util:get_value(props, Opts, []).
+
 -spec shards(DbName :: iodata()) -> [#shard{}].
 shards(DbName) ->
     shards_int(DbName, []).
@@ -135,8 +141,7 @@ shards_int(DbName, Options) ->
                     name = ShardDbName,
                     dbname = ShardDbName,
                     range = [0, (2 bsl 31) - 1],
-                    order = undefined,
-                    opts = []
+                    order = undefined
                 }
             ];
         ShardDbName ->
@@ -147,8 +152,7 @@ shards_int(DbName, Options) ->
                     node = config:node_name(),
                     name = ShardDbName,
                     dbname = ShardDbName,
-                    range = [0, (2 bsl 31) - 1],
-                    opts = []
+                    range = [0, (2 bsl 31) - 1]
                 }
             ];
         _ ->
@@ -416,18 +420,6 @@ name(#ordered_shard{name = Name}) ->
 owner(DbName, DocId, Nodes) ->
     hd(mem3_util:rotate_list({DbName, DocId}, lists:usort(Nodes))).
 
-engine(#shard{opts = Opts}) ->
-    engine(Opts);
-engine(#ordered_shard{opts = Opts}) ->
-    engine(Opts);
-engine(Opts) when is_list(Opts) ->
-    case couch_util:get_value(engine, Opts) of
-        Engine when is_binary(Engine) ->
-            [{engine, Engine}];
-        _ ->
-            []
-    end.
-
 %% Check whether a node is up or down
 %%  side effect: set up a connection to Node if there not yet is one.
 
diff --git a/src/mem3/src/mem3_hash.erl b/src/mem3/src/mem3_hash.erl
@@ -23,33 +23,35 @@
 -include_lib("mem3/include/mem3.hrl").
 -include_lib("couch/include/couch_db.hrl").
 
-calculate(#shard{opts = Opts}, DocId) ->
-    Props = couch_util:get_value(props, Opts, []),
-    MFA = get_hash_fun_int(Props),
+calculate(#shard{dbname = DbName}, DocId) ->
+    MFA = get_hash_fun(DbName),
     calculate(MFA, DocId);
-calculate(#ordered_shard{opts = Opts}, DocId) ->
-    Props = couch_util:get_value(props, Opts, []),
-    MFA = get_hash_fun_int(Props),
+calculate(#ordered_shard{dbname = DbName}, DocId) ->
+    MFA = get_hash_fun(DbName),
     calculate(MFA, DocId);
 calculate(DbName, DocId) when is_binary(DbName) ->
     MFA = get_hash_fun(DbName),
     calculate(MFA, DocId);
+calculate(Props, DocId) when is_list(Props) ->
+    MFA = get_hash_fun(Props),
+    calculate(MFA, DocId);
 calculate({Mod, Fun, Args}, DocId) ->
     erlang:apply(Mod, Fun, [DocId | Args]).
 
-get_hash_fun(#shard{opts = Opts}) ->
-    get_hash_fun_int(Opts);
-get_hash_fun(#ordered_shard{opts = Opts}) ->
-    get_hash_fun_int(Opts);
+get_hash_fun(#shard{dbname = DbName}) ->
+    get_hash_fun(DbName);
+get_hash_fun(#ordered_shard{dbname = DbName}) ->
+    get_hash_fun(DbName);
 get_hash_fun(DbName0) when is_binary(DbName0) ->
     DbName = mem3:dbname(DbName0),
     try
-        [#shard{opts = Opts} | _] = mem3_shards:for_db(DbName),
-        get_hash_fun_int(couch_util:get_value(props, Opts, []))
+        get_hash_fun_int(mem3:props(DbName))
     catch
         error:database_does_not_exist ->
             {?MODULE, crc32, []}
-    end.
+    end;
+get_hash_fun(Props) when is_list(Props) ->
+    get_hash_fun_int(Props).
 
 crc32(Item) when is_binary(Item) ->
     erlang:crc32(Item);
diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl
diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl