Skip to content

Commit 07efa1a

Browse files
committedNov 20, 2023
curvefs: add a new distributed transaction model to improve rename performance
Signed-off-by: wanghai01 <seanhaizi@163.com>
1 parent 1205fd6 commit 07efa1a

File tree

94 files changed

+4298
-1139
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+4298
-1139
lines changed
 

‎curvefs/conf/client.conf

+6
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ fuseClient.getThreadPool=4
8585
# it guarantees the consistency of files after rename, otherwise you should
8686
# disable it for performance.
8787
fuseClient.enableMultiMountPointRename=true
88+
89+
# the rename transaction models are different between version 1 and version 2
90+
# the v2 version greatly improves the performance of rename, especially in concurrent scenarios.
91+
# Note: v1 and v2 are incompatible; a v1 cluster cannot be directly upgraded to v2.
92+
fuseClient.txVersion=1
93+
8894
# splice will bring higher performance in some cases
8995
# but there might be a kernel issue that will cause kernel panic when enabling it
9096
# see https://lore.kernel.org/all/CAAmZXrsGg2xsP1CK+cbuEMumtrqdvD-NKnWzhNcvn71RV3c1yw@mail.gmail.com/

‎curvefs/conf/metaserver.conf

+9-2
Original file line numberDiff line numberDiff line change
@@ -258,11 +258,17 @@ storage.rocksdb.unordered_write_buffer_size=67108864
258258
# for store inode which exclude its s3chunkinfo list (default: 3)
259259
storage.rocksdb.unordered_max_write_buffer_number=3
260260
# rocksdb column family's write_buffer_size
261-
# for store dentry and inode's s3chunkinfo list (unit: bytes, default: 128MB)
261+
# for store dentry and inode's s3chunkinfo list (unit: bytes, default: 64MB)
262262
storage.rocksdb.ordered_write_buffer_size=67108864
263263
# rocksdb column family's max_write_buffer_number
264264
# for store dentry and inode's s3chunkinfo list (default: 3)
265265
storage.rocksdb.ordered_max_write_buffer_number=3
266+
# rocksdb column family's write_buffer_size
267+
# for store tx lock and write (unit: bytes, default: 64MB)
268+
storage.rocksdb.tx_cf_write_buffer_size=67108864
269+
# rocksdb column family's max_write_buffer_number
270+
# for store tx lock and write (default: 3)
271+
storage.rocksdb.tx_cf_max_write_buffer_number=3
266272
# The target number of write history bytes to hold in memory (default: 20MB)
267273
storage.rocksdb.max_write_buffer_size_to_maintain=20971520
268274
# rocksdb memtable prefix bloom size ratio (size=write_buffer_size*memtable_prefix_bloom_size_ratio)
@@ -286,6 +292,8 @@ storage.rocksdb.perf_sampling_ratio=0
286292
# we will send them with rpc streaming instead of
287293
# padding them into the inode (default: 25000, about 25000 * 41 (byte) = 1MB)
288294
storage.s3_meta_inside_inode.limit_size=25000
295+
# TTL (in milliseconds) for tx lock
296+
storage.rocksdb.tx_lock_ttl_ms=5000
289297

290298
# recycle options
291299
# metaserver scan recycle period, default 1h
@@ -317,7 +325,6 @@ excutorOpt.minRetryTimesForceTimeoutBackoff=5
317325
excutorOpt.maxRetryTimesBeforeConsiderSuspend=20
318326
# batch limit of get inode attr and xattr
319327
excutorOpt.batchInodeAttrLimit=10000
320-
excutorOpt.enableMultiMountPointRename=true
321328

322329
#### metaCacheOpt
323330
# Gets the number of retries for the leader

0 commit comments

Comments
 (0)