@@ -763,6 +763,8 @@ var requestPool = sync.Pool{
763763}
764764
765765func (db * DB ) writeToLSM (b * request ) error {
766+ db .lock .RLock ()
767+ defer db .lock .RUnlock ()
766768 for i , entry := range b .Entries {
767769 var err error
768770 if db .opt .managedTxns || entry .skipVlogAndSetThreshold (db .valueThreshold ()) {
@@ -1036,10 +1038,9 @@ func (db *DB) HandoverSkiplist(skl *skl.Skiplist, callback func()) error {
10361038
10371039 // Iterate over the skiplist and send the entries to the publisher.
10381040 it := skl .NewIterator ()
1039- it .SeekToFirst ()
10401041
10411042 var entries []* Entry
1042- for it .Valid () {
1043+ for it .SeekToFirst (); it . Valid (); it . Next () {
10431044 v := it .Value ()
10441045 e := & Entry {
10451046 Key : it .Key (),
@@ -1048,7 +1049,6 @@ func (db *DB) HandoverSkiplist(skl *skl.Skiplist, callback func()) error {
10481049 UserMeta : v .UserMeta ,
10491050 }
10501051 entries = append (entries , e )
1051- it .Next ()
10521052 }
10531053 req := & request {
10541054 Entries : entries ,
@@ -1836,6 +1836,122 @@ func (db *DB) dropAll() (func(), error) {
18361836 return resume , nil
18371837}
18381838
1839+ // DropPrefixNonBlocking would logically drop all the keys with the provided prefix. The data would
1840+ // not be cleared from LSM tree immediately. It would be deleted eventually through compactions.
1841+ // This operation is useful when we don't want to block writes while we delete the prefixes.
1842+ // It does this in the following way:
1843+ // - Stream the given prefixes at a given ts.
1844+ // - Write them to skiplist at the specified ts and handover that skiplist to DB.
1845+ func (db * DB ) DropPrefixNonBlocking (prefixes ... []byte ) error {
1846+ if db .opt .ReadOnly {
1847+ return errors .New ("Attempting to drop data in read-only mode." )
1848+ }
1849+
1850+ if len (prefixes ) == 0 {
1851+ return nil
1852+ }
1853+ db .opt .Infof ("Non-blocking DropPrefix called for %s" , prefixes )
1854+
1855+ cbuf := z .NewBuffer (int (db .opt .MemTableSize ), "DropPrefixNonBlocking" )
1856+ defer cbuf .Release ()
1857+
1858+ var wg sync.WaitGroup
1859+ handover := func (force bool ) error {
1860+ if ! force && int64 (cbuf .LenNoPadding ()) < db .opt .MemTableSize {
1861+ return nil
1862+ }
1863+
1864+ // Sort the kvs, add them to the builder, and hand it over to DB.
1865+ cbuf .SortSlice (func (left , right []byte ) bool {
1866+ return y .CompareKeys (left , right ) < 0
1867+ })
1868+
1869+ b := skl .NewBuilder (db .opt .MemTableSize )
1870+ err := cbuf .SliceIterate (func (s []byte ) error {
1871+ b .Add (s , y.ValueStruct {Meta : bitDelete })
1872+ return nil
1873+ })
1874+ if err != nil {
1875+ return err
1876+ }
1877+ cbuf .Reset ()
1878+ wg .Add (1 )
1879+ return db .HandoverSkiplist (b .Skiplist (), wg .Done )
1880+ }
1881+
1882+ dropPrefix := func (prefix []byte ) error {
1883+ stream := db .NewStreamAt (math .MaxUint64 )
1884+ stream .LogPrefix = fmt .Sprintf ("Dropping prefix: %#x" , prefix )
1885+ stream .Prefix = prefix
1886+ // We don't need anything except key and version.
1887+ stream .KeyToList = func (key []byte , itr * Iterator ) (* pb.KVList , error ) {
1888+ if ! itr .Valid () {
1889+ return nil , nil
1890+ }
1891+ item := itr .Item ()
1892+ if item .IsDeletedOrExpired () {
1893+ return nil , nil
1894+ }
1895+ if ! bytes .Equal (key , item .Key ()) {
1896+ // Return on the encounter with another key.
1897+ return nil , nil
1898+ }
1899+
1900+ a := itr .Alloc
1901+ ka := a .Copy (key )
1902+ list := & pb.KVList {}
1903+ // We need to generate only a single delete marker per key. All the versions for this
1904+ // key will be considered deleted, if we delete the one at highest version.
1905+ kv := y .NewKV (a )
1906+ kv .Key = y .KeyWithTs (ka , item .Version ())
1907+ list .Kv = append (list .Kv , kv )
1908+ itr .Next ()
1909+ return list , nil
1910+ }
1911+
1912+ stream .Send = func (buf * z.Buffer ) error {
1913+ kv := pb.KV {}
1914+ err := buf .SliceIterate (func (s []byte ) error {
1915+ kv .Reset ()
1916+ if err := kv .Unmarshal (s ); err != nil {
1917+ return err
1918+ }
1919+ cbuf .WriteSlice (kv .Key )
1920+ return nil
1921+ })
1922+ if err != nil {
1923+ return err
1924+ }
1925+ return handover (false )
1926+ }
1927+ if err := stream .Orchestrate (context .Background ()); err != nil {
1928+ return err
1929+ }
1930+ // Flush the remaining skiplists if any.
1931+ return handover (true )
1932+ }
1933+
1934+ // Iterate over all the prefixes and logically drop them.
1935+ for _ , prefix := range prefixes {
1936+ if err := dropPrefix (prefix ); err != nil {
1937+ return errors .Wrapf (err , "While dropping prefix: %#x" , prefix )
1938+ }
1939+ }
1940+
1941+ wg .Wait ()
1942+ return nil
1943+ }
1944+
1945+ // DropPrefix would drop all the keys with the provided prefix. Based on DB options, it either drops
1946+ // the prefixes by blocking the writes or doing a logical drop.
1947+ // See DropPrefixBlocking and DropPrefixNonBlocking for more information.
1948+ func (db * DB ) DropPrefix (prefixes ... []byte ) error {
1949+ if db .opt .AllowStopTheWorld {
1950+ return db .DropPrefixBlocking (prefixes ... )
1951+ }
1952+ return db .DropPrefixNonBlocking (prefixes ... )
1953+ }
1954+
18391955// DropPrefixBlocking would drop all the keys with the provided prefix. It does this in the following way:
18401956// - Stop accepting new writes.
18411957// - Stop memtable flushes before acquiring lock. Because we're acquiring lock here
@@ -1847,7 +1963,7 @@ func (db *DB) dropAll() (func(), error) {
18471963// - Compact L0->L1, skipping over Kp.
18481964// - Compact rest of the levels, Li->Li, picking tables which have Kp.
18491965// - Resume memtable flushes, compactions and writes.
1850- func (db * DB ) DropPrefix (prefixes ... []byte ) error {
1966+ func (db * DB ) DropPrefixBlocking (prefixes ... []byte ) error {
18511967 if len (prefixes ) == 0 {
18521968 return nil
18531969 }
0 commit comments