Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete op #6

Merged
merged 6 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
delete op + checksum for checking valid keys
  • Loading branch information
PaulisMatrix committed Oct 28, 2023
commit 429de856cf3fd3a7583a506d441ed3522a21c038
25 changes: 22 additions & 3 deletions disk_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func (d *DiskStore) Get(key string) string {
//
// How get works?
// 1. Check if there is any KeyEntry record for the key in keyDir
// 2. Return an empty string if key doesn't exist
// 2. Return an empty string if key doesn't exist or if the key has been deleted
// 3. If it exists, then read KeyEntry.totalSize bytes starting from the
// KeyEntry.position from the disk
// 4. Decode the bytes into valid KV pair and return the value
Expand All @@ -122,7 +122,16 @@ func (d *DiskStore) Get(key string) string {
if err != nil {
panic("read error")
}
_, _, value := decodeKV(data)
checkSum, _, _, value := decodeKV(data)
// verify the checksum to make sure the stored value is not corrupted
if !verifyCheckSum(value, checkSum) {
return "corrupted value"
}

// check if it's a tombstone value, i.e. the key has been deleted
if string(value) == TombStoneVal {
return TombStoneVal
}
return value
}

Expand All @@ -141,6 +150,16 @@ func (d *DiskStore) Set(key string, value string) {
d.writePosition += size
}

// Delete marks key as removed. Nothing is erased: a fresh record whose value
// is TombStoneVal is encoded, handed to d.write, and the keyDir entry for key
// is pointed at that record, so it is registered whether or not the key was
// present before.
func (d *DiskStore) Delete(key string) {
	now := uint32(time.Now().Unix())
	recordLen, record := encodeKV(now, key, TombStoneVal)
	d.write(record)

	// The record starts at the current write position; capture it before
	// advancing past the bytes just written.
	offset := uint32(d.writePosition)
	d.keyDir[key] = NewKeyEntry(now, offset, uint32(recordLen))
	d.writePosition += recordLen
}

func (d *DiskStore) Close() bool {
// before we close the file, we need to safely write the contents in the buffers
// to the disk. Check documentation of DiskStore.write() to understand
Expand Down Expand Up @@ -188,7 +207,7 @@ func (d *DiskStore) initKeyDir(existingFile string) {
if err != nil {
break
}
timestamp, keySize, valueSize := decodeHeader(header)
_, timestamp, keySize, valueSize := decodeHeader(header)
key := make([]byte, keySize)
value := make([]byte, valueSize)
_, err = io.ReadFull(file, key)
Expand Down
21 changes: 12 additions & 9 deletions disk_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,23 +82,26 @@ func TestDiskStore_Delete(t *testing.T) {
for key, val := range tests {
store.Set(key, val)
}
for key, _ := range tests {
store.Set(key, "")

// only for tests
deletedKeys := []string{"hamlet", "dune", "othello"}
//delete few keys
for _, k := range deletedKeys {
store.Delete(k)
}
store.Set("end", "yes")
store.Close()

store, err = NewDiskStore("test.db")
if err != nil {
t.Fatalf("failed to create disk store: %v", err)
}
for key := range tests {
if store.Get(key) != "" {
t.Errorf("Get() = %v, want '' (empty)", store.Get(key))

//check for deletion
for _, dkeys := range deletedKeys {
actualVal := store.Get(dkeys)
if actualVal != TombStoneVal {
t.Errorf("Get() = %s, want %s", actualVal, TombStoneVal)
}
}
if store.Get("end") != "yes" {
t.Errorf("Get() = %v, want %v", store.Get("end"), "yes")
}
store.Close()
}
33 changes: 33 additions & 0 deletions examples/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package main

import (
"fmt"
"log"
"os"

caskDB "github.com/avinassh/go-caskdb"
)

// main is a small walkthrough of the caskDB API: it opens a disk store backed
// by "test.db", sets a few keys, deletes one of them, and reads values back.
// The store is closed and the database file removed when main returns.
func main() {
	store, err := caskDB.NewDiskStore("test.db")
	if err != nil {
		// log.Fatalf terminates the process itself, so no explicit os.Exit
		// is needed after it.
		log.Fatalf("failed to create disk store: %v", err)
	}
	// Deferred calls run last-in-first-out: register the file removal first
	// so that the store is closed before its backing file is deleted.
	defer os.Remove("test.db")
	defer store.Close()

	store.Set("screwderia", "charles leclrec") // cause ferrari screws everyone
	store.Set("redbull", "max verstappen")
	store.Set("mercedes", "lewis hamilton")
	store.Set("mclaren", "lando norris")

	// Delete a key; Get reports a deleted key by returning TombStoneVal.
	store.Delete("mclaren")
	val := store.Get("mclaren")
	if val == caskDB.TombStoneVal {
		fmt.Println("mclaren dropped lando norris for good!")
	}

	fmt.Printf("%s drives for redbull racing!", store.Get("redbull"))
}
69 changes: 43 additions & 26 deletions format.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,32 +36,39 @@ package caskdb
// func encodeKV(timestamp uint32, key string, value string) (int, []byte)
// func decodeKV(data []byte) (uint32, uint32, string, string)

import "encoding/binary"
import (
"encoding/binary"
"hash/crc32"
)

// headerSize specifies the total header size. Our key value pair, when stored on disk
// looks like this:
//
// ┌───────────┬──────────┬────────────┬────────────┐
// │ timestamp │ key_size │ value_size │ key │ value │
// └───────────┴──────────┴────────────┴─────┴───────
// ┌─────┬───────────┬──────────┬────────────┬─────┬───────┐
// │ crc │ timestamp │ key_size │ value_size │ key │ value │
// └─────┴───────────┴──────────┴────────────┴─────┴───────┘
//
// This is analogous to a typical database's row (or a record). The total length of
// the row is variable, depending on the contents of the key and value.
//
// The first three fields form the header:
// The first four fields form the header:
//
// ┌───────────────┬──────────────────────────────┐
// │ timestamp(4B) │ key_size(4B) │ value_size(4B) │
// └───────────────┴──────────────────────────────┘
// ┌─────────┬───────────────┬──────────────┬────────────────┐
// │ crc(4B) │ timestamp(4B) │ key_size(4B) │ value_size(4B) │
// └─────────┴───────────────┴──────────────┴────────────────┘
//
// These three fields store unsigned integers of size 4 bytes, giving our header a
// fixed length of 12 bytes. Timestamp field stores the time the record we
// inserted in unix epoch seconds. Key size and value size fields store the length of
// bytes occupied by the key and value. The maximum integer
// These four fields store unsigned integers of size 4 bytes, giving our header a
// fixed length of 16 bytes.
// The crc (checksum) field stores the CRC32 checksum of the value, which is used to detect a corrupted value on read.
// The timestamp field stores the time the record was inserted, in unix epoch seconds.
// The key size and value size fields store the number of bytes occupied by the key and value. The maximum integer
// stored by 4 bytes is 4,294,967,295 (2 ** 32 - 1), roughly ~4.2GB. So, the size of
// each key or value cannot exceed this. Theoretically, a single row can be as large
// as ~8.4GB.
const headerSize = 12
const headerSize = 16

// TombStoneVal is the special value written for a deleted key: instead of actually removing the key or
// flagging the deletion in the header, a record carrying this "tombstone" value is written.
const TombStoneVal = "tombstone"

// KeyEntry keeps the metadata about the KV, specially the position of
// the byte offset in the file. Whenever we insert/update a key, we create a new
Expand All @@ -82,30 +89,40 @@ func NewKeyEntry(timestamp uint32, position uint32, totalSize uint32) KeyEntry {
return KeyEntry{timestamp, position, totalSize}
}

func encodeHeader(timestamp uint32, keySize uint32, valueSize uint32) []byte {
func encodeHeader(crc uint32, timestamp uint32, keySize uint32, valueSize uint32) []byte {
header := make([]byte, headerSize)
binary.LittleEndian.PutUint32(header[0:4], timestamp)
binary.LittleEndian.PutUint32(header[4:8], keySize)
binary.LittleEndian.PutUint32(header[8:12], valueSize)
binary.LittleEndian.PutUint32(header[0:4], crc)
binary.LittleEndian.PutUint32(header[4:8], timestamp)
binary.LittleEndian.PutUint32(header[8:12], keySize)
binary.LittleEndian.PutUint32(header[12:16], valueSize)
return header
}

func decodeHeader(header []byte) (uint32, uint32, uint32) {
timestamp := binary.LittleEndian.Uint32(header[0:4])
keySize := binary.LittleEndian.Uint32(header[4:8])
valueSize := binary.LittleEndian.Uint32(header[8:12])
return timestamp, keySize, valueSize
func decodeHeader(header []byte) (uint32, uint32, uint32, uint32) {
checkSum := binary.LittleEndian.Uint32(header[0:4])
timestamp := binary.LittleEndian.Uint32(header[4:8])
keySize := binary.LittleEndian.Uint32(header[8:12])
valueSize := binary.LittleEndian.Uint32(header[12:16])
return checkSum, timestamp, keySize, valueSize
}

func encodeKV(timestamp uint32, key string, value string) (int, []byte) {
header := encodeHeader(timestamp, uint32(len(key)), uint32(len(value)))
header := encodeHeader(calculateCheckSum(value), timestamp, uint32(len(key)), uint32(len(value)))
data := append([]byte(key), []byte(value)...)
return headerSize + len(data), append(header, data...)
}

func decodeKV(data []byte) (uint32, string, string) {
timestamp, keySize, valueSize := decodeHeader(data[0:headerSize])
func decodeKV(data []byte) (uint32, uint32, string, string) {
checkSum, timestamp, keySize, valueSize := decodeHeader(data[0:headerSize])
key := string(data[headerSize : headerSize+keySize])
value := string(data[headerSize+keySize : headerSize+keySize+valueSize])
return timestamp, key, value
return checkSum, timestamp, key, value
}

// calculateCheckSum returns the CRC-32 checksum (IEEE polynomial) of the
// bytes of value.
func calculateCheckSum(value string) uint32 {
	payload := []byte(value)
	return crc32.Checksum(payload, crc32.IEEETable)
}

// verifyCheckSum reports whether checkSum equals the CRC-32 checksum (IEEE
// polynomial) computed over the bytes of value.
func verifyCheckSum(value string, checkSum uint32) bool {
	computed := crc32.ChecksumIEEE([]byte(value))
	if computed != checkSum {
		return false
	}
	return true
}
23 changes: 17 additions & 6 deletions format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,23 @@ import (

func Test_encodeHeader(t *testing.T) {
tests := []struct {
checkSum uint32
timestamp uint32
keySize uint32
valueSize uint32
}{
{10, 10, 10},
{0, 0, 0},
{10000, 10000, 10000},
{10, 10, 10, 10},
{0, 0, 0, 0},
{10000, 10000, 10000, 10000},
}
for _, tt := range tests {
data := encodeHeader(tt.timestamp, tt.keySize, tt.valueSize)
timestamp, keySize, valueSize := decodeHeader(data)
data := encodeHeader(tt.checkSum, tt.timestamp, tt.keySize, tt.valueSize)
checkSum, timestamp, keySize, valueSize := decodeHeader(data)

if checkSum != tt.checkSum {
t.Errorf("encodeHeader() checksum = %v, want %v", checkSum, tt.checkSum)
}

if timestamp != tt.timestamp {
t.Errorf("encodeHeader() timestamp = %v, want %v", timestamp, tt.timestamp)
}
Expand All @@ -41,8 +47,13 @@ func Test_encodeKV(t *testing.T) {
{100, "🔑", "", headerSize + 4},
}
for _, tt := range tests {
expCheckSum := calculateCheckSum(tt.value)
size, data := encodeKV(tt.timestamp, tt.key, tt.value)
timestamp, key, value := decodeKV(data)
checkSum, timestamp, key, value := decodeKV(data)

if checkSum != expCheckSum {
t.Errorf("encodeKV() checksum = %v, want %v", checkSum, expCheckSum)
}
if timestamp != tt.timestamp {
t.Errorf("encodeKV() timestamp = %v, want %v", timestamp, tt.timestamp)
}
Expand Down