diff --git a/.gremlins.yaml b/.gremlins.yaml index 7a1872f..bbef13a 100644 --- a/.gremlins.yaml +++ b/.gremlins.yaml @@ -4,4 +4,6 @@ unleash: timeout-coefficient: 50 workers: 4 - dry-run: false \ No newline at end of file + dry-run: false + threshold: + efficacy: 1.0 \ No newline at end of file diff --git a/Makefile b/Makefile index 57a7650..2ea9847 100644 --- a/Makefile +++ b/Makefile @@ -16,10 +16,12 @@ test-unit: ## Run unit tests with coverage test-mutation: ## Run mutation tests with gremlins gremlins unleash +FUZZ_TIME ?= 30 + test-fuzz: ## Run all fuzz tests for 30s each @for func in $$(grep -r --include='*_test.go' -oh 'func Fuzz\w*' . | sed 's/func //'); do \ echo "Fuzzing $$func..."; \ - go test ./... -fuzz="^$$func$$" -fuzztime=30s; \ + go test ./... -fuzz="^$$func$$" -fuzztime=$(FUZZ_TIME)s; \ done test: test-unit test-mutation test-fuzz ## Run all tests diff --git a/bucket.go b/bucket.go index 63a6681..6a4171c 100644 --- a/bucket.go +++ b/bucket.go @@ -17,15 +17,37 @@ type bucket[K, V any] struct { compare EqualFunc[K] } +// location determines where in the bucket a certain key would be placed. If the +// capacity is 0, this will panic. func (b bucket[K, V]) location(key K) uint64 { return b.hash(key) % b.capacity } func (b bucket[K, V]) get(key K) (value V, found bool) { + if b.capacity == 0 { + return + } + slot := b.slots[b.location(key)] return slot.value, slot.occupied && b.compare(slot.key, key) } +func (b *bucket[K, V]) drop(key K) (occupied bool) { + if b.capacity == 0 { + return + } + + slot := &b.slots[b.location(key)] + + if slot.occupied && b.compare(slot.key, key) { + slot.occupied = false + b.size-- + return true + } + + return false +} + func (b *bucket[K, V]) resize(capacity uint64) { b.slots = make([]slot[K, V], capacity) b.capacity = capacity @@ -33,6 +55,10 @@ func (b *bucket[K, V]) resize(capacity uint64) { } func (b bucket[K, V]) update(key K, value V) (updated bool) { + if b.capacity == 0 { + return + } + slot := &b.slots[b.location(key)] if slot.occupied && b.compare(slot.key, key) { @@ -44,6 +70,10 @@ func (b bucket[K, V]) update(key K, value V) (updated bool) { } func (b *bucket[K, V]) evict(insertion entry[K, V]) (evicted entry[K, V], eviction bool) { + if b.capacity == 0 { + return insertion, true + } + slot := &b.slots[b.location(insertion.key)] if !slot.occupied { diff --git a/cuckoo_fuzz_test.go b/cuckoo_fuzz_test.go index 22cf07b..fd97b2f 100644 --- a/cuckoo_fuzz_test.go +++ b/cuckoo_fuzz_test.go @@ -1,11 +1,11 @@ package cuckoo_test import ( - "bytes" - "encoding/binary" + "maps" "testing" "github.com/stretchr/testify/assert" + go_fuzz_utils "github.com/trailofbits/go-fuzz-utils" "git.maximhutz.com/tools/go-cuckoo" ) @@ -19,31 +19,60 @@ func offsetHash(seed uint32) cuckoo.Hash[uint32] { } } +type fuzzStep struct { + drop bool + key, value uint32 +} + +type fuzzScenario struct { + seedA, seedB uint32 + steps []fuzzStep +} + func FuzzInsertLookup(f *testing.F) { - f.Fuzz(func(t *testing.T, data []byte, seedA, seedB uint32) { + f.Fuzz(func(t *testing.T, data []byte) { + var scenario fuzzScenario assert := assert.New(t) - table := cuckoo.NewCustomTable[uint32, uint32]( - offsetHash(seedA), - offsetHash(seedB), - func(a, b uint32) bool { return a == b }, - ) - - if seedA == seedB { + if tp, err := go_fuzz_utils.NewTypeProvider(data); err != nil { + return + } else if err := tp.Fill(&scenario); err != nil { return } - r := bytes.NewReader(data) - var key, value uint32 - for binary.Read(r, binary.LittleEndian, &key) == nil && - binary.Read(r, binary.LittleEndian, &value) == nil { + if scenario.seedA == scenario.seedB { + return + } - err := table.Put(key, value) - assert.NoError(err) + actual := cuckoo.NewCustomTable[uint32, uint32]( + offsetHash(scenario.seedA), + offsetHash(scenario.seedB), + func(a, b uint32) bool { return a == b }, + ) - found, err := table.Get(key) - assert.NoError(err) - assert.Equal(value, found) + expected := map[uint32]uint32{} + + for _, step := range scenario.steps { + if step.drop { + err := actual.Drop(step.key) + assert.NoError(err) + + delete(expected, step.key) + + _, err = actual.Get(step.key) + assert.Error(err) + } else { + err := actual.Put(step.key, step.value) + assert.NoError(err) + + expected[step.key] = step.value + + found, err := actual.Get(step.key) + assert.NoError(err) + assert.Equal(step.value, found) + } + + assert.Equal(expected, maps.Collect(actual.Entries())) } }) } diff --git a/cuckoo_internal_test.go b/cuckoo_internal_test.go index fde14ee..f2b24a3 100644 --- a/cuckoo_internal_test.go +++ b/cuckoo_internal_test.go @@ -25,6 +25,6 @@ func TestLoad(t *testing.T) { for i := range 16 { err := table.Put(i, true) assert.NoError(err) - assert.Equal(float64(table.Size())/float64(table.Capacity()), table.load()) + assert.Equal(float64(table.Size())/float64(table.TotalCapacity()), table.load()) } } diff --git a/cuckoo_test.go b/cuckoo_test.go index 3f9c7ec..b4dfddb 100644 --- a/cuckoo_test.go +++ b/cuckoo_test.go @@ -1,6 +1,7 @@ package cuckoo_test import ( + "errors" "maps" "math/rand/v2" "testing" @@ -64,7 +65,7 @@ func TestStartingCapacity(t *testing.T) { assert := assert.New(t) table := cuckoo.NewTable[int, bool](cuckoo.Capacity(64)) - assert.Equal(uint64(128), table.Capacity()) + assert.Equal(uint64(128), table.TotalCapacity()) } func TestResizeCapacity(t *testing.T) { @@ -74,12 +75,12 @@ func TestResizeCapacity(t *testing.T) { cuckoo.GrowthFactor(2), ) - for table.Capacity() == 16 { + for table.TotalCapacity() == 16 { err := table.Put(rand.Int(), true) assert.NoError(err) } - assert.Equal(uint64(32), table.Capacity()) + assert.Equal(uint64(32), table.TotalCapacity()) } func TestPutMany(t *testing.T) { @@ -128,3 +129,113 @@ func TestRemove(t *testing.T) { assert.True(table.Has(0)) } + +func TestDropExistingItem(t *testing.T) { + assert := assert.New(t) + key, value := 0, true + table := cuckoo.NewTable[int, bool]() + (table.Put(key, value)) + + err := table.Drop(key) + + assert.NoError(err) + assert.Equal(0, table.Size()) + assert.False(table.Has(key)) +} + +func TestDropNoItem(t *testing.T) { + assert := assert.New(t) + key := 0 + table := cuckoo.NewTable[int, bool]() + + err := table.Drop(key) + + assert.NoError(err) + assert.Equal(0, table.Size()) + assert.False(table.Has(key)) +} + +func TestDropItemCapacity(t *testing.T) { + assert := assert.New(t) + key := 0 + table := cuckoo.NewTable[int, bool]( + cuckoo.Capacity(64), + cuckoo.GrowthFactor(2), + ) + + startingCapacity := table.TotalCapacity() + err := table.Drop(key) + endingCapacity := table.TotalCapacity() + + assert.NoError(err) + assert.Equal(0, table.Size()) + assert.Equal(uint64(128), startingCapacity) + assert.Equal(uint64(64), endingCapacity) +} + +func TestPutNoCapacity(t *testing.T) { + assert := assert.New(t) + key, value := 0, true + table := cuckoo.NewTable[int, bool]( + cuckoo.Capacity(0), + ) + + err := table.Put(key, value) + + assert.NoError(err) + assert.Equal(1, table.Size()) + assert.True(table.Has(key)) +} + +func TestBadHashCapacity(t *testing.T) { + assert := assert.New(t) + table := cuckoo.NewCustomTable[int, bool]( + func(int) uint64 { return 0 }, + func(int) uint64 { return 0 }, + func(a, b int) bool { return a == b }, + cuckoo.Capacity(20), + ) + + err1 := table.Put(0, true) + err2 := table.Put(1, true) + err3 := table.Put(2, true) + + assert.NoError(err1) + assert.NoError(err2) + assert.Error(err3) + + assert.Equal(uint64(80), table.TotalCapacity()) +} + +func TestDropResizeCapacity(t *testing.T) { + assert := assert.New(t) + table := cuckoo.NewTable[int, bool]( + cuckoo.Capacity(10), + ) + + err1 := table.Put(0, true) + err2 := table.Put(1, true) + err3 := table.Drop(1) + + assert.NoError(errors.Join(err1, err2, err3)) + assert.Equal(uint64(20), table.TotalCapacity()) +} + +func TestNewTableBy(t *testing.T) { + type User struct { + _ func() + id string + name string + } + + assert := assert.New(t) + table := cuckoo.NewTableBy[User, bool]( + func(u User) string { return u.id }, + ) + + err := table.Put(User{nil, "1", "Robert"}, true) + + assert.NoError(err) + assert.Equal(1, table.Size()) + assert.True(table.Has(User{nil, "1", "Robbie"})) +} diff --git a/go.mod b/go.mod index 492bb0f..89a64a2 100644 --- a/go.mod +++ b/go.mod @@ -4,8 +4,11 @@ go 1.25.6 require github.com/stretchr/testify v1.11.1 +require github.com/kr/pretty v0.3.1 // indirect + require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/trailofbits/go-fuzz-utils v0.0.0-20260318143407-0907cafe7589 gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index c4c1710..db6ed83 100644 --- a/go.sum +++ b/go.sum @@ -1,10 +1,21 @@ +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/trailofbits/go-fuzz-utils v0.0.0-20260318143407-0907cafe7589 h1:UmBZCTPdDYore2IEHN+U4eIqEaRq6METh9pKiPumkqc= +github.com/trailofbits/go-fuzz-utils v0.0.0-20260318143407-0907cafe7589/go.mod h1:zh+T+w9XT/3o4E0WLEGCdmLJ8Yqx/zY3o538tQY3OjY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/table.go b/table.go index 78ef2ae..3e2a1a6 100644 --- a/table.go +++ b/table.go @@ -16,9 +16,9 @@ type Table[K, V any] struct { minLoadFactor float64 } -// Capacity returns the number of slots allocated for the [Table]. To get the +// TotalCapacity returns the number of slots allocated for the [Table]. To get the // number of slots filled, look at [Table.Size]. -func (t Table[K, V]) Capacity() uint64 { +func (t Table[K, V]) TotalCapacity() uint64 { return t.bucketA.capacity + t.bucketB.capacity } @@ -28,25 +28,34 @@ func (t Table[K, V]) Size() int { } func log2(n uint64) (m int) { - return bits.Len64(n) - 1 + return max(0, bits.Len64(n)-1) } func (t Table[K, V]) maxEvictions() int { - return 3 * log2(t.Capacity()) + return 3 * log2(t.TotalCapacity()) } func (t Table[K, V]) load() float64 { - return float64(t.Size()) / float64(t.Capacity()) + // When there are no slots in the table, we still treat the load as 100%. + // Every slot in the table is full. + if t.TotalCapacity() == 0 { + return 1.0 + } + + return float64(t.Size()) / float64(t.TotalCapacity()) } -func (t *Table[K, V]) resize() error { +// resize clears all buckets, changes the sizes of them to a specific capacity, +// and fills them back up again. It is a helper function for [Table.grow] and +// [Table.shrink]; use them instead. +func (t *Table[K, V]) resize(capacity uint64) error { entries := make([]entry[K, V], 0, t.Size()) for k, v := range t.Entries() { entries = append(entries, entry[K, V]{k, v}) } - t.bucketA.resize(t.growthFactor * t.bucketA.capacity) - t.bucketB.resize(t.growthFactor * t.bucketB.capacity) + t.bucketA.resize(capacity) + t.bucketB.resize(capacity) for _, entry := range entries { if err := t.Put(entry.key, entry.value); err != nil { @@ -57,6 +66,26 @@ func (t *Table[K, V]) resize() error { return nil } +// grow increases the table's capacity by the [Table.growthFactor]. If the +// capacity is 0, it increases it to 1. +func (t *Table[K, V]) grow() error { + var newCapacity uint64 + + if t.TotalCapacity() == 0 { + newCapacity = 1 + } else { + newCapacity = t.bucketA.capacity * t.growthFactor + } + + return t.resize(newCapacity) +} + +// shrink reduces the table's capacity by the [Table.growthFactor]. It may +// reduce it down to 0. +func (t *Table[K, V]) shrink() error { + return t.resize(t.bucketA.capacity / t.growthFactor) +} + // Get fetches the value for a key in the [Table]. Returns an error if no value // is found. func (t Table[K, V]) Get(key K) (value V, err error) { @@ -99,10 +128,10 @@ func (t *Table[K, V]) Put(key K, value V) (err error) { } if t.load() < t.minLoadFactor { - return fmt.Errorf("bad hash: resize on load %d/%d = %f", t.Size(), t.Capacity(), t.load()) + return fmt.Errorf("bad hash: resize on load %d/%d = %f", t.Size(), t.TotalCapacity(), t.load()) } - if err := t.resize(); err != nil { + if err := t.grow(); err != nil { return err } @@ -111,10 +140,15 @@ func (t *Table[K, V]) Put(key K, value V) (err error) { // Drop removes a value for a key in the table. Returns an error if its value // cannot be removed. -// -// Deprecated: Do not use. -func (t Table[K, V]) Drop(_ K) { - panic("Not implemented") +func (t *Table[K, V]) Drop(key K) (err error) { + t.bucketA.drop(key) + t.bucketB.drop(key) + + if t.load() < t.minLoadFactor { + return t.shrink() + } + + return nil } // Entries returns an unordered sequence of all key-value pairs in the table.