diff --git a/bucket.go b/bucket.go index 6a4171c..8010f3e 100644 --- a/bucket.go +++ b/bucket.go @@ -1,12 +1,13 @@ package cuckoo -type entry[K, V any] struct { +// An Entry is a key-value pair. +type Entry[K, V any] struct { key K value V } type slot[K, V any] struct { - entry[K, V] + Entry[K, V] occupied bool } @@ -48,10 +49,13 @@ func (b *bucket[K, V]) drop(key K) (occupied bool) { return false } -func (b *bucket[K, V]) resize(capacity uint64) { - b.slots = make([]slot[K, V], capacity) - b.capacity = capacity - b.size = 0 +func (b *bucket[K, V]) resized(capacity uint64) bucket[K, V] { + return bucket[K, V]{ + slots: make([]slot[K, V], capacity), + capacity: capacity, + hash: b.hash, + compare: b.compare, + } } func (b bucket[K, V]) update(key K, value V) (updated bool) { @@ -69,7 +73,7 @@ func (b bucket[K, V]) update(key K, value V) (updated bool) { return false } -func (b *bucket[K, V]) evict(insertion entry[K, V]) (evicted entry[K, V], eviction bool) { +func (b *bucket[K, V]) insert(insertion Entry[K, V]) (evicted Entry[K, V], eviction bool) { if b.capacity == 0 { return insertion, true } @@ -77,7 +81,7 @@ func (b *bucket[K, V]) evict(insertion entry[K, V]) (evicted entry[K, V], evicti slot := &b.slots[b.location(insertion.key)] if !slot.occupied { - slot.entry = insertion + slot.Entry = insertion slot.occupied = true b.size++ return @@ -88,7 +92,7 @@ func (b *bucket[K, V]) evict(insertion entry[K, V]) (evicted entry[K, V], evicti return } - insertion, slot.entry = slot.entry, insertion + insertion, slot.Entry = slot.Entry, insertion return insertion, true } diff --git a/cuckoo_fuzz_test.go b/cuckoo_fuzz_test.go index f849de0..8d30423 100644 --- a/cuckoo_fuzz_test.go +++ b/cuckoo_fuzz_test.go @@ -76,7 +76,7 @@ func FuzzInsertLookup(f *testing.F) { _, err = actual.Get(step.key) assert.Error(err) } else { - err := actual.Put(step.key, step.value) + _, err := actual.Put(step.key, step.value) assert.NoError(err) expected[step.key] = step.value diff --git 
a/cuckoo_internal_test.go b/cuckoo_internal_test.go index f2b24a3..ec07056 100644 --- a/cuckoo_internal_test.go +++ b/cuckoo_internal_test.go @@ -23,7 +23,7 @@ func TestLoad(t *testing.T) { table := NewTable[int, bool](Capacity(8)) for i := range 16 { - err := table.Put(i, true) + _, err := table.Put(i, true) assert.NoError(err) assert.Equal(float64(table.Size())/float64(table.TotalCapacity()), table.load()) } diff --git a/cuckoo_test.go b/cuckoo_test.go index 131194a..eac1734 100644 --- a/cuckoo_test.go +++ b/cuckoo_test.go @@ -25,7 +25,7 @@ func TestAddItem(t *testing.T) { key, value := 0, true table := cuckoo.NewTable[int, bool]() - err := table.Put(key, value) + _, err := table.Put(key, value) assert.NoError(err) assert.Equal(1, table.Size()) @@ -38,7 +38,7 @@ func TestPutOverwrite(t *testing.T) { table := cuckoo.NewTable[int, int]() (table.Put(key, value)) - err := table.Put(key, newValue) + _, err := table.Put(key, newValue) assert.NoError(err) assert.Equal(1, table.Size()) @@ -52,9 +52,9 @@ func TestSameHash(t *testing.T) { hash := func(int) uint64 { return 0 } table := cuckoo.NewCustomTable[int, bool](hash, hash, cuckoo.DefaultEqualFunc[int]) - errA := table.Put(0, true) - errB := table.Put(1, true) - errC := table.Put(2, true) + _, errA := table.Put(0, true) + _, errB := table.Put(1, true) + _, errC := table.Put(2, true) assert.NoError(errA) assert.NoError(errB) @@ -76,7 +76,7 @@ func TestResizeCapacity(t *testing.T) { ) for table.TotalCapacity() == 16 { - err := table.Put(rand.Int(), true) + _, err := table.Put(rand.Int(), true) assert.NoError(err) } @@ -89,7 +89,7 @@ func TestPutMany(t *testing.T) { for i := range 1_000 { expected[i] = true - err := actual.Put(i, true) + _, err := actual.Put(i, true) assert.NoError(err) } @@ -103,7 +103,7 @@ func TestGetMany(t *testing.T) { table := cuckoo.NewTable[int, bool]() for i := range 1_000 { - err := table.Put(i, true) + _, err := table.Put(i, true) assert.NoError(err) } @@ -168,7 +168,7 @@ func 
TestPutNoCapacity(t *testing.T) { cuckoo.Capacity(0), ) - err := table.Put(key, value) + _, err := table.Put(key, value) assert.NoError(err) assert.Equal(1, table.Size()) @@ -184,9 +184,9 @@ func TestBadHashCapacity(t *testing.T) { cuckoo.Capacity(20), ) - err1 := table.Put(0, true) - err2 := table.Put(1, true) - err3 := table.Put(2, true) + _, err1 := table.Put(0, true) + _, err2 := table.Put(1, true) + _, err3 := table.Put(2, true) assert.NoError(err1) assert.NoError(err2) @@ -201,8 +201,8 @@ func TestDropResizeCapacity(t *testing.T) { cuckoo.Capacity(10), ) - err1 := table.Put(0, true) - err2 := table.Put(1, true) + _, err1 := table.Put(0, true) + _, err2 := table.Put(1, true) err3 := table.Drop(1) assert.NoError(errors.Join(err1, err2, err3)) @@ -221,7 +221,7 @@ func TestNewTableBy(t *testing.T) { func(u User) string { return u.id }, ) - err := table.Put(User{nil, "1", "Robert"}, true) + _, err := table.Put(User{nil, "1", "Robert"}, true) assert.NoError(err) assert.Equal(1, table.Size()) diff --git a/doc_example_test.go b/doc_example_test.go index 1fad4ff..86f8e0c 100644 --- a/doc_example_test.go +++ b/doc_example_test.go @@ -10,7 +10,7 @@ import ( func Example_basic() { table := cuckoo.NewTable[int, string]() - if err := table.Put(1, "Hello, World!"); err != nil { + if _, err := table.Put(1, "Hello, World!"); err != nil { fmt.Println("Put error:", err) } diff --git a/settings.go b/settings.go index fe0408f..425d6ba 100644 --- a/settings.go +++ b/settings.go @@ -19,6 +19,11 @@ const DefaultGrowthFactor uint64 = 2 // [libcuckoo]: https://github.com/efficient/libcuckoo/blob/656714705a055df2b7a605eb3c71586d9da1e119/libcuckoo/cuckoohash_config.hh#L21 const defaultMinimumLoad float64 = 0.05 +// defaultGrowthLimit is the maximum number of times a [Table] can grow in a +// single [Table.Put], before the library infers it will lead to a stack +// overflow. The value of '64' was chosen arbitrarily.
+const defaultGrowthLimit uint64 = 64 + type settings struct { growthFactor uint64 minLoadFactor float64 diff --git a/table.go b/table.go index 922aa81..e2b35ac 100644 --- a/table.go +++ b/table.go @@ -45,30 +45,61 @@ func (t Table[K, V]) load() float64 { return float64(t.Size()) / float64(t.TotalCapacity()) } -// resize clears all buckets, changes the sizes of them to a specific capacity, -// and fills them back up again. It is a helper function for [Table.grow] and -// [Table.shrink]; use them instead. -func (t *Table[K, V]) resize(capacity uint64) error { - entries := make([]entry[K, V], 0, t.Size()) - for k, v := range t.Entries() { - entries = append(entries, entry[K, V]{k, v}) +// insert attempts to put/update an entry in the table, without modifying the +// size of the table. Returns a displaced entry and 'homeless = true' if an +// entry could not be placed after exhausting evictions. +func (t *Table[K, V]) insert(entry Entry[K, V]) (displaced Entry[K, V], homeless bool) { + if t.bucketA.update(entry.key, entry.value) { + return } - t.bucketA.resize(capacity) - t.bucketB.resize(capacity) + if t.bucketB.update(entry.key, entry.value) { + return + } - for _, entry := range entries { - if err := t.Put(entry.key, entry.value); err != nil { - return err + for range t.maxEvictions() { + if entry, homeless = t.bucketA.insert(entry); !homeless { + return + } + + if entry, homeless = t.bucketB.insert(entry); !homeless { + return } } - return nil + return entry, true +} + +// resized creates an empty copy of the table, with a new capacity for each +// bucket. +func (t Table[K, V]) resized(capacity uint64) Table[K, V] { + return Table[K, V]{ + growthFactor: t.growthFactor, + minLoadFactor: t.minLoadFactor, + bucketA: t.bucketA.resized(capacity), + bucketB: t.bucketB.resized(capacity), + } +} + +// resize creates a new [Table.resized] with 'capacity', inserts all items into +// the array, and replaces the current table. 
It is a helper function for +// [Table.grow] and [Table.shrink]; use them instead. +func (t *Table[K, V]) resize(capacity uint64) bool { + updated := t.resized(capacity) + + for k, v := range t.Entries() { + if _, failed := updated.insert(Entry[K, V]{k, v}); failed { + return false + } + } + + *t = updated + return true } // grow increases the table's capacity by the [Table.growthFactor]. If the // capacity is 0, it increases it to 1. -func (t *Table[K, V]) grow() error { +func (t *Table[K, V]) grow() bool { var newCapacity uint64 if t.TotalCapacity() == 0 { @@ -82,7 +113,7 @@ func (t *Table[K, V]) grow() error { // shrink reduces the table's capacity by the [Table.growthFactor]. It may // reduce it down to 0. -func (t *Table[K, V]) shrink() error { +func (t *Table[K, V]) shrink() bool { return t.resize(t.bucketA.capacity / t.growthFactor) } @@ -106,36 +137,38 @@ func (t Table[K, V]) Has(key K) (exists bool) { return err == nil } -// Put sets the value for a key. Returns error if its value cannot be set. -func (t *Table[K, V]) Put(key K, value V) (err error) { - if t.bucketA.update(key, value) { - return nil - } +// Put sets the value for a key. If it cannot be set, an error is returned, +// along with the last displaced entry. +// +// On failure, the returned entry and the current table contents together +// preserve all previously inserted entries and the attempted entry. 
+func (t *Table[K, V]) Put(key K, value V) (displaced Entry[K, V], err error) { + var ( + entry = Entry[K, V]{key, value} + homeless bool + ) - if t.bucketB.update(key, value) { - return nil - } - - entry, eviction := entry[K, V]{key, value}, false - for range t.maxEvictions() { - if entry, eviction = t.bucketA.evict(entry); !eviction { - return nil + for range defaultGrowthLimit { + if entry, homeless = t.insert(entry); !homeless { + return } - if entry, eviction = t.bucketB.evict(entry); !eviction { - return nil + // Both this and the growth limit are necessary: this catches bad hashes + // early when the table is sparse, while the latter catches cases where + // growing never helps. + if t.load() < t.minLoadFactor { + return entry, fmt.Errorf("bad hash: resize on load %d/%d", t.Size(), t.TotalCapacity()) + } + + // It is theoretically possible to have a table with a larger capacity + // that is valid. But this chance is astronomically small, so we ignore + // it in this implementation. + if grew := t.grow(); !grew { + return entry, fmt.Errorf("bad hash: could not redistribute entries into larger table") } } - if t.load() < t.minLoadFactor { - return fmt.Errorf("bad hash: resize on load %d/%d = %f", t.Size(), t.TotalCapacity(), t.load()) - } - - if err := t.grow(); err != nil { - return err - } - - return t.Put(entry.key, entry.value) + return entry, fmt.Errorf("bad hash: could not place entry after %d resizes", defaultGrowthLimit) } // Drop removes a value for a key in the table. Returns an error if its value @@ -145,7 +178,9 @@ func (t *Table[K, V]) Drop(key K) (err error) { t.bucketB.drop(key) if t.load() < t.minLoadFactor { - return t.shrink() + // The error is not handled here, because table-shrinking is an internal + // optimization. + t.shrink() } return nil